diff options
Diffstat (limited to 'drivers/gpu/drm/i915')
153 files changed, 23032 insertions, 11573 deletions
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 183f5dc1c3f2..a5cd5dacf055 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -19,6 +19,8 @@ config DRM_I915 select INPUT if ACPI select ACPI_VIDEO if ACPI select ACPI_BUTTON if ACPI + select SYNC_FILE + select IOSF_MBI help Choose this option if you have a system that has "Intel Graphics Media Accelerator" or "HD Graphics" integrated graphics, diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 597648c7a645..e091809a9a9e 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -24,7 +24,9 @@ config DRM_I915_DEBUG select X86_MSR # used by igt/pm_rpm select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks) select DRM_DEBUG_MM if DRM=y + select DRM_DEBUG_MM_SELFTEST select DRM_I915_SW_FENCE_DEBUG_OBJECTS + select DRM_I915_SELFTEST default n help Choose this option to turn on extra driver debugging that may affect @@ -58,3 +60,30 @@ config DRM_I915_SW_FENCE_DEBUG_OBJECTS Recommended for driver developers only. If in doubt, say "N". + +config DRM_I915_SELFTEST + bool "Enable selftests upon driver load" + depends on DRM_I915 + default n + select FAULT_INJECTION + select PRIME_NUMBERS + help + Choose this option to allow the driver to perform selftests upon + loading; also requires the i915.selftest=1 module parameter. To + exit the module after running the selftests (i.e. to prevent normal + module initialisation afterwards) use i915.selftest=-1. + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_I915_LOW_LEVEL_TRACEPOINTS + bool "Enable low level request tracing events" + depends on DRM_I915 + default n + help + Choose this option to turn on low level request tracing events. + This provides the ability to precisely monitor engine utilisation + and also analyze the request dependency resolving timeline. + + If in doubt, say "N". 
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index c62ab45683c0..2cf04504e494 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -29,6 +29,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o # GEM code i915-y += i915_cmd_parser.o \ i915_gem_batch_pool.o \ + i915_gem_clflush.o \ i915_gem_context.o \ i915_gem_dmabuf.o \ i915_gem_evict.o \ @@ -72,6 +73,7 @@ i915-y += intel_audio.o \ intel_atomic.o \ intel_atomic_plane.o \ intel_bios.o \ + intel_cdclk.o \ intel_color.o \ intel_display.o \ intel_dpio_phy.o \ @@ -103,8 +105,8 @@ i915-y += dvo_ch7017.o \ intel_dp.o \ intel_dsi.o \ intel_dsi_dcs_backlight.o \ - intel_dsi_panel_vbt.o \ intel_dsi_pll.o \ + intel_dsi_vbt.o \ intel_dvo.o \ intel_hdmi.o \ intel_i2c.o \ @@ -116,6 +118,9 @@ i915-y += dvo_ch7017.o \ # Post-mortem debug and GPU hang state capture i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o +i915-$(CONFIG_DRM_I915_SELFTEST) += \ + selftests/i915_random.o \ + selftests/i915_selftest.o # virtual gpu code i915-y += i915_vgpu.o diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 2b92cc8a7d1a..41b2c3aaa04a 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -616,9 +616,6 @@ static inline u32 get_opcode(u32 cmd, int ring_id) { struct decode_info *d_info; - if (ring_id >= I915_NUM_ENGINES) - return INVALID_OP; - d_info = ring_decode_info[ring_id][CMD_TYPE(cmd)]; if (d_info == NULL) return INVALID_OP; @@ -661,9 +658,6 @@ static inline void print_opcode(u32 cmd, int ring_id) struct decode_info *d_info; int i; - if (ring_id >= I915_NUM_ENGINES) - return; - d_info = ring_decode_info[ring_id][CMD_TYPE(cmd)]; if (d_info == NULL) return; @@ -1215,7 +1209,7 @@ static int gen8_check_mi_display_flip(struct parser_exec_state *s, if (!info->async_flip) return 0; - if (IS_SKYLAKE(dev_priv)) { + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { stride = 
vgpu_vreg(s->vgpu, info->stride_reg) & GENMASK(9, 0); tile = (vgpu_vreg(s->vgpu, info->ctrl_reg) & GENMASK(12, 10)) >> 10; @@ -1243,7 +1237,7 @@ static int gen8_update_plane_mmio_from_mi_display_flip( set_mask_bits(&vgpu_vreg(vgpu, info->surf_reg), GENMASK(31, 12), info->surf_val << 12); - if (IS_SKYLAKE(dev_priv)) { + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { set_mask_bits(&vgpu_vreg(vgpu, info->stride_reg), GENMASK(9, 0), info->stride_val); set_mask_bits(&vgpu_vreg(vgpu, info->ctrl_reg), GENMASK(12, 10), @@ -1267,7 +1261,7 @@ static int decode_mi_display_flip(struct parser_exec_state *s, if (IS_BROADWELL(dev_priv)) return gen8_decode_mi_display_flip(s, info); - if (IS_SKYLAKE(dev_priv)) + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) return skl_decode_mi_display_flip(s, info); return -ENODEV; @@ -1278,7 +1272,9 @@ static int check_mi_display_flip(struct parser_exec_state *s, { struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; - if (IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv)) + if (IS_BROADWELL(dev_priv) + || IS_SKYLAKE(dev_priv) + || IS_KABYLAKE(dev_priv)) return gen8_check_mi_display_flip(s, info); return -ENODEV; } @@ -1289,7 +1285,9 @@ static int update_plane_mmio_from_mi_display_flip( { struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; - if (IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv)) + if (IS_BROADWELL(dev_priv) + || IS_SKYLAKE(dev_priv) + || IS_KABYLAKE(dev_priv)) return gen8_update_plane_mmio_from_mi_display_flip(s, info); return -ENODEV; } @@ -1557,7 +1555,7 @@ static int copy_gma_to_hva(struct intel_vgpu *vgpu, struct intel_vgpu_mm *mm, len += copy_len; gma += copy_len; } - return 0; + return len; } @@ -1569,7 +1567,8 @@ static int batch_buffer_needs_scan(struct parser_exec_state *s) { struct intel_gvt *gvt = s->vgpu->gvt; - if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) { + if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) + || IS_KABYLAKE(gvt->dev_priv)) { /* BDW decides privilege 
based on address space */ if (cmd_val(s, 0) & (1 << 8)) return 0; @@ -1673,7 +1672,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) ret = copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm, gma, gma + bb_size, dst); - if (ret) { + if (ret < 0) { gvt_vgpu_err("fail to copy guest ring buffer\n"); goto unmap_src; } @@ -2478,7 +2477,7 @@ static int cmd_parser_exec(struct parser_exec_state *s) t1 = get_cycles(); - memcpy(&s_before_advance_custom, s, sizeof(struct parser_exec_state)); + s_before_advance_custom = *s; if (info->handler) { ret = info->handler(s); @@ -2604,6 +2603,9 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) unsigned long gma_head, gma_tail, gma_bottom, ring_size, ring_tail; struct parser_exec_state s; int ret = 0; + struct intel_vgpu_workload *workload = container_of(wa_ctx, + struct intel_vgpu_workload, + wa_ctx); /* ring base is page aligned */ if (WARN_ON(!IS_ALIGNED(wa_ctx->indirect_ctx.guest_gma, GTT_PAGE_SIZE))) @@ -2618,14 +2620,14 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) s.buf_type = RING_BUFFER_INSTRUCTION; s.buf_addr_type = GTT_BUFFER; - s.vgpu = wa_ctx->workload->vgpu; - s.ring_id = wa_ctx->workload->ring_id; + s.vgpu = workload->vgpu; + s.ring_id = workload->ring_id; s.ring_start = wa_ctx->indirect_ctx.guest_gma; s.ring_size = ring_size; s.ring_head = gma_head; s.ring_tail = gma_tail; s.rb_va = wa_ctx->indirect_ctx.shadow_va; - s.workload = wa_ctx->workload; + s.workload = workload; ret = ip_gma_set(&s, gma_head); if (ret) @@ -2640,11 +2642,8 @@ out: static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; - int ring_id = workload->ring_id; - struct i915_gem_context *shadow_ctx = vgpu->shadow_ctx; - struct intel_ring *ring = shadow_ctx->engine[ring_id].ring; unsigned long gma_head, gma_tail, gma_top, guest_rb_size; - unsigned int copy_len = 0; + u32 *cs; int ret; guest_rb_size = _RING_CTL_BUF_SIZE(workload->rb_ctl); @@ -2658,36 
+2657,33 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) gma_top = workload->rb_start + guest_rb_size; /* allocate shadow ring buffer */ - ret = intel_ring_begin(workload->req, workload->rb_len / 4); - if (ret) - return ret; + cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32)); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* get shadow ring buffer va */ - workload->shadow_ring_buffer_va = ring->vaddr + ring->tail; + workload->shadow_ring_buffer_va = cs; /* head > tail --> copy head <-> top */ if (gma_head > gma_tail) { ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, - gma_head, gma_top, - workload->shadow_ring_buffer_va); - if (ret) { + gma_head, gma_top, cs); + if (ret < 0) { gvt_vgpu_err("fail to copy guest ring buffer\n"); return ret; } - copy_len = gma_top - gma_head; + cs += ret / sizeof(u32); gma_head = workload->rb_start; } /* copy head or start <-> tail */ - ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, - gma_head, gma_tail, - workload->shadow_ring_buffer_va + copy_len); - if (ret) { + ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail, cs); + if (ret < 0) { gvt_vgpu_err("fail to copy guest ring buffer\n"); return ret; } - ring->tail += workload->rb_len; - intel_ring_advance(ring); + cs += ret / sizeof(u32); + intel_ring_advance(workload->req, cs); return 0; } @@ -2714,12 +2710,15 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx) { int ctx_size = wa_ctx->indirect_ctx.size; unsigned long guest_gma = wa_ctx->indirect_ctx.guest_gma; - struct intel_vgpu *vgpu = wa_ctx->workload->vgpu; + struct intel_vgpu_workload *workload = container_of(wa_ctx, + struct intel_vgpu_workload, + wa_ctx); + struct intel_vgpu *vgpu = workload->vgpu; struct drm_i915_gem_object *obj; int ret = 0; void *map; - obj = i915_gem_object_create(wa_ctx->workload->vgpu->gvt->dev_priv, + obj = i915_gem_object_create(workload->vgpu->gvt->dev_priv, roundup(ctx_size + CACHELINE_BYTES, PAGE_SIZE)); if (IS_ERR(obj)) @@ 
-2739,11 +2738,11 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx) goto unmap_src; } - ret = copy_gma_to_hva(wa_ctx->workload->vgpu, - wa_ctx->workload->vgpu->gtt.ggtt_mm, + ret = copy_gma_to_hva(workload->vgpu, + workload->vgpu->gtt.ggtt_mm, guest_gma, guest_gma + ctx_size, map); - if (ret) { + if (ret < 0) { gvt_vgpu_err("fail to copy guest indirect ctx\n"); goto unmap_src; } @@ -2778,7 +2777,10 @@ static int combine_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) { int ret; - struct intel_vgpu *vgpu = wa_ctx->workload->vgpu; + struct intel_vgpu_workload *workload = container_of(wa_ctx, + struct intel_vgpu_workload, + wa_ctx); + struct intel_vgpu *vgpu = workload->vgpu; if (wa_ctx->indirect_ctx.size == 0) return 0; diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 5419ae6ec633..e0261fcc5b50 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -161,8 +161,9 @@ static unsigned char virtual_dp_monitor_edid[GVT_EDID_NUM][EDID_SIZE] = { #define DPCD_HEADER_SIZE 0xb +/* let the virtual display supports DP1.2 */ static u8 dpcd_fix_data[DPCD_HEADER_SIZE] = { - 0x11, 0x0a, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + 0x12, 0x014, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; static void emulate_monitor_status_change(struct intel_vgpu *vgpu) @@ -172,26 +173,64 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) SDE_PORTC_HOTPLUG_CPT | SDE_PORTD_HOTPLUG_CPT); - if (IS_SKYLAKE(dev_priv)) + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { vgpu_vreg(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT | SDE_PORTE_HOTPLUG_SPT); + vgpu_vreg(vgpu, SKL_FUSE_STATUS) |= + SKL_FUSE_DOWNLOAD_STATUS | + SKL_FUSE_PG0_DIST_STATUS | + SKL_FUSE_PG1_DIST_STATUS | + SKL_FUSE_PG2_DIST_STATUS; + vgpu_vreg(vgpu, LCPLL1_CTL) |= + LCPLL_PLL_ENABLE | + LCPLL_PLL_LOCK; + vgpu_vreg(vgpu, LCPLL2_CTL) |= 
LCPLL_PLL_ENABLE; + + } if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { - vgpu_vreg(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT; vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED; + vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &= + ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | + TRANS_DDI_PORT_MASK); + vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= + (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | + (PORT_B << TRANS_DDI_PORT_SHIFT) | + TRANS_DDI_FUNC_ENABLE); + vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_B)) |= DDI_BUF_CTL_ENABLE; + vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_B)) &= ~DDI_BUF_IS_IDLE; + vgpu_vreg(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT; } if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) { vgpu_vreg(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT; + vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &= + ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | + TRANS_DDI_PORT_MASK); + vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= + (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | + (PORT_C << TRANS_DDI_PORT_SHIFT) | + TRANS_DDI_FUNC_ENABLE); + vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_C)) |= DDI_BUF_CTL_ENABLE; + vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_C)) &= ~DDI_BUF_IS_IDLE; vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIC_DETECTED; } if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) { vgpu_vreg(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT; + vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &= + ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | + TRANS_DDI_PORT_MASK); + vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= + (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | + (PORT_D << TRANS_DDI_PORT_SHIFT) | + TRANS_DDI_FUNC_ENABLE); + vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_D)) |= DDI_BUF_CTL_ENABLE; + vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_D)) &= ~DDI_BUF_IS_IDLE; vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED; } - if (IS_SKYLAKE(dev_priv) && + if ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) && intel_vgpu_has_monitor_on_port(vgpu, PORT_E)) { vgpu_vreg(vgpu, 
SDEISR) |= SDE_PORTE_HOTPLUG_SPT; } @@ -353,7 +392,7 @@ void intel_vgpu_clean_display(struct intel_vgpu *vgpu) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - if (IS_SKYLAKE(dev_priv)) + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) clean_virtual_dp_monitor(vgpu, PORT_D); else clean_virtual_dp_monitor(vgpu, PORT_B); @@ -375,7 +414,7 @@ int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution) intel_vgpu_init_i2c_edid(vgpu); - if (IS_SKYLAKE(dev_priv)) + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) return setup_virtual_dp_monitor(vgpu, PORT_D, GVT_DP_D, resolution); else diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index d186c157f65f..dca989eb2d42 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -56,8 +56,8 @@ static int context_switch_events[] = { static int ring_id_to_context_switch_event(int ring_id) { - if (WARN_ON(ring_id < RCS && ring_id > - ARRAY_SIZE(context_switch_events))) + if (WARN_ON(ring_id < RCS || + ring_id >= ARRAY_SIZE(context_switch_events))) return -EINVAL; return context_switch_events[ring_id]; @@ -394,9 +394,11 @@ static void prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) { - int ring_id = wa_ctx->workload->ring_id; - struct i915_gem_context *shadow_ctx = - wa_ctx->workload->vgpu->shadow_ctx; + struct intel_vgpu_workload *workload = container_of(wa_ctx, + struct intel_vgpu_workload, + wa_ctx); + int ring_id = workload->ring_id; + struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; struct drm_i915_gem_object *ctx_obj = shadow_ctx->engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; @@ -680,15 +682,12 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id, CACHELINE_BYTES; workload->wa_ctx.per_ctx.guest_gma = per_ctx & PER_CTX_ADDR_MASK; - workload->wa_ctx.workload = workload; 
WARN_ON(workload->wa_ctx.indirect_ctx.size && !(per_ctx & 0x1)); } if (emulate_schedule_in) - memcpy(&workload->elsp_dwords, - &vgpu->execlist[ring_id].elsp_dwords, - sizeof(workload->elsp_dwords)); + workload->elsp_dwords = vgpu->execlist[ring_id].elsp_dwords; gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n", workload, ring_id, head, tail, start, ctl); diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index b832bea64e03..c6f0077f590d 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -2224,7 +2224,8 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt) gvt_dbg_core("init gtt\n"); - if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) { + if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) + || IS_KABYLAKE(gvt->dev_priv)) { gvt->gtt.pte_ops = &gen8_gtt_pte_ops; gvt->gtt.gma_ops = &gen8_gtt_gma_ops; gvt->gtt.mm_alloc_page_table = gen8_mm_alloc_page_table; @@ -2293,12 +2294,15 @@ void intel_gvt_clean_gtt(struct intel_gvt *gvt) void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu) { struct intel_gvt *gvt = vgpu->gvt; + struct drm_i915_private *dev_priv = gvt->dev_priv; struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; u32 index; u32 offset; u32 num_entries; struct intel_gvt_gtt_entry e; + intel_runtime_pm_get(dev_priv); + memset(&e, 0, sizeof(struct intel_gvt_gtt_entry)); e.type = GTT_TYPE_GGTT_PTE; ops->set_pfn(&e, gvt->gtt.scratch_ggtt_mfn); @@ -2313,6 +2317,8 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu) num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT; for (offset = 0; offset < num_entries; offset++) ops->set_entry(NULL, &e, index + offset, false, 0, vgpu); + + intel_runtime_pm_put(dev_priv); } /** diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index ef3baa0c4754..7dea5e5d5567 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -108,7 +108,8 @@ static void init_device_info(struct intel_gvt 
*gvt) struct intel_gvt_device_info *info = &gvt->device_info; struct pci_dev *pdev = gvt->dev_priv->drm.pdev; - if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) { + if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) + || IS_KABYLAKE(gvt->dev_priv)) { info->max_support_vgpus = 8; info->cfg_space_size = 256; info->mmio_size = 2 * 1024 * 1024; @@ -145,6 +146,11 @@ static int gvt_service_thread(void *data) intel_gvt_emulate_vblank(gvt); mutex_unlock(&gvt->lock); } + + if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED, + (void *)&gvt->service_request)) { + intel_gvt_schedule(gvt); + } } return 0; @@ -198,6 +204,8 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv) idr_destroy(&gvt->vgpu_idr); + intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu); + kfree(dev_priv->gvt); dev_priv->gvt = NULL; } @@ -216,6 +224,7 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv) int intel_gvt_init_device(struct drm_i915_private *dev_priv) { struct intel_gvt *gvt; + struct intel_vgpu *vgpu; int ret; /* @@ -288,6 +297,14 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) goto out_clean_types; } + vgpu = intel_gvt_create_idle_vgpu(gvt); + if (IS_ERR(vgpu)) { + ret = PTR_ERR(vgpu); + gvt_err("failed to create idle vgpu\n"); + goto out_clean_types; + } + gvt->idle_vgpu = vgpu; + gvt_dbg_core("gvt device initialization is done\n"); dev_priv->gvt = gvt; return 0; diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index becae2fa3b29..930732e5c780 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -138,6 +138,10 @@ struct intel_vgpu_display { struct intel_vgpu_sbi sbi; }; +struct vgpu_sched_ctl { + int weight; +}; + struct intel_vgpu { struct intel_gvt *gvt; int id; @@ -147,6 +151,7 @@ struct intel_vgpu { bool failsafe; bool resetting; void *sched_data; + struct vgpu_sched_ctl sched_ctl; struct intel_vgpu_fence fence; struct intel_vgpu_gm gm; @@ -160,6 +165,7 @@ struct intel_vgpu { 
struct list_head workload_q_head[I915_NUM_ENGINES]; struct kmem_cache *workloads; atomic_t running_workload_num; + ktime_t last_ctx_submit_time; DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES); struct i915_gem_context *shadow_ctx; @@ -215,6 +221,7 @@ struct intel_vgpu_type { unsigned int low_gm_size; unsigned int high_gm_size; unsigned int fence; + unsigned int weight; enum intel_vgpu_edid resolution; }; @@ -236,6 +243,7 @@ struct intel_gvt { DECLARE_HASHTABLE(cmd_table, GVT_CMD_HASH_BITS); struct intel_vgpu_type *types; unsigned int num_types; + struct intel_vgpu *idle_vgpu; struct task_struct *service_thread; wait_queue_head_t service_thread_wq; @@ -249,6 +257,7 @@ static inline struct intel_gvt *to_gvt(struct drm_i915_private *i915) enum { INTEL_GVT_REQUEST_EMULATE_VBLANK = 0, + INTEL_GVT_REQUEST_SCHED = 1, }; static inline void intel_gvt_request_service(struct intel_gvt *gvt, @@ -322,6 +331,8 @@ struct intel_vgpu_creation_params { __u64 resolution; __s32 primary; __u64 vgpu_id; + + __u32 weight; }; int intel_vgpu_alloc_resource(struct intel_vgpu *vgpu, @@ -376,6 +387,8 @@ static inline void intel_vgpu_write_pci_bar(struct intel_vgpu *vgpu, int intel_gvt_init_vgpu_types(struct intel_gvt *gvt); void intel_gvt_clean_vgpu_types(struct intel_gvt *gvt); +struct intel_vgpu *intel_gvt_create_idle_vgpu(struct intel_gvt *gvt); +void intel_gvt_destroy_idle_vgpu(struct intel_vgpu *vgpu); struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, struct intel_vgpu_type *type); void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu); diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 6da9ae1618e3..0ad1a508e2af 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -68,6 +68,8 @@ unsigned long intel_gvt_get_device_type(struct intel_gvt *gvt) return D_BDW; else if (IS_SKYLAKE(gvt->dev_priv)) return D_SKL; + else if (IS_KABYLAKE(gvt->dev_priv)) + return D_KBL; return 0; } @@ -234,7 +236,8 @@ 
static int mul_force_wake_write(struct intel_vgpu *vgpu, old = vgpu_vreg(vgpu, offset); new = CALC_MODE_MASK_REG(old, *(u32 *)p_data); - if (IS_SKYLAKE(vgpu->gvt->dev_priv)) { + if (IS_SKYLAKE(vgpu->gvt->dev_priv) + || IS_KABYLAKE(vgpu->gvt->dev_priv)) { switch (offset) { case FORCEWAKE_RENDER_GEN9_REG: ack_reg_offset = FORCEWAKE_ACK_RENDER_GEN9_REG; @@ -823,8 +826,9 @@ static int dp_aux_ch_ctl_mmio_write(struct intel_vgpu *vgpu, write_vreg(vgpu, offset, p_data, bytes); data = vgpu_vreg(vgpu, offset); - if (IS_SKYLAKE(vgpu->gvt->dev_priv) && - offset != _REG_SKL_DP_AUX_CH_CTL(port_index)) { + if ((IS_SKYLAKE(vgpu->gvt->dev_priv) + || IS_KABYLAKE(vgpu->gvt->dev_priv)) + && offset != _REG_SKL_DP_AUX_CH_CTL(port_index)) { /* SKL DPB/C/D aux ctl register changed */ return 0; } else if (IS_BROADWELL(vgpu->gvt->dev_priv) && @@ -1311,7 +1315,8 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, switch (cmd) { case GEN9_PCODE_READ_MEM_LATENCY: - if (IS_SKYLAKE(vgpu->gvt->dev_priv)) { + if (IS_SKYLAKE(vgpu->gvt->dev_priv) + || IS_KABYLAKE(vgpu->gvt->dev_priv)) { /** * "Read memory latency" command on gen9. 
* Below memory latency values are read @@ -1324,7 +1329,8 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, } break; case SKL_PCODE_CDCLK_CONTROL: - if (IS_SKYLAKE(vgpu->gvt->dev_priv)) + if (IS_SKYLAKE(vgpu->gvt->dev_priv) + || IS_KABYLAKE(vgpu->gvt->dev_priv)) *data0 = SKL_CDCLK_READY_FOR_CHANGE; break; case GEN6_PCODE_READ_RC6VIDS: @@ -1418,6 +1424,7 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, execlist->elsp_dwords.data[execlist->elsp_dwords.index] = data; if (execlist->elsp_dwords.index == 3) { + vgpu->last_ctx_submit_time = ktime_get(); ret = intel_vgpu_submit_execlist(vgpu, ring_id); if(ret) gvt_vgpu_err("fail submit workload on ring %d\n", @@ -2592,219 +2599,232 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_DH(FORCEWAKE_MEDIA_GEN9, D_SKL_PLUS, NULL, mul_force_wake_write); MMIO_DH(FORCEWAKE_ACK_MEDIA_GEN9, D_SKL_PLUS, NULL, NULL); - MMIO_F(_DPB_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL, NULL, dp_aux_ch_ctl_mmio_write); - MMIO_F(_DPC_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL, NULL, dp_aux_ch_ctl_mmio_write); - MMIO_F(_DPD_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL, NULL, dp_aux_ch_ctl_mmio_write); + MMIO_F(_DPB_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, + dp_aux_ch_ctl_mmio_write); + MMIO_F(_DPC_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, + dp_aux_ch_ctl_mmio_write); + MMIO_F(_DPD_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, + dp_aux_ch_ctl_mmio_write); - MMIO_D(HSW_PWR_WELL_BIOS, D_SKL); - MMIO_DH(HSW_PWR_WELL_DRIVER, D_SKL, NULL, skl_power_well_ctl_write); + MMIO_D(HSW_PWR_WELL_BIOS, D_SKL_PLUS); + MMIO_DH(HSW_PWR_WELL_DRIVER, D_SKL_PLUS, NULL, + skl_power_well_ctl_write); + MMIO_DH(GEN6_PCODE_MAILBOX, D_SKL_PLUS, NULL, mailbox_write); MMIO_D(0xa210, D_SKL_PLUS); MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_DFH(GEN9_GAMT_ECO_REG_RW_IA, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DH(0x4ddc, D_SKL, NULL, skl_misc_ctl_write); - MMIO_DH(0x42080, 
D_SKL, NULL, skl_misc_ctl_write); - MMIO_D(0x45504, D_SKL); - MMIO_D(0x45520, D_SKL); - MMIO_D(0x46000, D_SKL); - MMIO_DH(0x46010, D_SKL, NULL, skl_lcpll_write); - MMIO_DH(0x46014, D_SKL, NULL, skl_lcpll_write); - MMIO_D(0x6C040, D_SKL); - MMIO_D(0x6C048, D_SKL); - MMIO_D(0x6C050, D_SKL); - MMIO_D(0x6C044, D_SKL); - MMIO_D(0x6C04C, D_SKL); - MMIO_D(0x6C054, D_SKL); - MMIO_D(0x6c058, D_SKL); - MMIO_D(0x6c05c, D_SKL); - MMIO_DH(0X6c060, D_SKL, dpll_status_read, NULL); - - MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 0), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 1), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_WIN_POS(PIPE_B, 0), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_WIN_POS(PIPE_B, 1), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_WIN_POS(PIPE_C, 0), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_WIN_POS(PIPE_C, 1), D_SKL, NULL, pf_write); - - MMIO_DH(SKL_PS_WIN_SZ(PIPE_A, 0), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_WIN_SZ(PIPE_A, 1), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_WIN_SZ(PIPE_B, 0), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_WIN_SZ(PIPE_B, 1), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_WIN_SZ(PIPE_C, 0), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_WIN_SZ(PIPE_C, 1), D_SKL, NULL, pf_write); - - MMIO_DH(SKL_PS_CTRL(PIPE_A, 0), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_CTRL(PIPE_A, 1), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_CTRL(PIPE_B, 0), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_CTRL(PIPE_B, 1), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_CTRL(PIPE_C, 0), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_CTRL(PIPE_C, 1), D_SKL, NULL, pf_write); - - MMIO_DH(PLANE_BUF_CFG(PIPE_A, 0), D_SKL, NULL, NULL); - MMIO_DH(PLANE_BUF_CFG(PIPE_A, 1), D_SKL, NULL, NULL); - MMIO_DH(PLANE_BUF_CFG(PIPE_A, 2), D_SKL, NULL, NULL); - MMIO_DH(PLANE_BUF_CFG(PIPE_A, 3), D_SKL, NULL, NULL); - - MMIO_DH(PLANE_BUF_CFG(PIPE_B, 0), D_SKL, NULL, NULL); - MMIO_DH(PLANE_BUF_CFG(PIPE_B, 1), D_SKL, NULL, NULL); - MMIO_DH(PLANE_BUF_CFG(PIPE_B, 2), D_SKL, NULL, NULL); - MMIO_DH(PLANE_BUF_CFG(PIPE_B, 3), D_SKL, NULL, 
NULL); - - MMIO_DH(PLANE_BUF_CFG(PIPE_C, 0), D_SKL, NULL, NULL); - MMIO_DH(PLANE_BUF_CFG(PIPE_C, 1), D_SKL, NULL, NULL); - MMIO_DH(PLANE_BUF_CFG(PIPE_C, 2), D_SKL, NULL, NULL); - MMIO_DH(PLANE_BUF_CFG(PIPE_C, 3), D_SKL, NULL, NULL); - - MMIO_DH(CUR_BUF_CFG(PIPE_A), D_SKL, NULL, NULL); - MMIO_DH(CUR_BUF_CFG(PIPE_B), D_SKL, NULL, NULL); - MMIO_DH(CUR_BUF_CFG(PIPE_C), D_SKL, NULL, NULL); - - MMIO_F(PLANE_WM(PIPE_A, 0, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); - MMIO_F(PLANE_WM(PIPE_A, 1, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); - MMIO_F(PLANE_WM(PIPE_A, 2, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); - - MMIO_F(PLANE_WM(PIPE_B, 0, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); - MMIO_F(PLANE_WM(PIPE_B, 1, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); - MMIO_F(PLANE_WM(PIPE_B, 2, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); - - MMIO_F(PLANE_WM(PIPE_C, 0, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); - MMIO_F(PLANE_WM(PIPE_C, 1, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); - MMIO_F(PLANE_WM(PIPE_C, 2, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); - - MMIO_F(CUR_WM(PIPE_A, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); - MMIO_F(CUR_WM(PIPE_B, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); - MMIO_F(CUR_WM(PIPE_C, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); - - MMIO_DH(PLANE_WM_TRANS(PIPE_A, 0), D_SKL, NULL, NULL); - MMIO_DH(PLANE_WM_TRANS(PIPE_A, 1), D_SKL, NULL, NULL); - MMIO_DH(PLANE_WM_TRANS(PIPE_A, 2), D_SKL, NULL, NULL); - - MMIO_DH(PLANE_WM_TRANS(PIPE_B, 0), D_SKL, NULL, NULL); - MMIO_DH(PLANE_WM_TRANS(PIPE_B, 1), D_SKL, NULL, NULL); - MMIO_DH(PLANE_WM_TRANS(PIPE_B, 2), D_SKL, NULL, NULL); - - MMIO_DH(PLANE_WM_TRANS(PIPE_C, 0), D_SKL, NULL, NULL); - MMIO_DH(PLANE_WM_TRANS(PIPE_C, 1), D_SKL, NULL, NULL); - MMIO_DH(PLANE_WM_TRANS(PIPE_C, 2), D_SKL, NULL, NULL); - - MMIO_DH(CUR_WM_TRANS(PIPE_A), D_SKL, NULL, NULL); - MMIO_DH(CUR_WM_TRANS(PIPE_B), D_SKL, NULL, NULL); - MMIO_DH(CUR_WM_TRANS(PIPE_C), D_SKL, NULL, NULL); - - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 0), D_SKL, NULL, NULL); - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 1), D_SKL, 
NULL, NULL); - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 2), D_SKL, NULL, NULL); - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 3), D_SKL, NULL, NULL); - - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 0), D_SKL, NULL, NULL); - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 1), D_SKL, NULL, NULL); - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 2), D_SKL, NULL, NULL); - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 3), D_SKL, NULL, NULL); - - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 0), D_SKL, NULL, NULL); - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 1), D_SKL, NULL, NULL); - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 2), D_SKL, NULL, NULL); - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 3), D_SKL, NULL, NULL); - - MMIO_DH(_REG_701C0(PIPE_A, 1), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_A, 2), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_A, 3), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_A, 4), D_SKL, NULL, NULL); - - MMIO_DH(_REG_701C0(PIPE_B, 1), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_B, 2), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_B, 3), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_B, 4), D_SKL, NULL, NULL); - - MMIO_DH(_REG_701C0(PIPE_C, 1), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_C, 2), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_C, 3), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_C, 4), D_SKL, NULL, NULL); - - MMIO_DH(_REG_701C4(PIPE_A, 1), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_A, 2), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_A, 3), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_A, 4), D_SKL, NULL, NULL); - - MMIO_DH(_REG_701C4(PIPE_B, 1), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_B, 2), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_B, 3), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_B, 4), D_SKL, NULL, NULL); - - MMIO_DH(_REG_701C4(PIPE_C, 1), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_C, 2), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_C, 3), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_C, 4), D_SKL, NULL, NULL); - - MMIO_D(0x70380, D_SKL); - MMIO_D(0x71380, D_SKL); - MMIO_D(0x72380, D_SKL); - 
MMIO_D(0x7039c, D_SKL); - - MMIO_F(0x80000, 0x3000, 0, 0, 0, D_SKL, NULL, NULL); - MMIO_D(0x8f074, D_SKL); - MMIO_D(0x8f004, D_SKL); - MMIO_D(0x8f034, D_SKL); - - MMIO_D(0xb11c, D_SKL); - - MMIO_D(0x51000, D_SKL); - MMIO_D(0x6c00c, D_SKL); - - MMIO_F(0xc800, 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL, NULL, NULL); - MMIO_F(0xb020, 0x80, F_CMD_ACCESS, 0, 0, D_SKL, NULL, NULL); - - MMIO_D(0xd08, D_SKL); - MMIO_DFH(0x20e0, D_SKL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(0x20ec, D_SKL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DH(0x4ddc, D_SKL_PLUS, NULL, skl_misc_ctl_write); + MMIO_DH(0x42080, D_SKL_PLUS, NULL, skl_misc_ctl_write); + MMIO_D(0x45504, D_SKL_PLUS); + MMIO_D(0x45520, D_SKL_PLUS); + MMIO_D(0x46000, D_SKL_PLUS); + MMIO_DH(0x46010, D_SKL | D_KBL, NULL, skl_lcpll_write); + MMIO_DH(0x46014, D_SKL | D_KBL, NULL, skl_lcpll_write); + MMIO_D(0x6C040, D_SKL | D_KBL); + MMIO_D(0x6C048, D_SKL | D_KBL); + MMIO_D(0x6C050, D_SKL | D_KBL); + MMIO_D(0x6C044, D_SKL | D_KBL); + MMIO_D(0x6C04C, D_SKL | D_KBL); + MMIO_D(0x6C054, D_SKL | D_KBL); + MMIO_D(0x6c058, D_SKL | D_KBL); + MMIO_D(0x6c05c, D_SKL | D_KBL); + MMIO_DH(0X6c060, D_SKL | D_KBL, dpll_status_read, NULL); + + MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 0), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 1), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_WIN_POS(PIPE_B, 0), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_WIN_POS(PIPE_B, 1), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_WIN_POS(PIPE_C, 0), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_WIN_POS(PIPE_C, 1), D_SKL_PLUS, NULL, pf_write); + + MMIO_DH(SKL_PS_WIN_SZ(PIPE_A, 0), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_WIN_SZ(PIPE_A, 1), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_WIN_SZ(PIPE_B, 0), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_WIN_SZ(PIPE_B, 1), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_WIN_SZ(PIPE_C, 0), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_WIN_SZ(PIPE_C, 1), D_SKL_PLUS, NULL, pf_write); + + MMIO_DH(SKL_PS_CTRL(PIPE_A, 0), 
D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_CTRL(PIPE_A, 1), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_CTRL(PIPE_B, 0), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_CTRL(PIPE_B, 1), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_CTRL(PIPE_C, 0), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_CTRL(PIPE_C, 1), D_SKL_PLUS, NULL, pf_write); + + MMIO_DH(PLANE_BUF_CFG(PIPE_A, 0), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_BUF_CFG(PIPE_A, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_BUF_CFG(PIPE_A, 2), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_BUF_CFG(PIPE_A, 3), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(PLANE_BUF_CFG(PIPE_B, 0), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_BUF_CFG(PIPE_B, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_BUF_CFG(PIPE_B, 2), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_BUF_CFG(PIPE_B, 3), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(PLANE_BUF_CFG(PIPE_C, 0), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_BUF_CFG(PIPE_C, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_BUF_CFG(PIPE_C, 2), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_BUF_CFG(PIPE_C, 3), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(CUR_BUF_CFG(PIPE_A), D_SKL_PLUS, NULL, NULL); + MMIO_DH(CUR_BUF_CFG(PIPE_B), D_SKL_PLUS, NULL, NULL); + MMIO_DH(CUR_BUF_CFG(PIPE_C), D_SKL_PLUS, NULL, NULL); + + MMIO_F(PLANE_WM(PIPE_A, 0, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + MMIO_F(PLANE_WM(PIPE_A, 1, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + MMIO_F(PLANE_WM(PIPE_A, 2, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + + MMIO_F(PLANE_WM(PIPE_B, 0, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + MMIO_F(PLANE_WM(PIPE_B, 1, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + MMIO_F(PLANE_WM(PIPE_B, 2, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + + MMIO_F(PLANE_WM(PIPE_C, 0, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + MMIO_F(PLANE_WM(PIPE_C, 1, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + MMIO_F(PLANE_WM(PIPE_C, 2, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + + MMIO_F(CUR_WM(PIPE_A, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + 
MMIO_F(CUR_WM(PIPE_B, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + MMIO_F(CUR_WM(PIPE_C, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + + MMIO_DH(PLANE_WM_TRANS(PIPE_A, 0), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_WM_TRANS(PIPE_A, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_WM_TRANS(PIPE_A, 2), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(PLANE_WM_TRANS(PIPE_B, 0), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_WM_TRANS(PIPE_B, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_WM_TRANS(PIPE_B, 2), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(PLANE_WM_TRANS(PIPE_C, 0), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_WM_TRANS(PIPE_C, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_WM_TRANS(PIPE_C, 2), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(CUR_WM_TRANS(PIPE_A), D_SKL_PLUS, NULL, NULL); + MMIO_DH(CUR_WM_TRANS(PIPE_B), D_SKL_PLUS, NULL, NULL); + MMIO_DH(CUR_WM_TRANS(PIPE_C), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 0), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 2), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 3), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 0), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 2), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 3), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 0), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 2), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 3), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(_REG_701C0(PIPE_A, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C0(PIPE_A, 2), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C0(PIPE_A, 3), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C0(PIPE_A, 4), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(_REG_701C0(PIPE_B, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C0(PIPE_B, 2), 
D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C0(PIPE_B, 3), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C0(PIPE_B, 4), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(_REG_701C0(PIPE_C, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C0(PIPE_C, 2), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C0(PIPE_C, 3), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C0(PIPE_C, 4), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(_REG_701C4(PIPE_A, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C4(PIPE_A, 2), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C4(PIPE_A, 3), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C4(PIPE_A, 4), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(_REG_701C4(PIPE_B, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C4(PIPE_B, 2), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C4(PIPE_B, 3), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C4(PIPE_B, 4), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(_REG_701C4(PIPE_C, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C4(PIPE_C, 2), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C4(PIPE_C, 3), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C4(PIPE_C, 4), D_SKL_PLUS, NULL, NULL); + + MMIO_D(0x70380, D_SKL_PLUS); + MMIO_D(0x71380, D_SKL_PLUS); + MMIO_D(0x72380, D_SKL_PLUS); + MMIO_D(0x7039c, D_SKL_PLUS); + + MMIO_F(0x80000, 0x3000, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + MMIO_D(0x8f074, D_SKL | D_KBL); + MMIO_D(0x8f004, D_SKL | D_KBL); + MMIO_D(0x8f034, D_SKL | D_KBL); + + MMIO_D(0xb11c, D_SKL | D_KBL); + + MMIO_D(0x51000, D_SKL | D_KBL); + MMIO_D(0x6c00c, D_SKL_PLUS); + + MMIO_F(0xc800, 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL); + MMIO_F(0xb020, 0x80, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL); + + MMIO_D(0xd08, D_SKL_PLUS); + MMIO_DFH(0x20e0, D_SKL_PLUS, F_MODE_MASK, NULL, NULL); + MMIO_DFH(0x20ec, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); /* TRTT */ - MMIO_DFH(0x4de0, D_SKL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x4de4, D_SKL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x4de8, D_SKL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x4dec, D_SKL, F_CMD_ACCESS, NULL, NULL); - 
MMIO_DFH(0x4df0, D_SKL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x4df4, D_SKL, F_CMD_ACCESS, NULL, gen9_trtte_write); - MMIO_DH(0x4dfc, D_SKL, NULL, gen9_trtt_chicken_write); + MMIO_DFH(0x4de0, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4de4, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4de8, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4dec, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4df0, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4df4, D_SKL | D_KBL, F_CMD_ACCESS, NULL, gen9_trtte_write); + MMIO_DH(0x4dfc, D_SKL | D_KBL, NULL, gen9_trtt_chicken_write); - MMIO_D(0x45008, D_SKL); + MMIO_D(0x45008, D_SKL | D_KBL); - MMIO_D(0x46430, D_SKL); + MMIO_D(0x46430, D_SKL | D_KBL); - MMIO_D(0x46520, D_SKL); + MMIO_D(0x46520, D_SKL | D_KBL); - MMIO_D(0xc403c, D_SKL); - MMIO_D(0xb004, D_SKL); + MMIO_D(0xc403c, D_SKL | D_KBL); + MMIO_D(0xb004, D_SKL_PLUS); MMIO_DH(DMA_CTRL, D_SKL_PLUS, NULL, dma_ctrl_write); - MMIO_D(0x65900, D_SKL); - MMIO_D(0x1082c0, D_SKL); - MMIO_D(0x4068, D_SKL); - MMIO_D(0x67054, D_SKL); - MMIO_D(0x6e560, D_SKL); - MMIO_D(0x6e554, D_SKL); - MMIO_D(0x2b20, D_SKL); - MMIO_D(0x65f00, D_SKL); - MMIO_D(0x65f08, D_SKL); - MMIO_D(0x320f0, D_SKL); - - MMIO_DFH(_REG_VCS2_EXCC, D_SKL, F_CMD_ACCESS, NULL, NULL); - MMIO_D(0x70034, D_SKL); - MMIO_D(0x71034, D_SKL); - MMIO_D(0x72034, D_SKL); - - MMIO_D(_PLANE_KEYVAL_1(PIPE_A), D_SKL); - MMIO_D(_PLANE_KEYVAL_1(PIPE_B), D_SKL); - MMIO_D(_PLANE_KEYVAL_1(PIPE_C), D_SKL); - MMIO_D(_PLANE_KEYMSK_1(PIPE_A), D_SKL); - MMIO_D(_PLANE_KEYMSK_1(PIPE_B), D_SKL); - MMIO_D(_PLANE_KEYMSK_1(PIPE_C), D_SKL); - - MMIO_D(0x44500, D_SKL); + MMIO_D(0x65900, D_SKL_PLUS); + MMIO_D(0x1082c0, D_SKL | D_KBL); + MMIO_D(0x4068, D_SKL | D_KBL); + MMIO_D(0x67054, D_SKL | D_KBL); + MMIO_D(0x6e560, D_SKL | D_KBL); + MMIO_D(0x6e554, D_SKL | D_KBL); + MMIO_D(0x2b20, D_SKL | D_KBL); + MMIO_D(0x65f00, D_SKL | D_KBL); + MMIO_D(0x65f08, D_SKL | D_KBL); + MMIO_D(0x320f0, D_SKL | D_KBL); + + 
MMIO_DFH(_REG_VCS2_EXCC, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_REG_VECS_EXCC, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_D(0x70034, D_SKL_PLUS); + MMIO_D(0x71034, D_SKL_PLUS); + MMIO_D(0x72034, D_SKL_PLUS); + + MMIO_D(_PLANE_KEYVAL_1(PIPE_A), D_SKL_PLUS); + MMIO_D(_PLANE_KEYVAL_1(PIPE_B), D_SKL_PLUS); + MMIO_D(_PLANE_KEYVAL_1(PIPE_C), D_SKL_PLUS); + MMIO_D(_PLANE_KEYMSK_1(PIPE_A), D_SKL_PLUS); + MMIO_D(_PLANE_KEYMSK_1(PIPE_B), D_SKL_PLUS); + MMIO_D(_PLANE_KEYMSK_1(PIPE_C), D_SKL_PLUS); + + MMIO_D(0x44500, D_SKL_PLUS); MMIO_DFH(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL, F_MODE_MASK | F_CMD_ACCESS, + MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL | D_KBL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + + MMIO_D(0x4ab8, D_KBL); + MMIO_D(0x940c, D_SKL_PLUS); + MMIO_D(0x2248, D_SKL_PLUS | D_KBL); + MMIO_D(0x4ab0, D_SKL | D_KBL); + MMIO_D(0x20d4, D_SKL | D_KBL); + return 0; } @@ -2881,7 +2901,8 @@ int intel_gvt_setup_mmio_info(struct intel_gvt *gvt) ret = init_broadwell_mmio_info(gvt); if (ret) goto err; - } else if (IS_SKYLAKE(dev_priv)) { + } else if (IS_SKYLAKE(dev_priv) + || IS_KABYLAKE(dev_priv)) { ret = init_broadwell_mmio_info(gvt); if (ret) goto err; diff --git a/drivers/gpu/drm/i915/gvt/interrupt.c b/drivers/gpu/drm/i915/gvt/interrupt.c index 92bb247e3478..9d6812f0957f 100644 --- a/drivers/gpu/drm/i915/gvt/interrupt.c +++ b/drivers/gpu/drm/i915/gvt/interrupt.c @@ -580,7 +580,7 @@ static void gen8_init_irq( SET_BIT_INFO(irq, 4, PRIMARY_C_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_C); SET_BIT_INFO(irq, 5, SPRITE_C_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_C); - } else if (IS_SKYLAKE(gvt->dev_priv)) { + } else if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) { SET_BIT_INFO(irq, 25, AUX_CHANNEL_B, INTEL_GVT_IRQ_INFO_DE_PORT); SET_BIT_INFO(irq, 26, AUX_CHANNEL_C, INTEL_GVT_IRQ_INFO_DE_PORT); SET_BIT_INFO(irq, 27, AUX_CHANNEL_D, INTEL_GVT_IRQ_INFO_DE_PORT); @@ -690,7 +690,8 @@ int intel_gvt_init_irq(struct 
intel_gvt *gvt) gvt_dbg_core("init irq framework\n"); - if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) { + if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) + || IS_KABYLAKE(gvt->dev_priv)) { irq->ops = &gen8_irq_ops; irq->irq_map = gen8_irq_map; } else { diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index e466259034e2..1ae0b4083ce1 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -295,10 +295,12 @@ static ssize_t description_show(struct kobject *kobj, struct device *dev, return 0; return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n" - "fence: %d\nresolution: %s\n", + "fence: %d\nresolution: %s\n" + "weight: %d\n", BYTES_TO_MB(type->low_gm_size), BYTES_TO_MB(type->high_gm_size), - type->fence, vgpu_edid_str(type->resolution)); + type->fence, vgpu_edid_str(type->resolution), + type->weight); } static MDEV_TYPE_ATTR_RO(available_instances); @@ -1150,8 +1152,40 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, return 0; } +static ssize_t +vgpu_id_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct mdev_device *mdev = mdev_from_dev(dev); + + if (mdev) { + struct intel_vgpu *vgpu = (struct intel_vgpu *) + mdev_get_drvdata(mdev); + return sprintf(buf, "%d\n", vgpu->id); + } + return sprintf(buf, "\n"); +} + +static DEVICE_ATTR_RO(vgpu_id); + +static struct attribute *intel_vgpu_attrs[] = { + &dev_attr_vgpu_id.attr, + NULL +}; + +static const struct attribute_group intel_vgpu_group = { + .name = "intel_vgpu", + .attrs = intel_vgpu_attrs, +}; + +static const struct attribute_group *intel_vgpu_groups[] = { + &intel_vgpu_group, + NULL, +}; + static const struct mdev_parent_ops intel_vgpu_ops = { .supported_type_groups = intel_vgpu_type_groups, + .mdev_attr_groups = intel_vgpu_groups, .create = intel_vgpu_create, .remove = intel_vgpu_remove, diff --git a/drivers/gpu/drm/i915/gvt/mmio.h 
b/drivers/gpu/drm/i915/gvt/mmio.h index a3a027025cd0..7edd66f38ef9 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.h +++ b/drivers/gpu/drm/i915/gvt/mmio.h @@ -44,20 +44,21 @@ struct intel_vgpu; #define D_HSW (1 << 2) #define D_BDW (1 << 3) #define D_SKL (1 << 4) +#define D_KBL (1 << 5) -#define D_GEN9PLUS (D_SKL) -#define D_GEN8PLUS (D_BDW | D_SKL) -#define D_GEN75PLUS (D_HSW | D_BDW | D_SKL) -#define D_GEN7PLUS (D_IVB | D_HSW | D_BDW | D_SKL) +#define D_GEN9PLUS (D_SKL | D_KBL) +#define D_GEN8PLUS (D_BDW | D_SKL | D_KBL) +#define D_GEN75PLUS (D_HSW | D_BDW | D_SKL | D_KBL) +#define D_GEN7PLUS (D_IVB | D_HSW | D_BDW | D_SKL | D_KBL) -#define D_SKL_PLUS (D_SKL) -#define D_BDW_PLUS (D_BDW | D_SKL) -#define D_HSW_PLUS (D_HSW | D_BDW | D_SKL) -#define D_IVB_PLUS (D_IVB | D_HSW | D_BDW | D_SKL) +#define D_SKL_PLUS (D_SKL | D_KBL) +#define D_BDW_PLUS (D_BDW | D_SKL | D_KBL) +#define D_HSW_PLUS (D_HSW | D_BDW | D_SKL | D_KBL) +#define D_IVB_PLUS (D_IVB | D_HSW | D_BDW | D_SKL | D_KBL) #define D_PRE_BDW (D_SNB | D_IVB | D_HSW) #define D_PRE_SKL (D_SNB | D_IVB | D_HSW | D_BDW) -#define D_ALL (D_SNB | D_IVB | D_HSW | D_BDW | D_SKL) +#define D_ALL (D_SNB | D_IVB | D_HSW | D_BDW | D_SKL | D_KBL) struct intel_gvt_mmio_info { u32 offset; diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index 0beb83563b08..c6e7972ac21d 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -44,7 +44,7 @@ struct render_mmio { u32 value; }; -static struct render_mmio gen8_render_mmio_list[] = { +static struct render_mmio gen8_render_mmio_list[] __cacheline_aligned = { {RCS, _MMIO(0x229c), 0xffff, false}, {RCS, _MMIO(0x2248), 0x0, false}, {RCS, _MMIO(0x2098), 0x0, false}, @@ -75,7 +75,7 @@ static struct render_mmio gen8_render_mmio_list[] = { {BCS, _MMIO(0x22028), 0x0, false}, }; -static struct render_mmio gen9_render_mmio_list[] = { +static struct render_mmio gen9_render_mmio_list[] __cacheline_aligned = { {RCS, _MMIO(0x229c), 0xffff, 
false}, {RCS, _MMIO(0x2248), 0x0, false}, {RCS, _MMIO(0x2098), 0x0, false}, @@ -126,6 +126,18 @@ static struct render_mmio gen9_render_mmio_list[] = { {VCS2, _MMIO(0x1c028), 0xffff, false}, {VECS, _MMIO(0x1a028), 0xffff, false}, + + {RCS, _MMIO(0x7304), 0xffff, true}, + {RCS, _MMIO(0x2248), 0x0, false}, + {RCS, _MMIO(0x940c), 0x0, false}, + {RCS, _MMIO(0x4ab8), 0x0, false}, + + {RCS, _MMIO(0x4ab0), 0x0, false}, + {RCS, _MMIO(0x20d4), 0x0, false}, + + {RCS, _MMIO(0xb004), 0x0, false}, + {RCS, _MMIO(0x20a0), 0x0, false}, + {RCS, _MMIO(0x20e4), 0xffff, false}, }; static u32 gen9_render_mocs[I915_NUM_ENGINES][64]; @@ -159,7 +171,7 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) */ fw = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ | FW_REG_WRITE); - if (ring_id == RCS && IS_SKYLAKE(dev_priv)) + if (ring_id == RCS && (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))) fw |= FORCEWAKE_RENDER; intel_uncore_forcewake_get(dev_priv, fw); @@ -192,9 +204,6 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id) if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) return; - if (!IS_SKYLAKE(dev_priv)) - return; - offset.reg = regs[ring_id]; for (i = 0; i < 64; i++) { gen9_render_mocs[ring_id][i] = I915_READ(offset); @@ -230,9 +239,6 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id) if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) return; - if (!IS_SKYLAKE(dev_priv)) - return; - offset.reg = regs[ring_id]; for (i = 0; i < 64; i++) { vgpu_vreg(vgpu, offset) = I915_READ(offset); @@ -265,7 +271,8 @@ void intel_gvt_load_render_mmio(struct intel_vgpu *vgpu, int ring_id) u32 inhibit_mask = _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); - if (IS_SKYLAKE(vgpu->gvt->dev_priv)) { + if (IS_SKYLAKE(vgpu->gvt->dev_priv) + || IS_KABYLAKE(vgpu->gvt->dev_priv)) { mmio = gen9_render_mmio_list; array_size = ARRAY_SIZE(gen9_render_mmio_list); load_mocs(vgpu, ring_id); @@ -312,7 +319,7 @@ void intel_gvt_restore_render_mmio(struct intel_vgpu 
*vgpu, int ring_id) u32 v; int i, array_size; - if (IS_SKYLAKE(dev_priv)) { + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { mmio = gen9_render_mmio_list; array_size = ARRAY_SIZE(gen9_render_mmio_list); restore_mocs(vgpu, ring_id); diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index 34b9acdf3479..79ba4b3440aa 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -47,19 +47,92 @@ static bool vgpu_has_pending_workload(struct intel_vgpu *vgpu) return false; } +struct vgpu_sched_data { + struct list_head lru_list; + struct intel_vgpu *vgpu; + + ktime_t sched_in_time; + ktime_t sched_out_time; + ktime_t sched_time; + ktime_t left_ts; + ktime_t allocated_ts; + + struct vgpu_sched_ctl sched_ctl; +}; + +struct gvt_sched_data { + struct intel_gvt *gvt; + struct hrtimer timer; + unsigned long period; + struct list_head lru_runq_head; +}; + +static void vgpu_update_timeslice(struct intel_vgpu *pre_vgpu) +{ + ktime_t delta_ts; + struct vgpu_sched_data *vgpu_data = pre_vgpu->sched_data; + + delta_ts = vgpu_data->sched_out_time - vgpu_data->sched_in_time; + + vgpu_data->sched_time += delta_ts; + vgpu_data->left_ts -= delta_ts; +} + +#define GVT_TS_BALANCE_PERIOD_MS 100 +#define GVT_TS_BALANCE_STAGE_NUM 10 + +static void gvt_balance_timeslice(struct gvt_sched_data *sched_data) +{ + struct vgpu_sched_data *vgpu_data; + struct list_head *pos; + static uint64_t stage_check; + int stage = stage_check++ % GVT_TS_BALANCE_STAGE_NUM; + + /* The timeslice accumulation reset at stage 0, which is + * allocated again without adding previous debt. 
+ */ + if (stage == 0) { + int total_weight = 0; + ktime_t fair_timeslice; + + list_for_each(pos, &sched_data->lru_runq_head) { + vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list); + total_weight += vgpu_data->sched_ctl.weight; + } + + list_for_each(pos, &sched_data->lru_runq_head) { + vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list); + fair_timeslice = ms_to_ktime(GVT_TS_BALANCE_PERIOD_MS) * + vgpu_data->sched_ctl.weight / + total_weight; + + vgpu_data->allocated_ts = fair_timeslice; + vgpu_data->left_ts = vgpu_data->allocated_ts; + } + } else { + list_for_each(pos, &sched_data->lru_runq_head) { + vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list); + + /* timeslice for next 100ms should add the left/debt + * slice of previous stages. + */ + vgpu_data->left_ts += vgpu_data->allocated_ts; + } + } +} + static void try_to_schedule_next_vgpu(struct intel_gvt *gvt) { struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; enum intel_engine_id i; struct intel_engine_cs *engine; + struct vgpu_sched_data *vgpu_data; + ktime_t cur_time; /* no target to schedule */ if (!scheduler->next_vgpu) return; - gvt_dbg_sched("try to schedule next vgpu %d\n", - scheduler->next_vgpu->id); - /* * after the flag is set, workload dispatch thread will * stop dispatching workload for current vgpu @@ -68,14 +141,18 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt) /* still have uncompleted workload? 
*/ for_each_engine(engine, gvt->dev_priv, i) { - if (scheduler->current_workload[i]) { - gvt_dbg_sched("still have running workload\n"); + if (scheduler->current_workload[i]) return; - } } - gvt_dbg_sched("switch to next vgpu %d\n", - scheduler->next_vgpu->id); + cur_time = ktime_get(); + if (scheduler->current_vgpu) { + vgpu_data = scheduler->current_vgpu->sched_data; + vgpu_data->sched_out_time = cur_time; + vgpu_update_timeslice(scheduler->current_vgpu); + } + vgpu_data = scheduler->next_vgpu->sched_data; + vgpu_data->sched_in_time = cur_time; /* switch current vgpu */ scheduler->current_vgpu = scheduler->next_vgpu; @@ -88,97 +165,106 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt) wake_up(&scheduler->waitq[i]); } -struct tbs_vgpu_data { - struct list_head list; - struct intel_vgpu *vgpu; - /* put some per-vgpu sched stats here */ -}; - -struct tbs_sched_data { - struct intel_gvt *gvt; - struct delayed_work work; - unsigned long period; - struct list_head runq_head; -}; - -#define GVT_DEFAULT_TIME_SLICE (msecs_to_jiffies(1)) - -static void tbs_sched_func(struct work_struct *work) +static struct intel_vgpu *find_busy_vgpu(struct gvt_sched_data *sched_data) { - struct tbs_sched_data *sched_data = container_of(work, - struct tbs_sched_data, work.work); - struct tbs_vgpu_data *vgpu_data; - - struct intel_gvt *gvt = sched_data->gvt; - struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; - + struct vgpu_sched_data *vgpu_data; struct intel_vgpu *vgpu = NULL; - struct list_head *pos, *head; - - mutex_lock(&gvt->lock); - - /* no vgpu or has already had a target */ - if (list_empty(&sched_data->runq_head) || scheduler->next_vgpu) - goto out; - - if (scheduler->current_vgpu) { - vgpu_data = scheduler->current_vgpu->sched_data; - head = &vgpu_data->list; - } else { - head = &sched_data->runq_head; - } + struct list_head *head = &sched_data->lru_runq_head; + struct list_head *pos; /* search a vgpu with pending workload */ list_for_each(pos, head) 
{ - if (pos == &sched_data->runq_head) - continue; - vgpu_data = container_of(pos, struct tbs_vgpu_data, list); + vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list); if (!vgpu_has_pending_workload(vgpu_data->vgpu)) continue; - vgpu = vgpu_data->vgpu; - break; + /* Return the vGPU only if it has time slice left */ + if (vgpu_data->left_ts > 0) { + vgpu = vgpu_data->vgpu; + break; + } } + return vgpu; +} + +/* in nanosecond */ +#define GVT_DEFAULT_TIME_SLICE 1000000 + +static void tbs_sched_func(struct gvt_sched_data *sched_data) +{ + struct intel_gvt *gvt = sched_data->gvt; + struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; + struct vgpu_sched_data *vgpu_data; + struct intel_vgpu *vgpu = NULL; + static uint64_t timer_check; + + if (!(timer_check++ % GVT_TS_BALANCE_PERIOD_MS)) + gvt_balance_timeslice(sched_data); + + /* no active vgpu or has already had a target */ + if (list_empty(&sched_data->lru_runq_head) || scheduler->next_vgpu) + goto out; + + vgpu = find_busy_vgpu(sched_data); if (vgpu) { scheduler->next_vgpu = vgpu; - gvt_dbg_sched("pick next vgpu %d\n", vgpu->id); + + /* Move the last used vGPU to the tail of lru_list */ + vgpu_data = vgpu->sched_data; + list_del_init(&vgpu_data->lru_list); + list_add_tail(&vgpu_data->lru_list, + &sched_data->lru_runq_head); + } else { + scheduler->next_vgpu = gvt->idle_vgpu; } out: - if (scheduler->next_vgpu) { - gvt_dbg_sched("try to schedule next vgpu %d\n", - scheduler->next_vgpu->id); + if (scheduler->next_vgpu) try_to_schedule_next_vgpu(gvt); - } +} - /* - * still have vgpu on runq - * or last schedule haven't finished due to running workload - */ - if (!list_empty(&sched_data->runq_head) || scheduler->next_vgpu) - schedule_delayed_work(&sched_data->work, sched_data->period); +void intel_gvt_schedule(struct intel_gvt *gvt) +{ + struct gvt_sched_data *sched_data = gvt->scheduler.sched_data; + mutex_lock(&gvt->lock); + tbs_sched_func(sched_data); mutex_unlock(&gvt->lock); } +static enum 
hrtimer_restart tbs_timer_fn(struct hrtimer *timer_data) +{ + struct gvt_sched_data *data; + + data = container_of(timer_data, struct gvt_sched_data, timer); + + intel_gvt_request_service(data->gvt, INTEL_GVT_REQUEST_SCHED); + + hrtimer_add_expires_ns(&data->timer, data->period); + + return HRTIMER_RESTART; +} + static int tbs_sched_init(struct intel_gvt *gvt) { struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; - struct tbs_sched_data *data; + struct gvt_sched_data *data; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; - INIT_LIST_HEAD(&data->runq_head); - INIT_DELAYED_WORK(&data->work, tbs_sched_func); + INIT_LIST_HEAD(&data->lru_runq_head); + hrtimer_init(&data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + data->timer.function = tbs_timer_fn; data->period = GVT_DEFAULT_TIME_SLICE; data->gvt = gvt; scheduler->sched_data = data; + return 0; } @@ -186,25 +272,28 @@ static void tbs_sched_clean(struct intel_gvt *gvt) { struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; - struct tbs_sched_data *data = scheduler->sched_data; + struct gvt_sched_data *data = scheduler->sched_data; + + hrtimer_cancel(&data->timer); - cancel_delayed_work(&data->work); kfree(data); scheduler->sched_data = NULL; } static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu) { - struct tbs_vgpu_data *data; + struct vgpu_sched_data *data; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; + data->sched_ctl.weight = vgpu->sched_ctl.weight; data->vgpu = vgpu; - INIT_LIST_HEAD(&data->list); + INIT_LIST_HEAD(&data->lru_list); vgpu->sched_data = data; + return 0; } @@ -216,21 +305,24 @@ static void tbs_sched_clean_vgpu(struct intel_vgpu *vgpu) static void tbs_sched_start_schedule(struct intel_vgpu *vgpu) { - struct tbs_sched_data *sched_data = vgpu->gvt->scheduler.sched_data; - struct tbs_vgpu_data *vgpu_data = vgpu->sched_data; + struct gvt_sched_data *sched_data = vgpu->gvt->scheduler.sched_data; + struct vgpu_sched_data *vgpu_data 
= vgpu->sched_data; - if (!list_empty(&vgpu_data->list)) + if (!list_empty(&vgpu_data->lru_list)) return; - list_add_tail(&vgpu_data->list, &sched_data->runq_head); - schedule_delayed_work(&sched_data->work, 0); + list_add_tail(&vgpu_data->lru_list, &sched_data->lru_runq_head); + + if (!hrtimer_active(&sched_data->timer)) + hrtimer_start(&sched_data->timer, ktime_add_ns(ktime_get(), + sched_data->period), HRTIMER_MODE_ABS); } static void tbs_sched_stop_schedule(struct intel_vgpu *vgpu) { - struct tbs_vgpu_data *vgpu_data = vgpu->sched_data; + struct vgpu_sched_data *vgpu_data = vgpu->sched_data; - list_del_init(&vgpu_data->list); + list_del_init(&vgpu_data->lru_list); } static struct intel_gvt_sched_policy_ops tbs_schedule_ops = { diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.h b/drivers/gpu/drm/i915/gvt/sched_policy.h index bb8b9097e41a..ba00a5f7455f 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.h +++ b/drivers/gpu/drm/i915/gvt/sched_policy.h @@ -43,6 +43,8 @@ struct intel_gvt_sched_policy_ops { void (*stop_schedule)(struct intel_vgpu *vgpu); }; +void intel_gvt_schedule(struct intel_gvt *gvt); + int intel_gvt_init_sched_policy(struct intel_gvt *gvt); void intel_gvt_clean_sched_policy(struct intel_gvt *gvt); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index a44782412f2c..bada32b33237 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -249,7 +249,7 @@ out: workload->status = ret; if (!IS_ERR_OR_NULL(rq)) - i915_add_request_no_flush(rq); + i915_add_request(rq); else engine->context_unpin(engine, shadow_ctx); @@ -279,11 +279,8 @@ static struct intel_vgpu_workload *pick_next_workload( goto out; } - if (list_empty(workload_q_head(scheduler->current_vgpu, ring_id))) { - gvt_dbg_sched("ring id %d stop - no available workload\n", - ring_id); + if (list_empty(workload_q_head(scheduler->current_vgpu, ring_id))) goto out; - } /* * still have current workload, maybe the workload 
dispatcher @@ -453,7 +450,8 @@ static int workload_thread(void *priv) struct intel_vgpu_workload *workload = NULL; struct intel_vgpu *vgpu = NULL; int ret; - bool need_force_wake = IS_SKYLAKE(gvt->dev_priv); + bool need_force_wake = IS_SKYLAKE(gvt->dev_priv) + || IS_KABYLAKE(gvt->dev_priv); DEFINE_WAIT_FUNC(wait, woken_wake_function); kfree(p); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 2833dfa8c9ae..2cd725c0573e 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -67,7 +67,6 @@ struct shadow_per_ctx { }; struct intel_shadow_wa_ctx { - struct intel_vgpu_workload *workload; struct shadow_indirect_ctx indirect_ctx; struct shadow_per_ctx per_ctx; diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 649ef280cc9a..6e3cbd8caec2 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -64,18 +64,28 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu) WARN_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE); } +#define VGPU_MAX_WEIGHT 16 +#define VGPU_WEIGHT(vgpu_num) \ + (VGPU_MAX_WEIGHT / (vgpu_num)) + static struct { unsigned int low_mm; unsigned int high_mm; unsigned int fence; + + /* A vGPU with a weight of 8 will get twice as much GPU as a vGPU + * with a weight of 4 on a contended host, different vGPU type has + * different weight set. Legal weights range from 1 to 16. 
+ */ + unsigned int weight; enum intel_vgpu_edid edid; char *name; } vgpu_types[] = { /* Fixed vGPU type table */ - { MB_TO_BYTES(64), MB_TO_BYTES(384), 4, GVT_EDID_1024_768, "8" }, - { MB_TO_BYTES(128), MB_TO_BYTES(512), 4, GVT_EDID_1920_1200, "4" }, - { MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, GVT_EDID_1920_1200, "2" }, - { MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, GVT_EDID_1920_1200, "1" }, + { MB_TO_BYTES(64), MB_TO_BYTES(384), 4, VGPU_WEIGHT(8), GVT_EDID_1024_768, "8" }, + { MB_TO_BYTES(128), MB_TO_BYTES(512), 4, VGPU_WEIGHT(4), GVT_EDID_1920_1200, "4" }, + { MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, VGPU_WEIGHT(2), GVT_EDID_1920_1200, "2" }, + { MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, VGPU_WEIGHT(1), GVT_EDID_1920_1200, "1" }, }; /** @@ -120,6 +130,12 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) gvt->types[i].low_gm_size = vgpu_types[i].low_mm; gvt->types[i].high_gm_size = vgpu_types[i].high_mm; gvt->types[i].fence = vgpu_types[i].fence; + + if (vgpu_types[i].weight < 1 || + vgpu_types[i].weight > VGPU_MAX_WEIGHT) + return -EINVAL; + + gvt->types[i].weight = vgpu_types[i].weight; gvt->types[i].resolution = vgpu_types[i].edid; gvt->types[i].avail_instance = min(low_avail / vgpu_types[i].low_mm, high_avail / vgpu_types[i].high_mm); @@ -131,11 +147,12 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) sprintf(gvt->types[i].name, "GVTg_V5_%s", vgpu_types[i].name); - gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u res %s\n", + gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u weight %u res %s\n", i, gvt->types[i].name, gvt->types[i].avail_instance, gvt->types[i].low_gm_size, gvt->types[i].high_gm_size, gvt->types[i].fence, + gvt->types[i].weight, vgpu_edid_str(gvt->types[i].resolution)); } @@ -250,6 +267,59 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) mutex_unlock(&gvt->lock); } +#define IDLE_VGPU_IDR 0 + +/** + * intel_gvt_create_idle_vgpu - create an idle virtual GPU + * @gvt: GVT device + * + * This function is called 
when user wants to create an idle virtual GPU. + * + * Returns: + * pointer to intel_vgpu, error pointer if failed. + */ +struct intel_vgpu *intel_gvt_create_idle_vgpu(struct intel_gvt *gvt) +{ + struct intel_vgpu *vgpu; + enum intel_engine_id i; + int ret; + + vgpu = vzalloc(sizeof(*vgpu)); + if (!vgpu) + return ERR_PTR(-ENOMEM); + + vgpu->id = IDLE_VGPU_IDR; + vgpu->gvt = gvt; + + for (i = 0; i < I915_NUM_ENGINES; i++) + INIT_LIST_HEAD(&vgpu->workload_q_head[i]); + + ret = intel_vgpu_init_sched_policy(vgpu); + if (ret) + goto out_free_vgpu; + + vgpu->active = false; + + return vgpu; + +out_free_vgpu: + vfree(vgpu); + return ERR_PTR(ret); +} + +/** + * intel_gvt_destroy_idle_vgpu - destroy an idle virtual GPU + * @vgpu: virtual GPU + * + * This function is called when user wants to destroy an idle virtual GPU. + * + */ +void intel_gvt_destroy_idle_vgpu(struct intel_vgpu *vgpu) +{ + intel_vgpu_clean_sched_policy(vgpu); + vfree(vgpu); +} + static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, struct intel_vgpu_creation_params *param) { @@ -266,13 +336,15 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, mutex_lock(&gvt->lock); - ret = idr_alloc(&gvt->vgpu_idr, vgpu, 1, GVT_MAX_VGPU, GFP_KERNEL); + ret = idr_alloc(&gvt->vgpu_idr, vgpu, IDLE_VGPU_IDR + 1, GVT_MAX_VGPU, + GFP_KERNEL); if (ret < 0) goto out_free_vgpu; vgpu->id = ret; vgpu->handle = param->handle; vgpu->gvt = gvt; + vgpu->sched_ctl.weight = param->weight; bitmap_zero(vgpu->tlb_handle_pending, I915_NUM_ENGINES); intel_vgpu_init_cfg_space(vgpu, param->primary); @@ -358,6 +430,7 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, param.low_gm_sz = type->low_gm_size; param.high_gm_sz = type->high_gm_size; param.fence_sz = type->fence; + param.weight = type->weight; param.resolution = type->resolution; /* XXX current param based on MB */ diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 
21b1cd917d81..7af100f84410 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1279,11 +1279,17 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, * space. Parsing should be faster in some cases this way. */ batch_end = cmd + (batch_len / sizeof(*batch_end)); - while (cmd < batch_end) { + do { u32 length; - if (*cmd == MI_BATCH_BUFFER_END) + if (*cmd == MI_BATCH_BUFFER_END) { + if (needs_clflush_after) { + void *ptr = ptr_mask_bits(shadow_batch_obj->mm.mapping); + drm_clflush_virt_range(ptr, + (void *)(cmd + 1) - ptr); + } break; + } desc = find_cmd(engine, *cmd, desc, &default_desc); if (!desc) { @@ -1323,17 +1329,14 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, } cmd += length; - } - - if (cmd >= batch_end) { - DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n"); - ret = -EINVAL; - } + if (cmd >= batch_end) { + DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n"); + ret = -EINVAL; + break; + } + } while (1); - if (ret == 0 && needs_clflush_after) - drm_clflush_virt_range(shadow_batch_obj->mm.mapping, batch_len); i915_gem_object_unpin_map(shadow_batch_obj); - return ret; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index fa69d72fdcb9..d689e511744e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -27,7 +27,7 @@ */ #include <linux/debugfs.h> -#include <linux/list_sort.h> +#include <linux/sort.h> #include "intel_drv.h" static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) @@ -35,30 +35,21 @@ static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) return to_i915(node->minor->dev); } -/* As the drm_debugfs_init() routines are called before dev->dev_private is - * allocated we need to hook into the minor for release. 
*/ -static int -drm_add_fake_info_node(struct drm_minor *minor, - struct dentry *ent, - const void *key) +static __always_inline void seq_print_param(struct seq_file *m, + const char *name, + const char *type, + const void *x) { - struct drm_info_node *node; - - node = kmalloc(sizeof(*node), GFP_KERNEL); - if (node == NULL) { - debugfs_remove(ent); - return -ENOMEM; - } - - node->minor = minor; - node->dent = ent; - node->info_ent = (void *)key; - - mutex_lock(&minor->debugfs_lock); - list_add(&node->list, &minor->debugfs_list); - mutex_unlock(&minor->debugfs_lock); - - return 0; + if (!__builtin_strcmp(type, "bool")) + seq_printf(m, "i915.%s=%s\n", name, yesno(*(const bool *)x)); + else if (!__builtin_strcmp(type, "int")) + seq_printf(m, "i915.%s=%d\n", name, *(const int *)x); + else if (!__builtin_strcmp(type, "unsigned int")) + seq_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x); + else if (!__builtin_strcmp(type, "char *")) + seq_printf(m, "i915.%s=%s\n", name, *(const char **)x); + else + BUILD_BUG(); } static int i915_capabilities(struct seq_file *m, void *data) @@ -69,10 +60,17 @@ static int i915_capabilities(struct seq_file *m, void *data) seq_printf(m, "gen: %d\n", INTEL_GEN(dev_priv)); seq_printf(m, "platform: %s\n", intel_platform_name(info->platform)); seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(dev_priv)); + #define PRINT_FLAG(x) seq_printf(m, #x ": %s\n", yesno(info->x)) DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG); #undef PRINT_FLAG + kernel_param_lock(THIS_MODULE); +#define PRINT_PARAM(T, x) seq_print_param(m, #x, #T, &i915.x); + I915_PARAMS_FOR_EACH(PRINT_PARAM); +#undef PRINT_PARAM + kernel_param_unlock(THIS_MODULE); + return 0; } @@ -206,13 +204,12 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_printf(m, " (frontbuffer: 0x%03x)", frontbuffer_bits); } -static int obj_rank_by_stolen(void *priv, - struct list_head *A, struct list_head *B) +static int obj_rank_by_stolen(const void *A, const void *B) { - struct 
drm_i915_gem_object *a = - container_of(A, struct drm_i915_gem_object, obj_exec_link); - struct drm_i915_gem_object *b = - container_of(B, struct drm_i915_gem_object, obj_exec_link); + const struct drm_i915_gem_object *a = + *(const struct drm_i915_gem_object **)A; + const struct drm_i915_gem_object *b = + *(const struct drm_i915_gem_object **)B; if (a->stolen->start < b->stolen->start) return -1; @@ -225,49 +222,60 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); struct drm_device *dev = &dev_priv->drm; + struct drm_i915_gem_object **objects; struct drm_i915_gem_object *obj; u64 total_obj_size, total_gtt_size; - LIST_HEAD(stolen); - int count, ret; + unsigned long total, count, n; + int ret; + + total = READ_ONCE(dev_priv->mm.object_count); + objects = drm_malloc_ab(total, sizeof(*objects)); + if (!objects) + return -ENOMEM; ret = mutex_lock_interruptible(&dev->struct_mutex); if (ret) - return ret; + goto out; total_obj_size = total_gtt_size = count = 0; list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { + if (count == total) + break; + if (obj->stolen == NULL) continue; - list_add(&obj->obj_exec_link, &stolen); - + objects[count++] = obj; total_obj_size += obj->base.size; total_gtt_size += i915_gem_obj_total_ggtt_size(obj); - count++; + } list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) { + if (count == total) + break; + if (obj->stolen == NULL) continue; - list_add(&obj->obj_exec_link, &stolen); - + objects[count++] = obj; total_obj_size += obj->base.size; - count++; } - list_sort(NULL, &stolen, obj_rank_by_stolen); + + sort(objects, count, sizeof(*objects), obj_rank_by_stolen, NULL); + seq_puts(m, "Stolen:\n"); - while (!list_empty(&stolen)) { - obj = list_first_entry(&stolen, typeof(*obj), obj_exec_link); + for (n = 0; n < count; n++) { seq_puts(m, " "); - describe_obj(m, obj); + describe_obj(m, objects[n]); seq_putc(m, '\n'); - 
list_del_init(&obj->obj_exec_link); } - mutex_unlock(&dev->struct_mutex); - - seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n", + seq_printf(m, "Total %lu objects, %llu bytes, %llu GTT size\n", count, total_obj_size, total_gtt_size); - return 0; + + mutex_unlock(&dev->struct_mutex); +out: + drm_free_large(objects); + return ret; } struct file_stats { @@ -454,7 +462,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) dpy_count, dpy_size); seq_printf(m, "%llu [%llu] gtt total\n", - ggtt->base.total, ggtt->mappable_end - ggtt->base.start); + ggtt->base.total, ggtt->mappable_end); seq_putc(m, '\n'); print_batch_pool_stats(m, dev_priv); @@ -482,7 +490,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) mutex_lock(&dev->struct_mutex); request = list_first_entry_or_null(&file_priv->mm.request_list, struct drm_i915_gem_request, - client_list); + client_link); rcu_read_lock(); task = pid_task(request && request->ctx->pid ? request->ctx->pid : file->pid, @@ -702,14 +710,14 @@ static void i915_ring_seqno_info(struct seq_file *m, seq_printf(m, "Current sequence (%s): %x\n", engine->name, intel_engine_get_seqno(engine)); - spin_lock_irq(&b->lock); + spin_lock_irq(&b->rb_lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { struct intel_wait *w = rb_entry(rb, typeof(*w), node); seq_printf(m, "Waiting (%s): %s [%d] on %x\n", engine->name, w->tsk->comm, w->tsk->pid, w->seqno); } - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); } static int i915_gem_seqno_info(struct seq_file *m, void *data) @@ -853,10 +861,22 @@ static int i915_interrupt_info(struct seq_file *m, void *data) I915_READ(VLV_IIR_RW)); seq_printf(m, "Display IMR:\t%08x\n", I915_READ(VLV_IMR)); - for_each_pipe(dev_priv, pipe) + for_each_pipe(dev_priv, pipe) { + enum intel_display_power_domain power_domain; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, + power_domain)) { + seq_printf(m, "Pipe %c power 
disabled\n", + pipe_name(pipe)); + continue; + } + seq_printf(m, "Pipe %c stat:\t%08x\n", pipe_name(pipe), I915_READ(PIPESTAT(pipe))); + intel_display_power_put(dev_priv, power_domain); + } seq_printf(m, "Master IER:\t%08x\n", I915_READ(VLV_MASTER_IER)); @@ -954,101 +974,99 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data) } #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) - -static ssize_t -i915_error_state_write(struct file *filp, - const char __user *ubuf, - size_t cnt, - loff_t *ppos) +static ssize_t gpu_state_read(struct file *file, char __user *ubuf, + size_t count, loff_t *pos) { - struct i915_error_state_file_priv *error_priv = filp->private_data; - - DRM_DEBUG_DRIVER("Resetting error state\n"); - i915_destroy_error_state(error_priv->i915); + struct i915_gpu_state *error = file->private_data; + struct drm_i915_error_state_buf str; + ssize_t ret; + loff_t tmp; - return cnt; -} - -static int i915_error_state_open(struct inode *inode, struct file *file) -{ - struct drm_i915_private *dev_priv = inode->i_private; - struct i915_error_state_file_priv *error_priv; + if (!error) + return 0; - error_priv = kzalloc(sizeof(*error_priv), GFP_KERNEL); - if (!error_priv) - return -ENOMEM; + ret = i915_error_state_buf_init(&str, error->i915, count, *pos); + if (ret) + return ret; - error_priv->i915 = dev_priv; + ret = i915_error_state_to_str(&str, error); + if (ret) + goto out; - i915_error_state_get(&dev_priv->drm, error_priv); + tmp = 0; + ret = simple_read_from_buffer(ubuf, count, &tmp, str.buf, str.bytes); + if (ret < 0) + goto out; - file->private_data = error_priv; + *pos = str.start + ret; +out: + i915_error_state_buf_release(&str); + return ret; +} +static int gpu_state_release(struct inode *inode, struct file *file) +{ + i915_gpu_state_put(file->private_data); return 0; } -static int i915_error_state_release(struct inode *inode, struct file *file) +static int i915_gpu_info_open(struct inode *inode, struct file *file) { - struct 
i915_error_state_file_priv *error_priv = file->private_data; + struct drm_i915_private *i915 = inode->i_private; + struct i915_gpu_state *gpu; - i915_error_state_put(error_priv); - kfree(error_priv); + intel_runtime_pm_get(i915); + gpu = i915_capture_gpu_state(i915); + intel_runtime_pm_put(i915); + if (!gpu) + return -ENOMEM; + file->private_data = gpu; return 0; } -static ssize_t i915_error_state_read(struct file *file, char __user *userbuf, - size_t count, loff_t *pos) +static const struct file_operations i915_gpu_info_fops = { + .owner = THIS_MODULE, + .open = i915_gpu_info_open, + .read = gpu_state_read, + .llseek = default_llseek, + .release = gpu_state_release, +}; + +static ssize_t +i915_error_state_write(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) { - struct i915_error_state_file_priv *error_priv = file->private_data; - struct drm_i915_error_state_buf error_str; - loff_t tmp_pos = 0; - ssize_t ret_count = 0; - int ret; + struct i915_gpu_state *error = filp->private_data; - ret = i915_error_state_buf_init(&error_str, error_priv->i915, - count, *pos); - if (ret) - return ret; + if (!error) + return 0; - ret = i915_error_state_to_str(&error_str, error_priv); - if (ret) - goto out; + DRM_DEBUG_DRIVER("Resetting error state\n"); + i915_reset_error_state(error->i915); - ret_count = simple_read_from_buffer(userbuf, count, &tmp_pos, - error_str.buf, - error_str.bytes); + return cnt; +} - if (ret_count < 0) - ret = ret_count; - else - *pos = error_str.start + ret_count; -out: - i915_error_state_buf_release(&error_str); - return ret ?: ret_count; +static int i915_error_state_open(struct inode *inode, struct file *file) +{ + file->private_data = i915_first_error_state(inode->i_private); + return 0; } static const struct file_operations i915_error_state_fops = { .owner = THIS_MODULE, .open = i915_error_state_open, - .read = i915_error_state_read, + .read = gpu_state_read, .write = i915_error_state_write, .llseek = default_llseek, - 
.release = i915_error_state_release, + .release = gpu_state_release, }; - #endif static int -i915_next_seqno_get(void *data, u64 *val) -{ - struct drm_i915_private *dev_priv = data; - - *val = 1 + atomic_read(&dev_priv->gt.global_timeline.seqno); - return 0; -} - -static int i915_next_seqno_set(void *data, u64 val) { struct drm_i915_private *dev_priv = data; @@ -1066,13 +1084,12 @@ i915_next_seqno_set(void *data, u64 val) } DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops, - i915_next_seqno_get, i915_next_seqno_set, + NULL, i915_next_seqno_set, "0x%llx\n"); static int i915_frequency_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; int ret = 0; intel_runtime_pm_get(dev_priv); @@ -1135,10 +1152,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused) } /* RPSTAT1 is in the GT power well */ - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - goto out; - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); reqf = I915_READ(GEN6_RPNSWREQ); @@ -1173,7 +1186,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused) cagf = intel_gpu_freq(dev_priv, cagf); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - mutex_unlock(&dev->struct_mutex); if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) { pm_ier = I915_READ(GEN6_PMIER); @@ -1190,7 +1202,8 @@ static int i915_frequency_info(struct seq_file *m, void *unused) } seq_printf(m, "PM IER=0x%08x IMR=0x%08x ISR=0x%08x IIR=0x%08x, MASK=0x%08x\n", pm_ier, pm_imr, pm_isr, pm_iir, pm_mask); - seq_printf(m, "pm_intr_keep: 0x%08x\n", dev_priv->rps.pm_intr_keep); + seq_printf(m, "pm_intrmsk_mbz: 0x%08x\n", + dev_priv->rps.pm_intrmsk_mbz); seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); seq_printf(m, "Render p-state ratio: %d\n", (gt_perf_status & (IS_GEN9(dev_priv) ? 
0x1ff00 : 0xff00)) >> 8); @@ -1224,21 +1237,18 @@ static int i915_frequency_info(struct seq_file *m, void *unused) max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 0 : rp_state_cap >> 16) & 0xff; - max_freq *= (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ? - GEN9_FREQ_SCALER : 1); + max_freq *= (IS_GEN9_BC(dev_priv) ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); max_freq = (rp_state_cap & 0xff00) >> 8; - max_freq *= (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ? - GEN9_FREQ_SCALER : 1); + max_freq *= (IS_GEN9_BC(dev_priv) ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 16 : rp_state_cap >> 0) & 0xff; - max_freq *= (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ? - GEN9_FREQ_SCALER : 1); + max_freq *= (IS_GEN9_BC(dev_priv) ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); seq_printf(m, "Max overclocked frequency: %dMHz\n", @@ -1262,11 +1272,10 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_puts(m, "no P-state info available\n"); } - seq_printf(m, "Current CD clock frequency: %d kHz\n", dev_priv->cdclk_freq); + seq_printf(m, "Current CD clock frequency: %d kHz\n", dev_priv->cdclk.hw.cdclk); seq_printf(m, "Max CD clock frequency: %d kHz\n", dev_priv->max_cdclk_freq); seq_printf(m, "Max pixel clock frequency: %d kHz\n", dev_priv->max_dotclk_freq); -out: intel_runtime_pm_put(dev_priv); return ret; } @@ -1309,16 +1318,18 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) enum intel_engine_id id; if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags)) - seq_printf(m, "Wedged\n"); - if (test_bit(I915_RESET_IN_PROGRESS, &dev_priv->gpu_error.flags)) - seq_printf(m, "Reset in progress\n"); + seq_puts(m, "Wedged\n"); + if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags)) + 
seq_puts(m, "Reset in progress: struct_mutex backoff\n"); + if (test_bit(I915_RESET_HANDOFF, &dev_priv->gpu_error.flags)) + seq_puts(m, "Reset in progress: reset handoff to waiter\n"); if (waitqueue_active(&dev_priv->gpu_error.wait_queue)) - seq_printf(m, "Waiter holding struct mutex\n"); + seq_puts(m, "Waiter holding struct mutex\n"); if (waitqueue_active(&dev_priv->gpu_error.reset_queue)) - seq_printf(m, "struct_mutex blocked for reset\n"); + seq_puts(m, "struct_mutex blocked for reset\n"); if (!i915.enable_hangcheck) { - seq_printf(m, "Hangcheck disabled\n"); + seq_puts(m, "Hangcheck disabled\n"); return 0; } @@ -1333,35 +1344,40 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) intel_runtime_pm_put(dev_priv); - if (delayed_work_pending(&dev_priv->gpu_error.hangcheck_work)) { - seq_printf(m, "Hangcheck active, fires in %dms\n", + if (timer_pending(&dev_priv->gpu_error.hangcheck_work.timer)) + seq_printf(m, "Hangcheck active, timer fires in %dms\n", jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires - jiffies)); - } else - seq_printf(m, "Hangcheck inactive\n"); + else if (delayed_work_pending(&dev_priv->gpu_error.hangcheck_work)) + seq_puts(m, "Hangcheck active, work pending\n"); + else + seq_puts(m, "Hangcheck inactive\n"); + + seq_printf(m, "GT active? %s\n", yesno(dev_priv->gt.awake)); for_each_engine(engine, dev_priv, id) { struct intel_breadcrumbs *b = &engine->breadcrumbs; struct rb_node *rb; seq_printf(m, "%s:\n", engine->name); - seq_printf(m, "\tseqno = %x [current %x, last %x]\n", + seq_printf(m, "\tseqno = %x [current %x, last %x], inflight %d\n", engine->hangcheck.seqno, seqno[id], - intel_engine_last_submit(engine)); + intel_engine_last_submit(engine), + engine->timeline->inflight_seqnos); seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? 
%s\n", yesno(intel_engine_has_waiter(engine)), yesno(test_bit(engine->id, &dev_priv->gpu_error.missed_irq_rings)), yesno(engine->hangcheck.stalled)); - spin_lock_irq(&b->lock); + spin_lock_irq(&b->rb_lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { struct intel_wait *w = rb_entry(rb, typeof(*w), node); seq_printf(m, "\t%s [%d] waiting for %x\n", w->tsk->comm, w->tsk->pid, w->seqno); } - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", (long long)engine->hangcheck.acthd, @@ -1393,14 +1409,10 @@ static int ironlake_drpc_info(struct seq_file *m) u32 rgvmodectl, rstdbyctl; u16 crstandvid; - intel_runtime_pm_get(dev_priv); - rgvmodectl = I915_READ(MEMMODECTL); rstdbyctl = I915_READ(RSTDBYCTL); crstandvid = I915_READ16(CRSTANDVID); - intel_runtime_pm_put(dev_priv); - seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN)); seq_printf(m, "Boost freq: %d\n", (rgvmodectl & MEMMODE_BOOST_FREQ_MASK) >> @@ -1450,33 +1462,38 @@ static int ironlake_drpc_info(struct seq_file *m) static int i915_forcewake_domains(struct seq_file *m, void *data) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_i915_private *i915 = node_to_i915(m->private); struct intel_uncore_forcewake_domain *fw_domain; + unsigned int tmp; - spin_lock_irq(&dev_priv->uncore.lock); - for_each_fw_domain(fw_domain, dev_priv) { + for_each_fw_domain(fw_domain, i915, tmp) seq_printf(m, "%s.wake_count = %u\n", intel_uncore_forcewake_domain_to_str(fw_domain->id), - fw_domain->wake_count); - } - spin_unlock_irq(&dev_priv->uncore.lock); + READ_ONCE(fw_domain->wake_count)); return 0; } +static void print_rc6_res(struct seq_file *m, + const char *title, + const i915_reg_t reg) +{ + struct drm_i915_private *dev_priv = node_to_i915(m->private); + + seq_printf(m, "%s %u (%llu us)\n", + title, I915_READ(reg), + intel_rc6_residency_us(dev_priv, reg)); +} + static int vlv_drpc_info(struct seq_file *m) { 
struct drm_i915_private *dev_priv = node_to_i915(m->private); u32 rpmodectl1, rcctl1, pw_status; - intel_runtime_pm_get(dev_priv); - pw_status = I915_READ(VLV_GTLC_PW_STATUS); rpmodectl1 = I915_READ(GEN6_RP_CONTROL); rcctl1 = I915_READ(GEN6_RC_CONTROL); - intel_runtime_pm_put(dev_priv); - seq_printf(m, "Video Turbo Mode: %s\n", yesno(rpmodectl1 & GEN6_RP_MEDIA_TURBO)); seq_printf(m, "Turbo enabled: %s\n", @@ -1494,10 +1511,8 @@ static int vlv_drpc_info(struct seq_file *m) seq_printf(m, "Media Power Well: %s\n", (pw_status & VLV_GTLC_PW_MEDIA_STATUS_MASK) ? "Up" : "Down"); - seq_printf(m, "Render RC6 residency since boot: %u\n", - I915_READ(VLV_GT_RENDER_RC6)); - seq_printf(m, "Media RC6 residency since boot: %u\n", - I915_READ(VLV_GT_MEDIA_RC6)); + print_rc6_res(m, "Render RC6 residency since boot:", VLV_GT_RENDER_RC6); + print_rc6_res(m, "Media RC6 residency since boot:", VLV_GT_MEDIA_RC6); return i915_forcewake_domains(m, NULL); } @@ -1505,21 +1520,12 @@ static int vlv_drpc_info(struct seq_file *m) static int gen6_drpc_info(struct seq_file *m) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; u32 rpmodectl1, gt_core_status, rcctl1, rc6vids = 0; u32 gen9_powergate_enable = 0, gen9_powergate_status = 0; unsigned forcewake_count; - int count = 0, ret; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - intel_runtime_pm_get(dev_priv); - - spin_lock_irq(&dev_priv->uncore.lock); - forcewake_count = dev_priv->uncore.fw_domain[FW_DOMAIN_ID_RENDER].wake_count; - spin_unlock_irq(&dev_priv->uncore.lock); + int count = 0; + forcewake_count = READ_ONCE(dev_priv->uncore.fw_domain[FW_DOMAIN_ID_RENDER].wake_count); if (forcewake_count) { seq_puts(m, "RC information inaccurate because somebody " "holds a forcewake reference \n"); @@ -1539,13 +1545,11 @@ static int gen6_drpc_info(struct seq_file *m) gen9_powergate_enable = I915_READ(GEN9_PG_ENABLE); gen9_powergate_status = 
I915_READ(GEN9_PWRGT_DOMAIN_STATUS); } - mutex_unlock(&dev->struct_mutex); + mutex_lock(&dev_priv->rps.hw_lock); sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); mutex_unlock(&dev_priv->rps.hw_lock); - intel_runtime_pm_put(dev_priv); - seq_printf(m, "Video Turbo Mode: %s\n", yesno(rpmodectl1 & GEN6_RP_MEDIA_TURBO)); seq_printf(m, "HW control enabled: %s\n", @@ -1601,14 +1605,11 @@ static int gen6_drpc_info(struct seq_file *m) } /* Not exactly sure what this is */ - seq_printf(m, "RC6 \"Locked to RPn\" residency since boot: %u\n", - I915_READ(GEN6_GT_GFX_RC6_LOCKED)); - seq_printf(m, "RC6 residency since boot: %u\n", - I915_READ(GEN6_GT_GFX_RC6)); - seq_printf(m, "RC6+ residency since boot: %u\n", - I915_READ(GEN6_GT_GFX_RC6p)); - seq_printf(m, "RC6++ residency since boot: %u\n", - I915_READ(GEN6_GT_GFX_RC6pp)); + print_rc6_res(m, "RC6 \"Locked to RPn\" residency since boot:", + GEN6_GT_GFX_RC6_LOCKED); + print_rc6_res(m, "RC6 residency since boot:", GEN6_GT_GFX_RC6); + print_rc6_res(m, "RC6+ residency since boot:", GEN6_GT_GFX_RC6p); + print_rc6_res(m, "RC6++ residency since boot:", GEN6_GT_GFX_RC6pp); seq_printf(m, "RC6 voltage: %dmV\n", GEN6_DECODE_RC6_VID(((rc6vids >> 0) & 0xff))); @@ -1622,13 +1623,20 @@ static int gen6_drpc_info(struct seq_file *m) static int i915_drpc_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + int err; + + intel_runtime_pm_get(dev_priv); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - return vlv_drpc_info(m); + err = vlv_drpc_info(m); else if (INTEL_GEN(dev_priv) >= 6) - return gen6_drpc_info(m); + err = gen6_drpc_info(m); else - return ironlake_drpc_info(m); + err = ironlake_drpc_info(m); + + intel_runtime_pm_put(dev_priv); + + return err; } static int i915_frontbuffer_tracking(struct seq_file *m, void *unused) @@ -1749,7 +1757,9 @@ static int i915_sr_status(struct seq_file *m, void *unused) intel_runtime_pm_get(dev_priv); 
intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); - if (HAS_PCH_SPLIT(dev_priv)) + if (INTEL_GEN(dev_priv) >= 9) + /* no global SR status; inspect per-plane WM */; + else if (HAS_PCH_SPLIT(dev_priv)) sr_enabled = I915_READ(WM1_LP_ILK) & WM1_LP_SR_EN; else if (IS_I965GM(dev_priv) || IS_G4X(dev_priv) || IS_I945G(dev_priv) || IS_I945GM(dev_priv)) @@ -1814,7 +1824,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) if (ret) goto out; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { /* Convert GT frequency to 50 HZ units */ min_gpu_freq = dev_priv->rps.min_freq_softlimit / GEN9_FREQ_SCALER; @@ -1834,8 +1844,8 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) &ia_freq); seq_printf(m, "%d\t\t%d\t\t\t\t%d\n", intel_gpu_freq(dev_priv, (gpu_freq * - (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ? - GEN9_FREQ_SCALER : 1))), + (IS_GEN9_BC(dev_priv) ? + GEN9_FREQ_SCALER : 1))), ((ia_freq >> 0) & 0xff) * 100, ((ia_freq >> 8) & 0xff) * 100); } @@ -1929,9 +1939,8 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data) static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring) { - seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, last head: %d)", - ring->space, ring->head, ring->tail, - ring->last_retired_head); + seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u)", + ring->space, ring->head, ring->tail); } static int i915_context_status(struct seq_file *m, void *unused) @@ -2328,10 +2337,10 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n", rps_power_to_str(dev_priv->rps.power)); seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n", - 100 * rpup / rpupei, + rpup && rpupei ? 100 * rpup / rpupei : 0, dev_priv->rps.up_threshold); seq_printf(m, " Avg. down: %d%% [below threshold? %d%%]\n", - 100 * rpdown / rpdownei, + rpdown && rpdownei ? 
100 * rpdown / rpdownei : 0, dev_priv->rps.down_threshold); } else { seq_puts(m, "\nRPS Autotuning inactive\n"); @@ -2377,7 +2386,9 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data) seq_printf(m, "\tRSA: offset is %d; size = %d\n", huc_fw->rsa_offset, huc_fw->rsa_size); + intel_runtime_pm_get(dev_priv); seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2)); + intel_runtime_pm_put(dev_priv); return 0; } @@ -2409,6 +2420,8 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) seq_printf(m, "\tRSA: offset is %d; size = %d\n", guc_fw->rsa_offset, guc_fw->rsa_size); + intel_runtime_pm_get(dev_priv); + tmp = I915_READ(GUC_STATUS); seq_printf(m, "\nGuC status 0x%08x:\n", tmp); @@ -2422,6 +2435,8 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) for (i = 0; i < 16; i++) seq_printf(m, "\t%2d: \t0x%x\n", i, I915_READ(SOFT_SCRATCH(i))); + intel_runtime_pm_put(dev_priv); + return 0; } @@ -2459,9 +2474,9 @@ static void i915_guc_client_info(struct seq_file *m, enum intel_engine_id id; uint64_t tot = 0; - seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n", - client->priority, client->ctx_index, client->proc_desc_offset); - seq_printf(m, "\tDoorbell id %d, offset: 0x%x, cookie 0x%x\n", + seq_printf(m, "\tPriority %d, GuC stage index: %u, PD offset 0x%x\n", + client->priority, client->stage_id, client->proc_desc_offset); + seq_printf(m, "\tDoorbell id %d, offset: 0x%lx, cookie 0x%x\n", client->doorbell_id, client->doorbell_offset, client->doorbell_cookie); seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n", client->wq_size, client->wq_offset, client->wq_tail); @@ -2496,7 +2511,7 @@ static int i915_guc_info(struct seq_file *m, void *data) } seq_printf(m, "Doorbell map:\n"); - seq_printf(m, "\t%*pb\n", GUC_MAX_DOORBELLS, guc->doorbell_bitmap); + seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap); seq_printf(m, "Doorbell next cacheline: 0x%x\n\n", guc->db_cacheline); 
seq_printf(m, "GuC total action count: %llu\n", guc->action_count); @@ -2703,12 +2718,14 @@ static int i915_sink_crc(struct seq_file *m, void *data) struct drm_i915_private *dev_priv = node_to_i915(m->private); struct drm_device *dev = &dev_priv->drm; struct intel_connector *connector; + struct drm_connector_list_iter conn_iter; struct intel_dp *intel_dp = NULL; int ret; u8 crc[6]; drm_modeset_lock_all(dev); - for_each_intel_connector(dev, connector) { + drm_connector_list_iter_begin(dev, &conn_iter); + for_each_intel_connector_iter(connector, &conn_iter) { struct drm_crtc *crtc; if (!connector->base.state->best_encoder) @@ -2734,6 +2751,7 @@ static int i915_sink_crc(struct seq_file *m, void *data) } ret = -ENODEV; out: + drm_connector_list_iter_end(&conn_iter); drm_modeset_unlock_all(dev); return ret; } @@ -2803,15 +2821,10 @@ static int i915_power_domain_info(struct seq_file *m, void *unused) seq_printf(m, "%-25s %d\n", power_well->name, power_well->count); - for (power_domain = 0; power_domain < POWER_DOMAIN_NUM; - power_domain++) { - if (!(BIT(power_domain) & power_well->domains)) - continue; - + for_each_power_domain(power_domain, power_well->domains) seq_printf(m, " %-23s %d\n", intel_display_power_domain_str(power_domain), power_domains->domain_use_count[power_domain]); - } } mutex_unlock(&power_domains->lock); @@ -3175,9 +3188,9 @@ static int i915_display_info(struct seq_file *m, void *unused) struct drm_device *dev = &dev_priv->drm; struct intel_crtc *crtc; struct drm_connector *connector; + struct drm_connector_list_iter conn_iter; intel_runtime_pm_get(dev_priv); - drm_modeset_lock_all(dev); seq_printf(m, "CRTC info\n"); seq_printf(m, "---------\n"); for_each_intel_crtc(dev, crtc) { @@ -3185,6 +3198,7 @@ static int i915_display_info(struct seq_file *m, void *unused) struct intel_crtc_state *pipe_config; int x, y; + drm_modeset_lock(&crtc->base.mutex, NULL); pipe_config = to_intel_crtc_state(crtc->base.state); seq_printf(m, "CRTC %d: pipe: %c, active=%s, 
(size=%dx%d), dither=%s, bpp=%d\n", @@ -3209,15 +3223,19 @@ static int i915_display_info(struct seq_file *m, void *unused) seq_printf(m, "\tunderrun reporting: cpu=%s pch=%s \n", yesno(!crtc->cpu_fifo_underrun_disabled), yesno(!crtc->pch_fifo_underrun_disabled)); + drm_modeset_unlock(&crtc->base.mutex); } seq_printf(m, "\n"); seq_printf(m, "Connector info\n"); seq_printf(m, "--------------\n"); - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + mutex_lock(&dev->mode_config.mutex); + drm_connector_list_iter_begin(dev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) intel_connector_info(m, connector); - } - drm_modeset_unlock_all(dev); + drm_connector_list_iter_end(&conn_iter); + mutex_unlock(&dev->mode_config.mutex); + intel_runtime_pm_put(dev_priv); return 0; @@ -3231,6 +3249,11 @@ static int i915_engine_info(struct seq_file *m, void *unused) intel_runtime_pm_get(dev_priv); + seq_printf(m, "GT awake? %s\n", + yesno(dev_priv->gt.awake)); + seq_printf(m, "Global active requests: %d\n", + dev_priv->gt.active_requests); + for_each_engine(engine, dev_priv, id) { struct intel_breadcrumbs *b = &engine->breadcrumbs; struct drm_i915_gem_request *rq; @@ -3238,11 +3261,12 @@ static int i915_engine_info(struct seq_file *m, void *unused) u64 addr; seq_printf(m, "%s\n", engine->name); - seq_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n", + seq_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", intel_engine_get_seqno(engine), intel_engine_last_submit(engine), engine->hangcheck.seqno, - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); + jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), + engine->timeline->inflight_seqnos); rcu_read_lock(); @@ -3320,15 +3344,21 @@ static int i915_engine_info(struct seq_file *m, void *unused) rcu_read_lock(); rq = READ_ONCE(engine->execlist_port[0].request); - if (rq) - print_request(m, rq, "\t\tELSP[0] "); - else + if (rq) { + 
seq_printf(m, "\t\tELSP[0] count=%d, ", + engine->execlist_port[0].count); + print_request(m, rq, "rq: "); + } else { seq_printf(m, "\t\tELSP[0] idle\n"); + } rq = READ_ONCE(engine->execlist_port[1].request); - if (rq) - print_request(m, rq, "\t\tELSP[1] "); - else + if (rq) { + seq_printf(m, "\t\tELSP[1] count=%d, ", + engine->execlist_port[1].count); + print_request(m, rq, "rq: "); + } else { seq_printf(m, "\t\tELSP[1] idle\n"); + } rcu_read_unlock(); spin_lock_irq(&engine->timeline->lock); @@ -3346,14 +3376,14 @@ static int i915_engine_info(struct seq_file *m, void *unused) I915_READ(RING_PP_DIR_DCLV(engine))); } - spin_lock_irq(&b->lock); + spin_lock_irq(&b->rb_lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { struct intel_wait *w = rb_entry(rb, typeof(*w), node); seq_printf(m, "\t%s [%d] waiting for %x\n", w->tsk->comm, w->tsk->pid, w->seqno); } - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); seq_puts(m, "\n"); } @@ -3538,13 +3568,16 @@ static void drrs_status_per_crtc(struct seq_file *m, struct i915_drrs *drrs = &dev_priv->drrs; int vrefresh = 0; struct drm_connector *connector; + struct drm_connector_list_iter conn_iter; - drm_for_each_connector(connector, dev) { + drm_connector_list_iter_begin(dev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { if (connector->state->crtc != &intel_crtc->base) continue; seq_printf(m, "%s:\n", connector->name); } + drm_connector_list_iter_end(&conn_iter); if (dev_priv->vbt.drrs_type == STATIC_DRRS_SUPPORT) seq_puts(m, "\tVBT: DRRS_type: Static"); @@ -3630,9 +3663,10 @@ static int i915_dp_mst_info(struct seq_file *m, void *unused) struct intel_encoder *intel_encoder; struct intel_digital_port *intel_dig_port; struct drm_connector *connector; + struct drm_connector_list_iter conn_iter; - drm_modeset_lock_all(dev); - drm_for_each_connector(connector, dev) { + drm_connector_list_iter_begin(dev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { if 
(connector->connector_type != DRM_MODE_CONNECTOR_DisplayPort) continue; @@ -3648,7 +3682,8 @@ static int i915_dp_mst_info(struct seq_file *m, void *unused) port_name(intel_dig_port->port)); drm_dp_mst_dump_topology(m, &intel_dig_port->dp.mst_mgr); } - drm_modeset_unlock_all(dev); + drm_connector_list_iter_end(&conn_iter); + return 0; } @@ -3660,14 +3695,12 @@ static ssize_t i915_displayport_test_active_write(struct file *file, int status = 0; struct drm_device *dev; struct drm_connector *connector; - struct list_head *connector_list; + struct drm_connector_list_iter conn_iter; struct intel_dp *intel_dp; int val = 0; dev = ((struct seq_file *)file->private_data)->private; - connector_list = &dev->mode_config.connector_list; - if (len == 0) return 0; @@ -3683,7 +3716,8 @@ static ssize_t i915_displayport_test_active_write(struct file *file, input_buffer[len] = '\0'; DRM_DEBUG_DRIVER("Copied %d bytes from user\n", (unsigned int)len); - list_for_each_entry(connector, connector_list, head) { + drm_connector_list_iter_begin(dev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { if (connector->connector_type != DRM_MODE_CONNECTOR_DisplayPort) continue; @@ -3693,7 +3727,7 @@ static ssize_t i915_displayport_test_active_write(struct file *file, intel_dp = enc_to_intel_dp(connector->encoder); status = kstrtoint(input_buffer, 10, &val); if (status < 0) - goto out; + break; DRM_DEBUG_DRIVER("Got %d for test active\n", val); /* To prevent erroneous activation of the compliance * testing code, only accept an actual value of 1 here @@ -3704,6 +3738,7 @@ static ssize_t i915_displayport_test_active_write(struct file *file, intel_dp->compliance.test_active = 0; } } + drm_connector_list_iter_end(&conn_iter); out: kfree(input_buffer); if (status < 0) @@ -3717,10 +3752,11 @@ static int i915_displayport_test_active_show(struct seq_file *m, void *data) { struct drm_device *dev = m->private; struct drm_connector *connector; - struct list_head *connector_list = 
&dev->mode_config.connector_list; + struct drm_connector_list_iter conn_iter; struct intel_dp *intel_dp; - list_for_each_entry(connector, connector_list, head) { + drm_connector_list_iter_begin(dev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { if (connector->connector_type != DRM_MODE_CONNECTOR_DisplayPort) continue; @@ -3735,6 +3771,7 @@ static int i915_displayport_test_active_show(struct seq_file *m, void *data) } else seq_puts(m, "0"); } + drm_connector_list_iter_end(&conn_iter); return 0; } @@ -3761,10 +3798,11 @@ static int i915_displayport_test_data_show(struct seq_file *m, void *data) { struct drm_device *dev = m->private; struct drm_connector *connector; - struct list_head *connector_list = &dev->mode_config.connector_list; + struct drm_connector_list_iter conn_iter; struct intel_dp *intel_dp; - list_for_each_entry(connector, connector_list, head) { + drm_connector_list_iter_begin(dev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { if (connector->connector_type != DRM_MODE_CONNECTOR_DisplayPort) continue; @@ -3772,10 +3810,23 @@ static int i915_displayport_test_data_show(struct seq_file *m, void *data) if (connector->status == connector_status_connected && connector->encoder != NULL) { intel_dp = enc_to_intel_dp(connector->encoder); - seq_printf(m, "%lx", intel_dp->compliance.test_data.edid); + if (intel_dp->compliance.test_type == + DP_TEST_LINK_EDID_READ) + seq_printf(m, "%lx", + intel_dp->compliance.test_data.edid); + else if (intel_dp->compliance.test_type == + DP_TEST_LINK_VIDEO_PATTERN) { + seq_printf(m, "hdisplay: %d\n", + intel_dp->compliance.test_data.hdisplay); + seq_printf(m, "vdisplay: %d\n", + intel_dp->compliance.test_data.vdisplay); + seq_printf(m, "bpc: %u\n", + intel_dp->compliance.test_data.bpc); + } } else seq_puts(m, "0"); } + drm_connector_list_iter_end(&conn_iter); return 0; } @@ -3800,10 +3851,11 @@ static int i915_displayport_test_type_show(struct seq_file *m, void *data) { struct 
drm_device *dev = m->private; struct drm_connector *connector; - struct list_head *connector_list = &dev->mode_config.connector_list; + struct drm_connector_list_iter conn_iter; struct intel_dp *intel_dp; - list_for_each_entry(connector, connector_list, head) { + drm_connector_list_iter_begin(dev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { if (connector->connector_type != DRM_MODE_CONNECTOR_DisplayPort) continue; @@ -3815,6 +3867,7 @@ static int i915_displayport_test_type_show(struct seq_file *m, void *data) } else seq_puts(m, "0"); } + drm_connector_list_iter_end(&conn_iter); return 0; } @@ -4076,7 +4129,9 @@ i915_wedged_get(void *data, u64 *val) static int i915_wedged_set(void *data, u64 val) { - struct drm_i915_private *dev_priv = data; + struct drm_i915_private *i915 = data; + struct intel_engine_cs *engine; + unsigned int tmp; /* * There is no safeguard against this debugfs entry colliding @@ -4086,11 +4141,19 @@ i915_wedged_set(void *data, u64 val) * while it is writing to 'i915_wedged' */ - if (i915_reset_in_progress(&dev_priv->gpu_error)) + if (i915_reset_backoff(&i915->gpu_error)) return -EAGAIN; - i915_handle_error(dev_priv, val, - "Manually setting wedged to %llu", val); + for_each_engine_masked(engine, i915, val, tmp) { + engine->hangcheck.seqno = intel_engine_get_seqno(engine); + engine->hangcheck.stalled = true; + } + + i915_handle_error(i915, val, "Manually setting wedged to %llu", val); + + wait_on_bit(&i915->gpu_error.flags, + I915_RESET_HANDOFF, + TASK_UNINTERRUPTIBLE); return 0; } @@ -4100,6 +4163,37 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops, "%llu\n"); static int +fault_irq_set(struct drm_i915_private *i915, + unsigned long *irq, + unsigned long val) +{ + int err; + + err = mutex_lock_interruptible(&i915->drm.struct_mutex); + if (err) + return err; + + err = i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED | + I915_WAIT_INTERRUPTIBLE); + if (err) + goto err_unlock; + + *irq = val; + 
mutex_unlock(&i915->drm.struct_mutex); + + /* Flush idle worker to disarm irq */ + while (flush_delayed_work(&i915->gt.idle_work)) + ; + + return 0; + +err_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int i915_ring_missed_irq_get(void *data, u64 *val) { struct drm_i915_private *dev_priv = data; @@ -4111,18 +4205,9 @@ i915_ring_missed_irq_get(void *data, u64 *val) static int i915_ring_missed_irq_set(void *data, u64 val) { - struct drm_i915_private *dev_priv = data; - struct drm_device *dev = &dev_priv->drm; - int ret; + struct drm_i915_private *i915 = data; - /* Lock against concurrent debugfs callers */ - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - dev_priv->gpu_error.missed_irq_rings = val; - mutex_unlock(&dev->struct_mutex); - - return 0; + return fault_irq_set(i915, &i915->gpu_error.missed_irq_rings, val); } DEFINE_SIMPLE_ATTRIBUTE(i915_ring_missed_irq_fops, @@ -4142,13 +4227,12 @@ i915_ring_test_irq_get(void *data, u64 *val) static int i915_ring_test_irq_set(void *data, u64 val) { - struct drm_i915_private *dev_priv = data; + struct drm_i915_private *i915 = data; - val &= INTEL_INFO(dev_priv)->ring_mask; + val &= INTEL_INFO(i915)->ring_mask; DRM_DEBUG_DRIVER("Masking interrupts on rings 0x%08llx\n", val); - dev_priv->gpu_error.test_irq_rings = val; - return 0; + return fault_irq_set(i915, &i915->gpu_error.test_irq_rings, val); } DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops, @@ -4160,11 +4244,13 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops, #define DROP_RETIRE 0x4 #define DROP_ACTIVE 0x8 #define DROP_FREED 0x10 +#define DROP_SHRINK_ALL 0x20 #define DROP_ALL (DROP_UNBOUND | \ DROP_BOUND | \ DROP_RETIRE | \ DROP_ACTIVE | \ - DROP_FREED) + DROP_FREED | \ + DROP_SHRINK_ALL) static int i915_drop_caches_get(void *data, u64 *val) { @@ -4196,15 +4282,20 @@ i915_drop_caches_set(void *data, u64 val) goto unlock; } - if (val & (DROP_RETIRE | DROP_ACTIVE)) + if (val & DROP_RETIRE) 
i915_gem_retire_requests(dev_priv); + lockdep_set_current_reclaim_state(GFP_KERNEL); if (val & DROP_BOUND) i915_gem_shrink(dev_priv, LONG_MAX, I915_SHRINK_BOUND); if (val & DROP_UNBOUND) i915_gem_shrink(dev_priv, LONG_MAX, I915_SHRINK_UNBOUND); + if (val & DROP_SHRINK_ALL) + i915_gem_shrink_all(dev_priv); + lockdep_clear_current_reclaim_state(); + unlock: mutex_unlock(&dev->struct_mutex); @@ -4263,7 +4354,8 @@ i915_max_freq_set(void *data, u64 val) dev_priv->rps.max_freq_softlimit = val; - intel_set_rps(dev_priv, val); + if (intel_set_rps(dev_priv, val)) + DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); mutex_unlock(&dev_priv->rps.hw_lock); @@ -4318,7 +4410,8 @@ i915_min_freq_set(void *data, u64 val) dev_priv->rps.min_freq_softlimit = val; - intel_set_rps(dev_priv, val); + if (intel_set_rps(dev_priv, val)) + DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); mutex_unlock(&dev_priv->rps.hw_lock); @@ -4444,7 +4537,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, sseu->slice_mask |= BIT(s); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask; @@ -4593,37 +4686,81 @@ static const struct file_operations i915_forcewake_fops = { .release = i915_forcewake_release, }; -static int i915_forcewake_create(struct dentry *root, struct drm_minor *minor) +static int i915_hpd_storm_ctl_show(struct seq_file *m, void *data) { - struct dentry *ent; + struct drm_i915_private *dev_priv = m->private; + struct i915_hotplug *hotplug = &dev_priv->hotplug; - ent = debugfs_create_file("i915_forcewake_user", - S_IRUSR, - root, to_i915(minor->dev), - &i915_forcewake_fops); - if (!ent) - return -ENOMEM; + seq_printf(m, "Threshold: %d\n", hotplug->hpd_storm_threshold); + seq_printf(m, "Detected: %s\n", + yesno(delayed_work_pending(&hotplug->reenable_work))); - return drm_add_fake_info_node(minor, ent, &i915_forcewake_fops); + return 0; } -static int 
i915_debugfs_create(struct dentry *root, - struct drm_minor *minor, - const char *name, - const struct file_operations *fops) +static ssize_t i915_hpd_storm_ctl_write(struct file *file, + const char __user *ubuf, size_t len, + loff_t *offp) { - struct dentry *ent; + struct seq_file *m = file->private_data; + struct drm_i915_private *dev_priv = m->private; + struct i915_hotplug *hotplug = &dev_priv->hotplug; + unsigned int new_threshold; + int i; + char *newline; + char tmp[16]; - ent = debugfs_create_file(name, - S_IRUGO | S_IWUSR, - root, to_i915(minor->dev), - fops); - if (!ent) - return -ENOMEM; + if (len >= sizeof(tmp)) + return -EINVAL; + + if (copy_from_user(tmp, ubuf, len)) + return -EFAULT; + + tmp[len] = '\0'; + + /* Strip newline, if any */ + newline = strchr(tmp, '\n'); + if (newline) + *newline = '\0'; + + if (strcmp(tmp, "reset") == 0) + new_threshold = HPD_STORM_DEFAULT_THRESHOLD; + else if (kstrtouint(tmp, 10, &new_threshold) != 0) + return -EINVAL; + + if (new_threshold > 0) + DRM_DEBUG_KMS("Setting HPD storm detection threshold to %d\n", + new_threshold); + else + DRM_DEBUG_KMS("Disabling HPD storm detection\n"); + + spin_lock_irq(&dev_priv->irq_lock); + hotplug->hpd_storm_threshold = new_threshold; + /* Reset the HPD storm stats so we don't accidentally trigger a storm */ + for_each_hpd_pin(i) + hotplug->stats[i].count = 0; + spin_unlock_irq(&dev_priv->irq_lock); + + /* Re-enable hpd immediately if we were in an irq storm */ + flush_delayed_work(&dev_priv->hotplug.reenable_work); + + return len; +} - return drm_add_fake_info_node(minor, ent, fops); +static int i915_hpd_storm_ctl_open(struct inode *inode, struct file *file) +{ + return single_open(file, i915_hpd_storm_ctl_show, inode->i_private); } +static const struct file_operations i915_hpd_storm_ctl_fops = { + .owner = THIS_MODULE, + .open = i915_hpd_storm_ctl_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = i915_hpd_storm_ctl_write +}; + static const 
struct drm_info_list i915_debugfs_list[] = { {"i915_capabilities", i915_capabilities, 0}, {"i915_gem_objects", i915_gem_object_info, 0}, @@ -4690,6 +4827,7 @@ static const struct i915_debugfs_files { {"i915_gem_drop_caches", &i915_drop_caches_fops}, #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) {"i915_error_state", &i915_error_state_fops}, + {"i915_gpu_info", &i915_gpu_info_fops}, #endif {"i915_next_seqno", &i915_next_seqno_fops}, {"i915_display_crc_ctl", &i915_display_crc_ctl_fops}, @@ -4700,28 +4838,34 @@ static const struct i915_debugfs_files { {"i915_dp_test_data", &i915_displayport_test_data_fops}, {"i915_dp_test_type", &i915_displayport_test_type_fops}, {"i915_dp_test_active", &i915_displayport_test_active_fops}, - {"i915_guc_log_control", &i915_guc_log_control_fops} + {"i915_guc_log_control", &i915_guc_log_control_fops}, + {"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops} }; int i915_debugfs_register(struct drm_i915_private *dev_priv) { struct drm_minor *minor = dev_priv->drm.primary; + struct dentry *ent; int ret, i; - ret = i915_forcewake_create(minor->debugfs_root, minor); - if (ret) - return ret; + ent = debugfs_create_file("i915_forcewake_user", S_IRUSR, + minor->debugfs_root, to_i915(minor->dev), + &i915_forcewake_fops); + if (!ent) + return -ENOMEM; ret = intel_pipe_crc_create(minor); if (ret) return ret; for (i = 0; i < ARRAY_SIZE(i915_debugfs_files); i++) { - ret = i915_debugfs_create(minor->debugfs_root, minor, - i915_debugfs_files[i].name, + ent = debugfs_create_file(i915_debugfs_files[i].name, + S_IRUGO | S_IWUSR, + minor->debugfs_root, + to_i915(minor->dev), i915_debugfs_files[i].fops); - if (ret) - return ret; + if (!ent) + return -ENOMEM; } return drm_debugfs_create_files(i915_debugfs_list, @@ -4729,27 +4873,6 @@ int i915_debugfs_register(struct drm_i915_private *dev_priv) minor->debugfs_root, minor); } -void i915_debugfs_unregister(struct drm_i915_private *dev_priv) -{ - struct drm_minor *minor = dev_priv->drm.primary; - int i; - - 
drm_debugfs_remove_files(i915_debugfs_list, - I915_DEBUGFS_ENTRIES, minor); - - drm_debugfs_remove_files((struct drm_info_list *)&i915_forcewake_fops, - 1, minor); - - intel_pipe_crc_cleanup(minor); - - for (i = 0; i < ARRAY_SIZE(i915_debugfs_files); i++) { - struct drm_info_list *info_list = - (struct drm_info_list *)i915_debugfs_files[i].fops; - - drm_debugfs_remove_files(info_list, 1, minor); - } -} - struct dpcd_block { /* DPCD dump start address. */ unsigned int offset; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 5c089b3c2a7e..3036d4835b0f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -43,6 +43,7 @@ #include <drm/drmP.h> #include <drm/drm_crtc_helper.h> +#include <drm/drm_atomic_helper.h> #include <drm/i915_drm.h> #include "i915_drv.h" @@ -316,10 +317,9 @@ static int i915_getparam(struct drm_device *dev, void *data, value = INTEL_INFO(dev_priv)->sseu.min_eu_in_pool; break; case I915_PARAM_HUC_STATUS: - /* The register is already force-woken. 
We dont need - * any rpm here - */ + intel_runtime_pm_get(dev_priv); value = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED; + intel_runtime_pm_put(dev_priv); break; case I915_PARAM_MMAP_GTT_VERSION: /* Though we've started our numbering from 1, and so class all @@ -348,6 +348,8 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_HANDLE_LUT: case I915_PARAM_HAS_COHERENT_PHYS_GTT: case I915_PARAM_HAS_EXEC_SOFTPIN: + case I915_PARAM_HAS_EXEC_ASYNC: + case I915_PARAM_HAS_EXEC_FENCE: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from @@ -547,6 +549,7 @@ static const struct vga_switcheroo_client_ops i915_switcheroo_ops = { static void i915_gem_fini(struct drm_i915_private *dev_priv) { mutex_lock(&dev_priv->drm.struct_mutex); + intel_uc_fini_hw(dev_priv); i915_gem_cleanup_engines(dev_priv); i915_gem_context_fini(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); @@ -565,9 +568,7 @@ static int i915_load_modeset_init(struct drm_device *dev) if (i915_inject_load_failure()) return -ENODEV; - ret = intel_bios_init(dev_priv); - if (ret) - DRM_INFO("failed to find VBIOS tables\n"); + intel_bios_init(dev_priv); /* If we have > 1 VGA cards, then we need to arbitrate access * to the common VGA resources. 
@@ -605,12 +606,11 @@ static int i915_load_modeset_init(struct drm_device *dev) if (ret) goto cleanup_irq; - intel_huc_init(dev_priv); - intel_guc_init(dev_priv); + intel_uc_init_fw(dev_priv); ret = i915_gem_init(dev_priv); if (ret) - goto cleanup_irq; + goto cleanup_uc; intel_modeset_gem_init(dev); @@ -632,9 +632,9 @@ cleanup_gem: if (i915_gem_suspend(dev_priv)) DRM_ERROR("failed to idle hardware; continuing to unload!\n"); i915_gem_fini(dev_priv); +cleanup_uc: + intel_uc_fini_fw(dev_priv); cleanup_irq: - intel_guc_fini(dev_priv); - intel_huc_fini(dev_priv); drm_irq_uninstall(dev); intel_teardown_gmbus(dev_priv); cleanup_csr: @@ -754,6 +754,15 @@ out_err: return -ENOMEM; } +static void i915_engines_cleanup(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) + kfree(engine); +} + static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv) { destroy_workqueue(dev_priv->hotplug.dp_wq); @@ -767,10 +776,17 @@ static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv) */ static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv) { - if (IS_HSW_EARLY_SDV(dev_priv) || - IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0)) + bool pre = false; + + pre |= IS_HSW_EARLY_SDV(dev_priv); + pre |= IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0); + pre |= IS_BXT_REVID(dev_priv, 0, BXT_REVID_B_LAST); + + if (pre) { DRM_ERROR("This is a pre-production stepping. 
" "It may not be fully functional.\n"); + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK); + } } /** @@ -806,9 +822,9 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, spin_lock_init(&dev_priv->gpu_error.lock); mutex_init(&dev_priv->backlight_lock); spin_lock_init(&dev_priv->uncore.lock); + spin_lock_init(&dev_priv->mm.object_stat_lock); spin_lock_init(&dev_priv->mmio_flip_lock); - spin_lock_init(&dev_priv->wm.dsparb_lock); mutex_init(&dev_priv->sb_lock); mutex_init(&dev_priv->modeset_restore_lock); mutex_init(&dev_priv->av_mutex); @@ -816,12 +832,15 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, mutex_init(&dev_priv->pps_mutex); intel_uc_init_early(dev_priv); - i915_memcpy_init_early(dev_priv); + ret = intel_engines_init_early(dev_priv); + if (ret) + return ret; + ret = i915_workqueues_init(dev_priv); if (ret < 0) - return ret; + goto err_engines; /* This must be called before any calls to HAS_PCH_* */ intel_detect_pch(dev_priv); @@ -850,6 +869,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, err_workqueues: i915_workqueues_cleanup(dev_priv); +err_engines: + i915_engines_cleanup(dev_priv); return ret; } @@ -862,6 +883,7 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv) i915_perf_fini(dev_priv); i915_gem_load_cleanup(dev_priv); i915_workqueues_cleanup(dev_priv); + i915_engines_cleanup(dev_priv); } static int i915_mmio_setup(struct drm_i915_private *dev_priv) @@ -928,6 +950,7 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv) goto put_bridge; intel_uncore_init(dev_priv); + i915_gem_init_mmio(dev_priv); return 0; @@ -965,7 +988,9 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt); i915.semaphores = intel_sanitize_semaphores(dev_priv, i915.semaphores); - DRM_DEBUG_DRIVER("use GPU sempahores? %s\n", yesno(i915.semaphores)); + DRM_DEBUG_DRIVER("use GPU semaphores? 
%s\n", yesno(i915.semaphores)); + + intel_uc_sanitize_options(dev_priv); } /** @@ -1165,7 +1190,6 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) i915_teardown_sysfs(dev_priv); i915_guc_log_unregister(dev_priv); - i915_debugfs_unregister(dev_priv); drm_dev_unregister(&dev_priv->drm); i915_gem_shrinker_cleanup(dev_priv); @@ -1184,11 +1208,15 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) */ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) { + const struct intel_device_info *match_info = + (struct intel_device_info *)ent->driver_data; struct drm_i915_private *dev_priv; int ret; - if (i915.nuclear_pageflip) - driver.driver_features |= DRIVER_ATOMIC; + /* Enable nuclear pageflip on ILK+, except vlv/chv */ + if (!i915.nuclear_pageflip && + (match_info->gen < 5 || match_info->has_gmch_display)) + driver.driver_features &= ~DRIVER_ATOMIC; ret = -ENOMEM; dev_priv = kzalloc(sizeof(*dev_priv), GFP_KERNEL); @@ -1196,8 +1224,7 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) ret = drm_dev_init(&dev_priv->drm, &driver, &pdev->dev); if (ret) { DRM_DEV_ERROR(&pdev->dev, "allocation failed\n"); - kfree(dev_priv); - return ret; + goto out_free; } dev_priv->drm.pdev = pdev; @@ -1205,7 +1232,7 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) ret = pci_enable_device(pdev); if (ret) - goto out_free_priv; + goto out_fini; pci_set_drvdata(pdev, &dev_priv->drm); @@ -1269,9 +1296,11 @@ out_runtime_pm_put: i915_driver_cleanup_early(dev_priv); out_pci_disable: pci_disable_device(pdev); -out_free_priv: +out_fini: i915_load_error(dev_priv, "Device initialization failed (%d)\n", ret); - drm_dev_unref(&dev_priv->drm); + drm_dev_fini(&dev_priv->drm); +out_free: + kfree(dev_priv); return ret; } @@ -1287,6 +1316,8 @@ void i915_driver_unload(struct drm_device *dev) intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); + drm_atomic_helper_shutdown(dev); + 
intel_gvt_cleanup(dev_priv); i915_driver_unregister(dev_priv); @@ -1316,14 +1347,13 @@ void i915_driver_unload(struct drm_device *dev) /* Free error state after interrupts are fully disabled. */ cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); - i915_destroy_error_state(dev_priv); + i915_reset_error_state(dev_priv); /* Flush any outstanding unpin_work. */ drain_workqueue(dev_priv->wq); - intel_guc_fini(dev_priv); - intel_huc_fini(dev_priv); i915_gem_fini(dev_priv); + intel_uc_fini_fw(dev_priv); intel_fbc_cleanup_cfb(dev_priv); intel_power_domains_fini(dev_priv); @@ -1332,8 +1362,16 @@ void i915_driver_unload(struct drm_device *dev) i915_driver_cleanup_mmio(dev_priv); intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); +} + +static void i915_driver_release(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = to_i915(dev); i915_driver_cleanup_early(dev_priv); + drm_dev_fini(&dev_priv->drm); + + kfree(dev_priv); } static int i915_driver_open(struct drm_device *dev, struct drm_file *file) @@ -1365,17 +1403,14 @@ static void i915_driver_lastclose(struct drm_device *dev) vga_switcheroo_process_delayed_switch(); } -static void i915_driver_preclose(struct drm_device *dev, struct drm_file *file) +static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file) { + struct drm_i915_file_private *file_priv = file->driver_priv; + mutex_lock(&dev->struct_mutex); i915_gem_context_close(dev, file); i915_gem_release(dev, file); mutex_unlock(&dev->struct_mutex); -} - -static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file) -{ - struct drm_i915_file_private *file_priv = file->driver_priv; kfree(file_priv); } @@ -1452,7 +1487,7 @@ static int i915_drm_suspend(struct drm_device *dev) opregion_target_state = suspend_to_idle(dev_priv) ? 
PCI_D1 : PCI_D3cold; intel_opregion_notify_adapter(dev_priv, opregion_target_state); - intel_uncore_forcewake_reset(dev_priv, false); + intel_uncore_suspend(dev_priv); intel_opregion_unregister(dev_priv); intel_fbdev_set_suspend(dev, FBINFO_STATE_SUSPENDED, true); @@ -1697,7 +1732,7 @@ static int i915_drm_resume_early(struct drm_device *dev) DRM_ERROR("Resume prepare failed: %d, continuing anyway\n", ret); - intel_uncore_early_sanitize(dev_priv, true); + intel_uncore_resume_early(dev_priv); if (IS_GEN9_LP(dev_priv)) { if (!dev_priv->suspended_to_idle) @@ -1713,6 +1748,8 @@ static int i915_drm_resume_early(struct drm_device *dev) !(dev_priv->suspended_to_idle && dev_priv->csr.dmc_payload)) intel_power_domains_init_hw(dev_priv, true); + i915_gem_sanitize(dev_priv); + enable_rpm_wakeref_asserts(dev_priv); out: @@ -1758,12 +1795,15 @@ void i915_reset(struct drm_i915_private *dev_priv) int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); + GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); - if (!test_and_clear_bit(I915_RESET_IN_PROGRESS, &error->flags)) + if (!test_bit(I915_RESET_HANDOFF, &error->flags)) return; /* Clear any previous failed attempts at recovery. Time to try again. 
*/ - __clear_bit(I915_WEDGED, &error->flags); + if (!i915_gem_unset_wedged(dev_priv)) + goto wakeup; + error->reset_count++; pr_notice("drm/i915: Resetting chip after gpu hang\n"); @@ -1809,15 +1849,18 @@ void i915_reset(struct drm_i915_private *dev_priv) i915_queue_hangcheck(dev_priv); -wakeup: +finish: i915_gem_reset_finish(dev_priv); enable_irq(dev_priv->drm.irq); - wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS); + +wakeup: + clear_bit(I915_RESET_HANDOFF, &error->flags); + wake_up_bit(&error->flags, I915_RESET_HANDOFF); return; error: i915_gem_set_wedged(dev_priv); - goto wakeup; + goto finish; } static int i915_pm_suspend(struct device *kdev) @@ -2132,6 +2175,20 @@ static void vlv_restore_gunit_s0ix_state(struct drm_i915_private *dev_priv) I915_WRITE(VLV_GUNIT_CLOCK_GATE2, s->clock_gate_dis2); } +static int vlv_wait_for_pw_status(struct drm_i915_private *dev_priv, + u32 mask, u32 val) +{ + /* The HW does not like us polling for PW_STATUS frequently, so + * use the sleeping loop rather than risk the busy spin within + * intel_wait_for_register(). + * + * Transitioning between RC6 states should be at most 2ms (see + * valleyview_enable_rps) so use a 3ms timeout. + */ + return wait_for((I915_READ_NOTRACE(VLV_GTLC_PW_STATUS) & mask) == val, + 3); +} + int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool force_on) { u32 val; @@ -2160,8 +2217,9 @@ int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool force_on) static int vlv_allow_gt_wake(struct drm_i915_private *dev_priv, bool allow) { + u32 mask; u32 val; - int err = 0; + int err; val = I915_READ(VLV_GTLC_WAKE_CTRL); val &= ~VLV_GTLC_ALLOWWAKEREQ; @@ -2170,45 +2228,32 @@ static int vlv_allow_gt_wake(struct drm_i915_private *dev_priv, bool allow) I915_WRITE(VLV_GTLC_WAKE_CTRL, val); POSTING_READ(VLV_GTLC_WAKE_CTRL); - err = intel_wait_for_register(dev_priv, - VLV_GTLC_PW_STATUS, - VLV_GTLC_ALLOWWAKEACK, - allow, - 1); + mask = VLV_GTLC_ALLOWWAKEACK; + val = allow ? 
mask : 0; + + err = vlv_wait_for_pw_status(dev_priv, mask, val); if (err) DRM_ERROR("timeout disabling GT waking\n"); return err; } -static int vlv_wait_for_gt_wells(struct drm_i915_private *dev_priv, - bool wait_for_on) +static void vlv_wait_for_gt_wells(struct drm_i915_private *dev_priv, + bool wait_for_on) { u32 mask; u32 val; - int err; mask = VLV_GTLC_PW_MEDIA_STATUS_MASK | VLV_GTLC_PW_RENDER_STATUS_MASK; val = wait_for_on ? mask : 0; - if ((I915_READ(VLV_GTLC_PW_STATUS) & mask) == val) - return 0; - - DRM_DEBUG_KMS("waiting for GT wells to go %s (%08x)\n", - onoff(wait_for_on), - I915_READ(VLV_GTLC_PW_STATUS)); /* * RC6 transitioning can be delayed up to 2 msec (see * valleyview_enable_rps), use 3 msec for safety. */ - err = intel_wait_for_register(dev_priv, - VLV_GTLC_PW_STATUS, mask, val, - 3); - if (err) + if (vlv_wait_for_pw_status(dev_priv, mask, val)) DRM_ERROR("timeout waiting for GT wells to go %s\n", onoff(wait_for_on)); - - return err; } static void vlv_check_no_gt_access(struct drm_i915_private *dev_priv) @@ -2229,7 +2274,7 @@ static int vlv_suspend_complete(struct drm_i915_private *dev_priv) * Bspec defines the following GT well on flags as debug only, so * don't treat them as hard failures. 
*/ - (void)vlv_wait_for_gt_wells(dev_priv, false); + vlv_wait_for_gt_wells(dev_priv, false); mask = VLV_GTLC_RENDER_CTX_EXISTS | VLV_GTLC_MEDIA_CTX_EXISTS; WARN_ON((I915_READ(VLV_GTLC_WAKE_CTRL) & mask) != mask); @@ -2340,7 +2385,7 @@ static int intel_runtime_suspend(struct device *kdev) return ret; } - intel_uncore_forcewake_reset(dev_priv, false); + intel_uncore_suspend(dev_priv); enable_rpm_wakeref_asserts(dev_priv); WARN_ON_ONCE(atomic_read(&dev_priv->pm.wakeref_count)); @@ -2530,7 +2575,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH), - DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), @@ -2572,10 +2617,10 @@ static struct drm_driver driver = { */ .driver_features = DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | DRIVER_PRIME | - DRIVER_RENDER | DRIVER_MODESET, + DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC, + .release = i915_driver_release, .open = i915_driver_open, .lastclose = i915_driver_lastclose, - .preclose = i915_driver_preclose, .postclose = i915_driver_postclose, .set_busid = drm_pci_set_busid, @@ -2601,3 +2646,7 @@ static struct drm_driver driver = { .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, }; + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_drm.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 46fcd8b7080a..c9b0949f6c1a 100644 --- 
a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -79,26 +79,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20170123" -#define DRIVER_TIMESTAMP 1485156432 - -#undef WARN_ON -/* Many gcc seem to no see through this and fall over :( */ -#if 0 -#define WARN_ON(x) ({ \ - bool __i915_warn_cond = (x); \ - if (__builtin_constant_p(__i915_warn_cond)) \ - BUILD_BUG_ON(__i915_warn_cond); \ - WARN(__i915_warn_cond, "WARN_ON(" #x ")"); }) -#else -#define WARN_ON(x) WARN((x), "%s", "WARN_ON(" __stringify(x) ")") -#endif - -#undef WARN_ON_ONCE -#define WARN_ON_ONCE(x) WARN_ONCE((x), "%s", "WARN_ON_ONCE(" __stringify(x) ")") - -#define MISSING_CASE(x) WARN(1, "Missing switch case (%lu) in %s\n", \ - (long) (x), __func__); +#define DRIVER_DATE "20170403" +#define DRIVER_TIMESTAMP 1491198738 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions @@ -344,6 +326,11 @@ enum intel_display_power_domain { POWER_DOMAIN_PORT_DDI_C_LANES, POWER_DOMAIN_PORT_DDI_D_LANES, POWER_DOMAIN_PORT_DDI_E_LANES, + POWER_DOMAIN_PORT_DDI_A_IO, + POWER_DOMAIN_PORT_DDI_B_IO, + POWER_DOMAIN_PORT_DDI_C_IO, + POWER_DOMAIN_PORT_DDI_D_IO, + POWER_DOMAIN_PORT_DDI_E_IO, POWER_DOMAIN_PORT_DSI, POWER_DOMAIN_PORT_CRT, POWER_DOMAIN_PORT_OTHER, @@ -385,6 +372,8 @@ enum hpd_pin { #define for_each_hpd_pin(__pin) \ for ((__pin) = (HPD_NONE + 1); (__pin) < HPD_NUM_PINS; (__pin)++) +#define HPD_STORM_DEFAULT_THRESHOLD 5 + struct i915_hotplug { struct work_struct hotplug_work; @@ -408,6 +397,8 @@ struct i915_hotplug { struct work_struct poll_init_work; bool poll_enabled; + unsigned int hpd_storm_threshold; + /* * if we get a HPD irq from DP and a HPD irq from non-DP * the non-DP HPD could block the workqueue on a mode config @@ -480,10 +471,8 @@ struct i915_hotplug { &(dev)->mode_config.encoder_list, \ base.head) -#define for_each_intel_connector(dev, 
intel_connector) \ - list_for_each_entry(intel_connector, \ - &(dev)->mode_config.connector_list, \ - base.head) +#define for_each_intel_connector_iter(intel_connector, iter) \ + while ((intel_connector = to_intel_connector(drm_connector_list_iter_next(iter)))) #define for_each_encoder_on_crtc(dev, __crtc, intel_encoder) \ list_for_each_entry((intel_encoder), &(dev)->mode_config.encoder_list, base.head) \ @@ -495,7 +484,35 @@ struct i915_hotplug { #define for_each_power_domain(domain, mask) \ for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++) \ - for_each_if ((1 << (domain)) & (mask)) + for_each_if (BIT_ULL(domain) & (mask)) + +#define for_each_power_well(__dev_priv, __power_well) \ + for ((__power_well) = (__dev_priv)->power_domains.power_wells; \ + (__power_well) - (__dev_priv)->power_domains.power_wells < \ + (__dev_priv)->power_domains.power_well_count; \ + (__power_well)++) + +#define for_each_power_well_rev(__dev_priv, __power_well) \ + for ((__power_well) = (__dev_priv)->power_domains.power_wells + \ + (__dev_priv)->power_domains.power_well_count - 1; \ + (__power_well) - (__dev_priv)->power_domains.power_wells >= 0; \ + (__power_well)--) + +#define for_each_power_domain_well(__dev_priv, __power_well, __domain_mask) \ + for_each_power_well(__dev_priv, __power_well) \ + for_each_if ((__power_well)->domains & (__domain_mask)) + +#define for_each_power_domain_well_rev(__dev_priv, __power_well, __domain_mask) \ + for_each_power_well_rev(__dev_priv, __power_well) \ + for_each_if ((__power_well)->domains & (__domain_mask)) + +#define for_each_intel_plane_in_state(__state, plane, plane_state, __i) \ + for ((__i) = 0; \ + (__i) < (__state)->base.dev->mode_config.num_total_plane && \ + ((plane) = to_intel_plane((__state)->base.planes[__i].ptr), \ + (plane_state) = to_intel_plane_state((__state)->base.planes[__i].state), 1); \ + (__i)++) \ + for_each_if (plane_state) struct drm_i915_private; struct i915_mm_struct; @@ -601,9 +618,13 @@ struct 
intel_initial_plane_config; struct intel_crtc; struct intel_limit; struct dpll; +struct intel_cdclk_state; struct drm_i915_display_funcs { - int (*get_display_clock_speed)(struct drm_i915_private *dev_priv); + void (*get_cdclk)(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state); + void (*set_cdclk)(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state); int (*get_fifo_size)(struct drm_i915_private *dev_priv, int plane); int (*compute_pipe_wm)(struct intel_crtc_state *cstate); int (*compute_intermediate_wm)(struct drm_device *dev, @@ -618,7 +639,6 @@ struct drm_i915_display_funcs { int (*compute_global_watermarks)(struct drm_atomic_state *state); void (*update_wm)(struct intel_crtc *crtc); int (*modeset_calc_cdclk)(struct drm_atomic_state *state); - void (*modeset_commit_cdclk)(struct drm_atomic_state *state); /* Returns the active state of the crtc, and if the crtc is active, * fills out the pipe-config with the hw state. */ bool (*get_pipe_config)(struct intel_crtc *, @@ -637,7 +657,8 @@ struct drm_i915_display_funcs { struct intel_encoder *encoder, const struct drm_display_mode *adjusted_mode); void (*audio_codec_disable)(struct intel_encoder *encoder); - void (*fdi_link_train)(struct drm_crtc *crtc); + void (*fdi_link_train)(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state); void (*init_clock_gating)(struct drm_i915_private *dev_priv); int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc, struct drm_framebuffer *fb, @@ -664,9 +685,9 @@ enum forcewake_domain_id { }; enum forcewake_domains { - FORCEWAKE_RENDER = (1 << FW_DOMAIN_ID_RENDER), - FORCEWAKE_BLITTER = (1 << FW_DOMAIN_ID_BLITTER), - FORCEWAKE_MEDIA = (1 << FW_DOMAIN_ID_MEDIA), + FORCEWAKE_RENDER = BIT(FW_DOMAIN_ID_RENDER), + FORCEWAKE_BLITTER = BIT(FW_DOMAIN_ID_BLITTER), + FORCEWAKE_MEDIA = BIT(FW_DOMAIN_ID_MEDIA), FORCEWAKE_ALL = (FORCEWAKE_RENDER | FORCEWAKE_BLITTER | FORCEWAKE_MEDIA) @@ -693,21 +714,25 @@ 
intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, struct intel_uncore_funcs { void (*force_wake_get)(struct drm_i915_private *dev_priv, - enum forcewake_domains domains); + enum forcewake_domains domains); void (*force_wake_put)(struct drm_i915_private *dev_priv, - enum forcewake_domains domains); - - uint8_t (*mmio_readb)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace); - uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace); - uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace); - uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace); - - void (*mmio_writeb)(struct drm_i915_private *dev_priv, i915_reg_t r, - uint8_t val, bool trace); - void (*mmio_writew)(struct drm_i915_private *dev_priv, i915_reg_t r, - uint16_t val, bool trace); - void (*mmio_writel)(struct drm_i915_private *dev_priv, i915_reg_t r, - uint32_t val, bool trace); + enum forcewake_domains domains); + + uint8_t (*mmio_readb)(struct drm_i915_private *dev_priv, + i915_reg_t r, bool trace); + uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv, + i915_reg_t r, bool trace); + uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv, + i915_reg_t r, bool trace); + uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv, + i915_reg_t r, bool trace); + + void (*mmio_writeb)(struct drm_i915_private *dev_priv, + i915_reg_t r, uint8_t val, bool trace); + void (*mmio_writew)(struct drm_i915_private *dev_priv, + i915_reg_t r, uint16_t val, bool trace); + void (*mmio_writel)(struct drm_i915_private *dev_priv, + i915_reg_t r, uint32_t val, bool trace); }; struct intel_forcewake_range { @@ -723,6 +748,7 @@ struct intel_uncore { const struct intel_forcewake_range *fw_domains_table; unsigned int fw_domains_table_entries; + struct notifier_block pmic_bus_access_nb; struct intel_uncore_funcs funcs; unsigned fifo_count; @@ -730,32 +756,35 @@ struct intel_uncore { enum forcewake_domains 
fw_domains; enum forcewake_domains fw_domains_active; + u32 fw_set; + u32 fw_clear; + u32 fw_reset; + struct intel_uncore_forcewake_domain { - struct drm_i915_private *i915; enum forcewake_domain_id id; enum forcewake_domains mask; unsigned wake_count; struct hrtimer timer; i915_reg_t reg_set; - u32 val_set; - u32 val_clear; i915_reg_t reg_ack; - i915_reg_t reg_post; - u32 val_reset; } fw_domain[FW_DOMAIN_ID_COUNT]; int unclaimed_mmio_check; }; +#define __mask_next_bit(mask) ({ \ + int __idx = ffs(mask) - 1; \ + mask &= ~BIT(__idx); \ + __idx; \ +}) + /* Iterate over initialised fw domains */ -#define for_each_fw_domain_masked(domain__, mask__, dev_priv__) \ - for ((domain__) = &(dev_priv__)->uncore.fw_domain[0]; \ - (domain__) < &(dev_priv__)->uncore.fw_domain[FW_DOMAIN_ID_COUNT]; \ - (domain__)++) \ - for_each_if ((mask__) & (domain__)->mask) +#define for_each_fw_domain_masked(domain__, mask__, dev_priv__, tmp__) \ + for (tmp__ = (mask__); \ + tmp__ ? (domain__ = &(dev_priv__)->uncore.fw_domain[__mask_next_bit(tmp__)]), 1 : 0;) -#define for_each_fw_domain(domain__, dev_priv__) \ - for_each_fw_domain_masked(domain__, FORCEWAKE_ALL, dev_priv__) +#define for_each_fw_domain(domain__, dev_priv__, tmp__) \ + for_each_fw_domain_masked(domain__, (dev_priv__)->uncore.fw_domains, dev_priv__, tmp__) #define CSR_VERSION(major, minor) ((major) << 16 | (minor)) #define CSR_VERSION_MAJOR(version) ((version) >> 16) @@ -858,6 +887,7 @@ enum intel_platform { INTEL_BROXTON, INTEL_KABYLAKE, INTEL_GEMINILAKE, + INTEL_MAX_PLATFORMS }; struct intel_device_info { @@ -892,7 +922,7 @@ struct intel_device_info { struct intel_display_error_state; -struct drm_i915_error_state { +struct i915_gpu_state { struct kref ref; struct timeval time; struct timeval boottime; @@ -902,16 +932,20 @@ struct drm_i915_error_state { char error_msg[128]; bool simulated; + bool awake; + bool wakelock; + bool suspended; int iommu; u32 reset_count; u32 suspend_count; struct intel_device_info device_info; + struct 
i915_params params; /* Generic register state */ u32 eir; u32 pgtbl_er; u32 ier; - u32 gtier[4]; + u32 gtier[4], ngtier; u32 ccid; u32 derrmr; u32 forcewake; @@ -925,6 +959,7 @@ struct drm_i915_error_state { u32 gab_ctl; u32 gfx_mode; + u32 nfence; u64 fence[I915_MAX_NUM_FENCES]; struct intel_overlay_error_state *overlay; struct intel_display_error_state *display; @@ -972,6 +1007,16 @@ struct drm_i915_error_state { u32 semaphore_mboxes[I915_NUM_ENGINES - 1]; struct intel_instdone instdone; + struct drm_i915_error_context { + char comm[TASK_COMM_LEN]; + pid_t pid; + u32 handle; + u32 hw_id; + int ban_score; + int active; + int guilty; + } context; + struct drm_i915_error_object { u64 gtt_offset; u64 gtt_size; @@ -1005,10 +1050,6 @@ struct drm_i915_error_state { u32 pp_dir_base; }; } vm_info; - - pid_t pid; - char comm[TASK_COMM_LEN]; - int context_bans; } engine[I915_NUM_ENGINES]; struct drm_i915_error_buffer { @@ -1272,7 +1313,7 @@ struct vlv_s0ix_state { }; struct intel_rps_ei { - u32 cz_clock; + ktime_t ktime; u32 render_c0; u32 media_c0; }; @@ -1287,7 +1328,7 @@ struct intel_gen6_power_mgmt { u32 pm_iir; /* PM interrupt bits that should never be masked */ - u32 pm_intr_keep; + u32 pm_intrmsk_mbz; /* Frequencies are stored in potentially platform dependent multiples. * In other words, *_freq needs to be multiplied by X to be interesting. 
@@ -1397,7 +1438,7 @@ struct i915_power_well { int count; /* cached hw enabled state */ bool hw_enabled; - unsigned long domains; + u64 domains; /* unique identifier for this power well */ unsigned long id; /* @@ -1458,7 +1499,7 @@ struct i915_gem_mm { struct work_struct free_work; /** Usable portion of the GTT for GEM */ - phys_addr_t stolen_base; /* limited to low memory (32-bit) */ + dma_addr_t stolen_base; /* limited to low memory (32-bit) */ /** PPGTT used for aliasing the PPGTT with the GTT */ struct i915_hw_ppgtt *aliasing_ppgtt; @@ -1500,11 +1541,6 @@ struct drm_i915_error_state_buf { loff_t pos; }; -struct i915_error_state_file_priv { - struct drm_i915_private *i915; - struct drm_i915_error_state *error; -}; - #define I915_RESET_TIMEOUT (10 * HZ) /* 10s */ #define I915_FENCE_TIMEOUT (10 * HZ) /* 10s */ @@ -1521,7 +1557,7 @@ struct i915_gpu_error { /* For reset and error_state handling. */ spinlock_t lock; /* Protected by the above dev->gpu_error.lock. */ - struct drm_i915_error_state *first_error; + struct i915_gpu_state *first_error; unsigned long missed_irq_rings; @@ -1549,8 +1585,33 @@ struct i915_gpu_error { */ unsigned long reset_count; + /** + * flags: Control various stages of the GPU reset + * + * #I915_RESET_BACKOFF - When we start a reset, we want to stop any + * other users acquiring the struct_mutex. To do this we set the + * #I915_RESET_BACKOFF bit in the error flags when we detect a reset + * and then check for that bit before acquiring the struct_mutex (in + * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a + * secondary role in preventing two concurrent global reset attempts. + * + * #I915_RESET_HANDOFF - To perform the actual GPU reset, we need the + * struct_mutex. We try to acquire the struct_mutex in the reset worker, + * but it may be held by some long running waiter (that we cannot + * interrupt without causing trouble). Once we are ready to do the GPU + * reset, we set the I915_RESET_HANDOFF bit and wakeup any waiters. 
If + * they already hold the struct_mutex and want to participate they can + * inspect the bit and do the reset directly, otherwise the worker + * waits for the struct_mutex. + * + * #I915_WEDGED - If reset fails and we can no longer use the GPU, + * we set the #I915_WEDGED bit. Prior to command submission, e.g. + * i915_gem_request_alloc(), this bit is checked and the sequence + * aborted (with -EIO reported to userspace) if set. + */ unsigned long flags; -#define I915_RESET_IN_PROGRESS 0 +#define I915_RESET_BACKOFF 0 +#define I915_RESET_HANDOFF 1 #define I915_WEDGED (BITS_PER_LONG - 1) /** @@ -2055,6 +2116,10 @@ struct i915_oa_ops { bool (*oa_buffer_is_empty)(struct drm_i915_private *dev_priv); }; +struct intel_cdclk_state { + unsigned int cdclk, vco, ref; +}; + struct drm_i915_private { struct drm_device drm; @@ -2157,13 +2222,7 @@ struct drm_i915_private { unsigned int fsb_freq, mem_freq, is_ddr3; unsigned int skl_preferred_vco_freq; - unsigned int cdclk_freq, max_cdclk_freq; - - /* - * For reading holding any crtc lock is sufficient, - * for writing must hold all of them. - */ - unsigned int atomic_cdclk_freq; + unsigned int max_cdclk_freq; unsigned int max_dotclk_freq; unsigned int rawclk_freq; @@ -2171,8 +2230,22 @@ struct drm_i915_private { unsigned int czclk_freq; struct { - unsigned int vco, ref; - } cdclk_pll; + /* + * The current logical cdclk state. + * See intel_atomic_state.cdclk.logical + * + * For reading holding any crtc lock is sufficient, + * for writing must hold all of them. + */ + struct intel_cdclk_state logical; + /* + * The current actual cdclk state. + * See intel_atomic_state.cdclk.actual + */ + struct intel_cdclk_state actual; + /* The current hardware cdclk state */ + struct intel_cdclk_state hw; + } cdclk; /** * wq - Driver workqueue for GEM. 
@@ -2317,9 +2390,6 @@ struct drm_i915_private { } sagv_status; struct { - /* protects DSPARB registers on pre-g4x/vlv/chv */ - spinlock_t dsparb_lock; - /* * Raw watermark latency values: * in 0.1us units for WM0, @@ -2486,6 +2556,11 @@ static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc) return container_of(guc, struct drm_i915_private, guc); } +static inline struct drm_i915_private *huc_to_i915(struct intel_huc *huc) +{ + return container_of(huc, struct drm_i915_private, huc); +} + /* Simple iterator over all initialised engines */ #define for_each_engine(engine__, dev_priv__, id__) \ for ((id__) = 0; \ @@ -2493,12 +2568,6 @@ static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc) (id__)++) \ for_each_if ((engine__) = (dev_priv__)->engine[(id__)]) -#define __mask_next_bit(mask) ({ \ - int __idx = ffs(mask) - 1; \ - mask &= ~BIT(__idx); \ - __idx; \ -}) - /* Iterator over subset of engines selected by mask */ #define for_each_engine_masked(engine__, dev_priv__, mask__, tmp__) \ for (tmp__ = mask__ & INTEL_INFO(dev_priv__)->ring_mask; \ @@ -2752,6 +2821,12 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_KBL_REVID(dev_priv, since, until) \ (IS_KABYLAKE(dev_priv) && IS_REVID(dev_priv, since, until)) +#define GLK_REVID_A0 0x0 +#define GLK_REVID_A1 0x1 + +#define IS_GLK_REVID(dev_priv, since, until) \ + (IS_GEMINILAKE(dev_priv) && IS_REVID(dev_priv, since, until)) + /* * The genX designation typically refers to the render engine, so render * capability related checks should use IS_GEN, while display and other checks @@ -2767,8 +2842,9 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_GEN8(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(7))) #define IS_GEN9(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(8))) -#define IS_GEN9_LP(dev_priv) (IS_GEN9(dev_priv) && INTEL_INFO(dev_priv)->is_lp) #define IS_LP(dev_priv) (INTEL_INFO(dev_priv)->is_lp) +#define IS_GEN9_LP(dev_priv) (IS_GEN9(dev_priv) && 
IS_LP(dev_priv)) +#define IS_GEN9_BC(dev_priv) (IS_GEN9(dev_priv) && !IS_LP(dev_priv)) #define ENGINE_MASK(id) BIT(id) #define RENDER_RING ENGINE_MASK(RCS) @@ -2810,9 +2886,7 @@ intel_info(const struct drm_i915_private *dev_priv) /* WaRsDisableCoarsePowerGating:skl,bxt */ #define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ - (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1) || \ - IS_SKL_GT3(dev_priv) || \ - IS_SKL_GT4(dev_priv)) + (IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) /* * dp aux and gmbus irq on gen4 seems to be able to generate legacy interrupts @@ -2952,6 +3026,9 @@ extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv); extern void i915_update_gfx_val(struct drm_i915_private *dev_priv); int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on); +int intel_engines_init_early(struct drm_i915_private *dev_priv); +int intel_engines_init(struct drm_i915_private *dev_priv); + /* intel_hotplug.c */ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 pin_mask, u32 long_mask); @@ -2990,14 +3067,12 @@ int intel_irq_install(struct drm_i915_private *dev_priv); void intel_irq_uninstall(struct drm_i915_private *dev_priv); extern void intel_uncore_sanitize(struct drm_i915_private *dev_priv); -extern void intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, - bool restore_forcewake); extern void intel_uncore_init(struct drm_i915_private *dev_priv); extern bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv); extern bool intel_uncore_arm_unclaimed_mmio_detection(struct drm_i915_private *dev_priv); extern void intel_uncore_fini(struct drm_i915_private *dev_priv); -extern void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv, - bool restore); +extern void intel_uncore_suspend(struct drm_i915_private *dev_priv); +extern void intel_uncore_resume_early(struct drm_i915_private *dev_priv); const char *intel_uncore_forcewake_domain_to_str(const enum forcewake_domain_id id); void 
intel_uncore_forcewake_get(struct drm_i915_private *dev_priv, enum forcewake_domains domains); @@ -3129,6 +3204,7 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +void i915_gem_sanitize(struct drm_i915_private *i915); int i915_gem_load_init(struct drm_i915_private *dev_priv); void i915_gem_load_cleanup(struct drm_i915_private *dev_priv); void i915_gem_load_init_fences(struct drm_i915_private *dev_priv); @@ -3288,9 +3364,9 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, unsigned int *needs_clflush); int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, unsigned int *needs_clflush); -#define CLFLUSH_BEFORE 0x1 -#define CLFLUSH_AFTER 0x2 -#define CLFLUSH_FLAGS (CLFLUSH_BEFORE | CLFLUSH_AFTER) +#define CLFLUSH_BEFORE BIT(0) +#define CLFLUSH_AFTER BIT(1) +#define CLFLUSH_FLAGS (CLFLUSH_BEFORE | CLFLUSH_AFTER) static inline void i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj) @@ -3320,9 +3396,14 @@ i915_gem_find_active_request(struct intel_engine_cs *engine); void i915_gem_retire_requests(struct drm_i915_private *dev_priv); -static inline bool i915_reset_in_progress(struct i915_gpu_error *error) +static inline bool i915_reset_backoff(struct i915_gpu_error *error) { - return unlikely(test_bit(I915_RESET_IN_PROGRESS, &error->flags)); + return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags)); +} + +static inline bool i915_reset_handoff(struct i915_gpu_error *error) +{ + return unlikely(test_bit(I915_RESET_HANDOFF, &error->flags)); } static inline bool i915_terminally_wedged(struct i915_gpu_error *error) @@ -3330,9 +3411,9 @@ static inline bool i915_terminally_wedged(struct i915_gpu_error *error) return unlikely(test_bit(I915_WEDGED, &error->flags)); } -static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error *error) +static inline bool 
i915_reset_backoff_or_wedged(struct i915_gpu_error *error) { - return i915_reset_in_progress(error) | i915_terminally_wedged(error); + return i915_reset_backoff(error) | i915_terminally_wedged(error); } static inline u32 i915_reset_count(struct i915_gpu_error *error) @@ -3344,13 +3425,15 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); void i915_gem_reset(struct drm_i915_private *dev_priv); void i915_gem_reset_finish(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv); -void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); +bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv); + +void i915_gem_init_mmio(struct drm_i915_private *i915); int __must_check i915_gem_init(struct drm_i915_private *dev_priv); int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv); void i915_gem_init_swizzling(struct drm_i915_private *dev_priv); void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv); -int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, - unsigned int flags); +int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, + unsigned int flags); int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv); void i915_gem_resume(struct drm_i915_private *dev_priv); int i915_gem_fault(struct vm_fault *vmf); @@ -3529,12 +3612,10 @@ u32 i915_gem_fence_alignment(struct drm_i915_private *dev_priv, u32 size, /* i915_debugfs.c */ #ifdef CONFIG_DEBUG_FS int i915_debugfs_register(struct drm_i915_private *dev_priv); -void i915_debugfs_unregister(struct drm_i915_private *dev_priv); int i915_debugfs_connector_add(struct drm_connector *connector); void intel_display_crc_init(struct drm_i915_private *dev_priv); #else static inline int i915_debugfs_register(struct drm_i915_private *dev_priv) {return 0;} -static inline void i915_debugfs_unregister(struct drm_i915_private *dev_priv) {} static inline int i915_debugfs_connector_add(struct drm_connector 
*connector) { return 0; } static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {} @@ -3546,7 +3627,7 @@ static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {} __printf(2, 3) void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); int i915_error_state_to_str(struct drm_i915_error_state_buf *estr, - const struct i915_error_state_file_priv *error); + const struct i915_gpu_state *gpu); int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb, struct drm_i915_private *i915, size_t count, loff_t pos); @@ -3555,13 +3636,28 @@ static inline void i915_error_state_buf_release( { kfree(eb->buf); } + +struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915); void i915_capture_error_state(struct drm_i915_private *dev_priv, u32 engine_mask, const char *error_msg); -void i915_error_state_get(struct drm_device *dev, - struct i915_error_state_file_priv *error_priv); -void i915_error_state_put(struct i915_error_state_file_priv *error_priv); -void i915_destroy_error_state(struct drm_i915_private *dev_priv); + +static inline struct i915_gpu_state * +i915_gpu_state_get(struct i915_gpu_state *gpu) +{ + kref_get(&gpu->ref); + return gpu; +} + +void __i915_gpu_state_free(struct kref *kref); +static inline void i915_gpu_state_put(struct i915_gpu_state *gpu) +{ + if (gpu) + kref_put(&gpu->ref, __i915_gpu_state_free); +} + +struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915); +void i915_reset_error_state(struct drm_i915_private *i915); #else @@ -3571,7 +3667,13 @@ static inline void i915_capture_error_state(struct drm_i915_private *dev_priv, { } -static inline void i915_destroy_error_state(struct drm_i915_private *dev_priv) +static inline struct i915_gpu_state * +i915_first_error_state(struct drm_i915_private *i915) +{ + return NULL; +} + +static inline void i915_reset_error_state(struct drm_i915_private *i915) { } @@ -3629,7 +3731,7 @@ static inline bool 
intel_gmbus_is_forced_bit(struct i2c_adapter *adapter) extern void intel_i2c_reset(struct drm_i915_private *dev_priv); /* intel_bios.c */ -int intel_bios_init(struct drm_i915_private *dev_priv); +void intel_bios_init(struct drm_i915_private *dev_priv); bool intel_bios_is_valid_vbt(const void *buf, size_t size); bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv); bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin); @@ -3711,7 +3813,7 @@ extern void i915_redisable_vga(struct drm_i915_private *dev_priv); extern void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv); extern bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val); extern void intel_init_pch_refclk(struct drm_i915_private *dev_priv); -extern void intel_set_rps(struct drm_i915_private *dev_priv, u8 val); +extern int intel_set_rps(struct drm_i915_private *dev_priv, u8 val); extern bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable); @@ -3727,7 +3829,6 @@ extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e, extern struct intel_display_error_state * intel_display_capture_error_state(struct drm_i915_private *dev_priv); extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e, - struct drm_i915_private *dev_priv, struct intel_display_error_state *error); int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val); @@ -3737,7 +3838,7 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, /* intel_sideband.c */ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr); -void vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val); +int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val); u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr); u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg); void vlv_iosf_sb_write(struct drm_i915_private *dev_priv, u8 port, u32 
reg, u32 val); @@ -3793,6 +3894,8 @@ void vlv_phy_reset_lanes(struct intel_encoder *encoder); int intel_gpu_freq(struct drm_i915_private *dev_priv, int val); int intel_freq_opcode(struct drm_i915_private *dev_priv, int val); +u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv, + const i915_reg_t reg); #define I915_READ8(reg) dev_priv->uncore.funcs.mmio_readb(dev_priv, (reg), true) #define I915_WRITE8(reg, val) dev_priv->uncore.funcs.mmio_writeb(dev_priv, (reg), (val), true) @@ -3837,14 +3940,14 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val); #define POSTING_READ16(reg) (void)I915_READ16_NOTRACE(reg) #define __raw_read(x, s) \ -static inline uint##x##_t __raw_i915_read##x(struct drm_i915_private *dev_priv, \ +static inline uint##x##_t __raw_i915_read##x(const struct drm_i915_private *dev_priv, \ i915_reg_t reg) \ { \ return read##s(dev_priv->regs + i915_mmio_reg_offset(reg)); \ } #define __raw_write(x, s) \ -static inline void __raw_i915_write##x(struct drm_i915_private *dev_priv, \ +static inline void __raw_i915_write##x(const struct drm_i915_private *dev_priv, \ i915_reg_t reg, uint##x##_t val) \ { \ write##s(val, dev_priv->regs + i915_mmio_reg_offset(reg)); \ @@ -3956,14 +4059,34 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) } static inline bool -__i915_request_irq_complete(struct drm_i915_gem_request *req) +__i915_request_irq_complete(const struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; + u32 seqno; + + /* Note that the engine may have wrapped around the seqno, and + * so our request->global_seqno will be ahead of the hardware, + * even though it completed the request before wrapping. We catch + * this by kicking all the waiters before resetting the seqno + * in hardware, and also signal the fence. + */ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &req->fence.flags)) + return true; + + /* The request was dequeued before we were awoken. 
We check after + * inspecting the hw to confirm that this was the same request + * that generated the HWS update. The memory barriers within + * the request execution are sufficient to ensure that a check + * after reading the value from hw matches this request. + */ + seqno = i915_gem_request_global_seqno(req); + if (!seqno) + return false; /* Before we do the heavier coherent read of the seqno, * check the value (hopefully) in the CPU cacheline. */ - if (__i915_gem_request_completed(req)) + if (__i915_gem_request_completed(req, seqno)) return true; /* Ensure our read of the seqno is coherent so that we @@ -3978,9 +4101,8 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req) * is woken. */ if (engine->irq_seqno_barrier && - rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh) == current && - cmpxchg_relaxed(&engine->breadcrumbs.irq_posted, 1, 0)) { - struct task_struct *tsk; + test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) { + struct intel_breadcrumbs *b = &engine->breadcrumbs; /* The ordering of irq_posted versus applying the barrier * is crucial. The clearing of the current irq_posted must @@ -4002,19 +4124,18 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req) * the seqno before we believe it coherent since they see * irq_posted == false but we are still running). */ - rcu_read_lock(); - tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh); - if (tsk && tsk != current) + spin_lock_irq(&b->irq_lock); + if (b->irq_wait && b->irq_wait->tsk != current) /* Note that if the bottom-half is changed as we * are sending the wake-up, the new bottom-half will * be woken by whomever made the change. We only have * to worry about when we steal the irq-posted for * ourself. 
*/ - wake_up_process(tsk); - rcu_read_unlock(); + wake_up_process(b->irq_wait->tsk); + spin_unlock_irq(&b->irq_lock); - if (__i915_gem_request_completed(req)) + if (__i915_gem_request_completed(req, seqno)) return true; } @@ -4045,4 +4166,10 @@ int remap_io_mapping(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, struct io_mapping *iomap); +static inline bool i915_gem_object_is_coherent(struct drm_i915_gem_object *obj) +{ + return (obj->cache_level != I915_CACHE_NONE || + HAS_LLC(to_i915(obj->base.dev))); +} + #endif diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fe531f904062..532a577ff7a1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -29,12 +29,14 @@ #include <drm/drm_vma_manager.h> #include <drm/i915_drm.h> #include "i915_drv.h" +#include "i915_gem_clflush.h" #include "i915_vgpu.h" #include "i915_trace.h" #include "intel_drv.h" #include "intel_frontbuffer.h" #include "intel_mocs.h" #include <linux/dma-fence-array.h> +#include <linux/kthread.h> #include <linux/reservation.h> #include <linux/shmem_fs.h> #include <linux/slab.h> @@ -47,18 +49,12 @@ static void i915_gem_flush_free_objects(struct drm_i915_private *i915); static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); -static bool cpu_cache_is_coherent(struct drm_device *dev, - enum i915_cache_level level) -{ - return HAS_LLC(to_i915(dev)) || level != I915_CACHE_NONE; -} - static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) { if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) return false; - if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + if (!i915_gem_object_is_coherent(obj)) return true; return obj->pin_display; @@ -107,16 +103,13 @@ i915_gem_wait_for_error(struct i915_gpu_error *error) might_sleep(); - if (!i915_reset_in_progress(error)) - return 0; - 
/* * Only wait 10 seconds for the gpu reset to complete to avoid hanging * userspace. If it takes that long something really bad is going on and * we should simply try to bail out and fail as gracefully as possible. */ ret = wait_event_interruptible_timeout(error->reset_queue, - !i915_reset_in_progress(error), + !i915_reset_backoff(error), I915_RESET_TIMEOUT); if (ret == 0) { DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); @@ -254,7 +247,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, if (needs_clflush && (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 && - !cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + !i915_gem_object_is_coherent(obj)) drm_clflush_sg(pages); obj->base.read_domains = I915_GEM_DOMAIN_CPU; @@ -312,6 +305,8 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { .release = i915_gem_object_release_phys, }; +static const struct drm_i915_gem_object_ops i915_gem_object_ops; + int i915_gem_object_unbind(struct drm_i915_gem_object *obj) { struct i915_vma *vma; @@ -399,7 +394,7 @@ out: if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq)) i915_gem_request_retire_upto(rq); - if (rps && rq->global_seqno == intel_engine_last_submit(rq->engine)) { + if (rps && i915_gem_request_global_seqno(rq) == intel_engine_last_submit(rq->engine)) { /* The GPU is now idle and this client has stalled. 
* Since no other client has submitted a request in the * meantime, assume that this client is the only one @@ -424,7 +419,9 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, long timeout, struct intel_rps_client *rps) { + unsigned int seq = __read_seqcount_begin(&resv->seq); struct dma_fence *excl; + bool prune_fences = false; if (flags & I915_WAIT_ALL) { struct dma_fence **shared; @@ -449,15 +446,31 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, for (; i < count; i++) dma_fence_put(shared[i]); kfree(shared); + + prune_fences = count && timeout >= 0; } else { excl = reservation_object_get_excl_rcu(resv); } - if (excl && timeout >= 0) + if (excl && timeout >= 0) { timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps); + prune_fences = timeout >= 0; + } dma_fence_put(excl); + /* Oportunistically prune the fences iff we know they have *all* been + * signaled and that the reservation object has not been changed (i.e. + * no new fences have been added). 
+ */ + if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) { + if (reservation_object_trylock(resv)) { + if (!__read_seqcount_retry(&resv->seq, seq)) + reservation_object_add_excl_fence(resv, NULL); + reservation_object_unlock(resv); + } + } + return timeout; } @@ -585,9 +598,18 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, if (obj->mm.pages) return -EBUSY; + GEM_BUG_ON(obj->ops != &i915_gem_object_ops); obj->ops = &i915_gem_phys_ops; - return i915_gem_object_pin_pages(obj); + ret = i915_gem_object_pin_pages(obj); + if (ret) + goto err_xfer; + + return 0; + +err_xfer: + obj->ops = &i915_gem_object_ops; + return ret; } static int @@ -608,7 +630,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, drm_clflush_virt_range(vaddr, args->size); i915_gem_chipset_flush(to_i915(obj->base.dev)); - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + intel_fb_obj_flush(obj, ORIGIN_CPU); return 0; } @@ -763,6 +785,15 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, if (ret) return ret; + if (i915_gem_object_is_coherent(obj) || + !static_cpu_has(X86_FEATURE_CLFLUSH)) { + ret = i915_gem_object_set_to_cpu_domain(obj, false); + if (ret) + goto err_unpin; + else + goto out; + } + i915_gem_object_flush_gtt_write_domain(obj); /* If we're not in the cpu read domain, set ourself into the gtt @@ -771,17 +802,9 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, * anyway again before the next pread happens. 
*/ if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) - *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, - obj->cache_level); - - if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) { - ret = i915_gem_object_set_to_cpu_domain(obj, false); - if (ret) - goto err_unpin; - - *needs_clflush = 0; - } + *needs_clflush = CLFLUSH_BEFORE; +out: /* return with the pages pinned */ return 0; @@ -814,6 +837,15 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, if (ret) return ret; + if (i915_gem_object_is_coherent(obj) || + !static_cpu_has(X86_FEATURE_CLFLUSH)) { + ret = i915_gem_object_set_to_cpu_domain(obj, true); + if (ret) + goto err_unpin; + else + goto out; + } + i915_gem_object_flush_gtt_write_domain(obj); /* If we're not in the cpu write domain, set ourself into the @@ -822,26 +854,15 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, * right away and we therefore have to clflush anyway. */ if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) - *needs_clflush |= cpu_write_needs_clflush(obj) << 1; + *needs_clflush |= CLFLUSH_AFTER; /* Same trick applies to invalidate partially written cachelines read * before writing. 
*/ if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) - *needs_clflush |= !cpu_cache_is_coherent(obj->base.dev, - obj->cache_level); - - if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) { - ret = i915_gem_object_set_to_cpu_domain(obj, true); - if (ret) - goto err_unpin; - - *needs_clflush = 0; - } - - if ((*needs_clflush & CLFLUSH_AFTER) == 0) - obj->cache_dirty = true; + *needs_clflush |= CLFLUSH_BEFORE; +out: intel_fb_obj_invalidate(obj, ORIGIN_CPU); obj->mm.dirty = true; /* return with the pages pinned */ @@ -1257,7 +1278,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, user_data += page_length; offset += page_length; } - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + intel_fb_obj_flush(obj, ORIGIN_CPU); mutex_lock(&i915->drm.struct_mutex); out_unpin: @@ -1393,7 +1414,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, offset = 0; } - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + intel_fb_obj_flush(obj, ORIGIN_CPU); i915_gem_obj_finish_shmem_access(obj); return ret; } @@ -1602,23 +1623,16 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_sw_finish *args = data; struct drm_i915_gem_object *obj; - int err = 0; obj = i915_gem_object_lookup(file, args->handle); if (!obj) return -ENOENT; /* Pinned buffers may be scanout, so flush the cache */ - if (READ_ONCE(obj->pin_display)) { - err = i915_mutex_lock_interruptible(dev); - if (!err) { - i915_gem_object_flush_cpu_write_domain(obj); - mutex_unlock(&dev->struct_mutex); - } - } - + i915_gem_object_flush_if_display(obj); i915_gem_object_put(obj); - return err; + + return 0; } /** @@ -2232,17 +2246,17 @@ unlock: mutex_unlock(&obj->mm.lock); } -static void i915_sg_trim(struct sg_table *orig_st) +static bool i915_sg_trim(struct sg_table *orig_st) { struct sg_table new_st; struct scatterlist *sg, *new_sg; unsigned int i; if (orig_st->nents == orig_st->orig_nents) - return; + return false; if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | 
__GFP_NOWARN)) - return; + return false; new_sg = new_st.sgl; for_each_sg(orig_st->sgl, sg, orig_st->nents, i) { @@ -2255,6 +2269,7 @@ static void i915_sg_trim(struct sg_table *orig_st) sg_free_table(orig_st); *orig_st = new_st; + return true; } static struct sg_table * @@ -2306,7 +2321,7 @@ rebuild_st: st->nents = 0; for (i = 0; i < page_count; i++) { page = shmem_read_mapping_page_gfp(mapping, i, gfp); - if (IS_ERR(page)) { + if (unlikely(IS_ERR(page))) { i915_gem_shrink(dev_priv, page_count, I915_SHRINK_BOUND | @@ -2314,12 +2329,21 @@ rebuild_st: I915_SHRINK_PURGEABLE); page = shmem_read_mapping_page_gfp(mapping, i, gfp); } - if (IS_ERR(page)) { + if (unlikely(IS_ERR(page))) { + gfp_t reclaim; + /* We've tried hard to allocate the memory by reaping * our own buffer, now let the real VM do its job and * go down in flames if truly OOM. + * + * However, since graphics tend to be disposable, + * defer the oom here by reporting the ENOMEM back + * to userspace. */ - page = shmem_read_mapping_page(mapping, i); + reclaim = mapping_gfp_mask(mapping); + reclaim |= __GFP_NORETRY; /* reclaim, but no oom */ + + page = shmem_read_mapping_page_gfp(mapping, i, reclaim); if (IS_ERR(page)) { ret = PTR_ERR(page); goto err_sg; @@ -2674,7 +2698,8 @@ static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) struct drm_i915_gem_request * i915_gem_find_active_request(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *request; + struct drm_i915_gem_request *request, *active = NULL; + unsigned long flags; /* We are called by the error capture and reset at a random * point in time. In particular, note that neither is crucially @@ -2684,15 +2709,22 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) * extra delay for a recent interrupt is pointless. Hence, we do * not need an engine->irq_seqno_barrier() before the seqno reads. 
*/ + spin_lock_irqsave(&engine->timeline->lock, flags); list_for_each_entry(request, &engine->timeline->requests, link) { - if (__i915_gem_request_completed(request)) + if (__i915_gem_request_completed(request, + request->global_seqno)) continue; GEM_BUG_ON(request->engine != engine); - return request; + GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &request->fence.flags)); + + active = request; + break; } + spin_unlock_irqrestore(&engine->timeline->lock, flags); - return NULL; + return active; } static bool engine_stalled(struct intel_engine_cs *engine) @@ -2719,6 +2751,17 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) { struct drm_i915_gem_request *request; + /* Prevent the signaler thread from updating the request + * state (by calling dma_fence_signal) as we are processing + * the reset. The write from the GPU of the seqno is + * asynchronous and the signaler thread may see a different + * value to us and declare the request complete, even though + * the reset routine has picked that request as the active + * (incomplete) request. This conflict is not handled + * gracefully! + */ + kthread_park(engine->breadcrumbs.signaler); + /* Prevent request submission to the hardware until we have * completed the reset in i915_gem_reset_finish(). 
If a request * is completed by one engine, it may then queue a request @@ -2730,6 +2773,9 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) tasklet_kill(&engine->irq_tasklet); tasklet_disable(&engine->irq_tasklet); + if (engine->irq_seqno_barrier) + engine->irq_seqno_barrier(engine); + if (engine_stalled(engine)) { request = i915_gem_find_active_request(engine); if (request && request->fence.error == -EIO) @@ -2826,9 +2872,6 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request; - if (engine->irq_seqno_barrier) - engine->irq_seqno_barrier(engine); - request = i915_gem_find_active_request(engine); if (request && i915_gem_reset_request(request)) { DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", @@ -2852,8 +2895,14 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) i915_gem_retire_requests(dev_priv); - for_each_engine(engine, dev_priv, id) + for_each_engine(engine, dev_priv, id) { + struct i915_gem_context *ctx; + i915_gem_reset_engine(engine); + ctx = fetch_and_zero(&engine->last_retired_context); + if (ctx) + engine->context_unpin(engine, ctx); + } i915_gem_restore_fences(dev_priv); @@ -2872,8 +2921,10 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) lockdep_assert_held(&dev_priv->drm.struct_mutex); - for_each_engine(engine, dev_priv, id) + for_each_engine(engine, dev_priv, id) { tasklet_enable(&engine->irq_tasklet); + kthread_unpark(engine->breadcrumbs.signaler); + } } static void nop_submit_request(struct drm_i915_gem_request *request) @@ -2947,14 +2998,78 @@ void i915_gem_set_wedged(struct drm_i915_private *dev_priv) lockdep_assert_held(&dev_priv->drm.struct_mutex); set_bit(I915_WEDGED, &dev_priv->gpu_error.flags); + /* Retire completed requests first so the list of inflight/incomplete + * requests is accurate and we don't try and mark successful requests + * as in error during __i915_gem_set_wedged_BKL(). 
+ */ + i915_gem_retire_requests(dev_priv); + stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL); i915_gem_context_lost(dev_priv); - i915_gem_retire_requests(dev_priv); mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); } +bool i915_gem_unset_wedged(struct drm_i915_private *i915) +{ + struct i915_gem_timeline *tl; + int i; + + lockdep_assert_held(&i915->drm.struct_mutex); + if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) + return true; + + /* Before unwedging, make sure that all pending operations + * are flushed and errored out - we may have requests waiting upon + * third party fences. We marked all inflight requests as EIO, and + * every execbuf since returned EIO, for consistency we want all + * the currently pending requests to also be marked as EIO, which + * is done inside our nop_submit_request - and so we must wait. + * + * No more can be submitted until we reset the wedged bit. + */ + list_for_each_entry(tl, &i915->gt.timelines, link) { + for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { + struct drm_i915_gem_request *rq; + + rq = i915_gem_active_peek(&tl->engine[i].last_request, + &i915->drm.struct_mutex); + if (!rq) + continue; + + /* We can't use our normal waiter as we want to + * avoid recursively trying to handle the current + * reset. The basic dma_fence_default_wait() installs + * a callback for dma_fence_signal(), which is + * triggered by our nop handler (indirectly, the + * callback enables the signaler thread which is + * woken by the nop_submit_request() advancing the seqno + * and when the seqno passes the fence, the signaler + * then signals the fence waking us up). + */ + if (dma_fence_default_wait(&rq->fence, true, + MAX_SCHEDULE_TIMEOUT) < 0) + return false; + } + } + + /* Undo nop_submit_request. 
We prevent all new i915 requests from + * being queued (by disallowing execbuf whilst wedged) so having + * waited for all active requests above, we know the system is idle + * and do not have to worry about a thread being inside + * engine->submit_request() as we swap over. So unlike installing + * the nop_submit_request on reset, we can do this from normal + * context and do not require stop_machine(). + */ + intel_engines_reset_default_submission(i915); + + smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ + clear_bit(I915_WEDGED, &i915->gpu_error.flags); + + return true; +} + static void i915_gem_retire_work_handler(struct work_struct *work) { @@ -2997,9 +3112,7 @@ i915_gem_idle_work_handler(struct work_struct *work) * Wait for last execlists context complete, but bail out in case a * new request is submitted. */ - wait_for(READ_ONCE(dev_priv->gt.active_requests) || - intel_execlists_idle(dev_priv), 10); - + wait_for(intel_engines_are_idle(dev_priv), 10); if (READ_ONCE(dev_priv->gt.active_requests)) return; @@ -3024,11 +3137,13 @@ i915_gem_idle_work_handler(struct work_struct *work) if (dev_priv->gt.active_requests) goto out_unlock; - if (wait_for(intel_execlists_idle(dev_priv), 10)) + if (wait_for(intel_engines_are_idle(dev_priv), 10)) DRM_ERROR("Timeout waiting for engines to idle\n"); - for_each_engine(engine, dev_priv, id) + for_each_engine(engine, dev_priv, id) { + intel_engine_disarm_breadcrumbs(engine); i915_gem_batch_pool_fini(&engine->batch_pool); + } GEM_BUG_ON(!dev_priv->gt.awake); dev_priv->gt.awake = false; @@ -3156,6 +3271,29 @@ static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags) return 0; } +static int wait_for_engine(struct intel_engine_cs *engine, int timeout_ms) +{ + return wait_for(intel_engine_is_idle(engine), timeout_ms); +} + +static int wait_for_engines(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) { + 
if (GEM_WARN_ON(wait_for_engine(engine, 50))) { + i915_gem_set_wedged(i915); + return -EIO; + } + + GEM_BUG_ON(intel_engine_get_seqno(engine) != + intel_engine_last_submit(engine)); + } + + return 0; +} + int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) { int ret; @@ -3170,48 +3308,16 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) if (ret) return ret; } - } else { - ret = wait_for_timeline(&i915->gt.global_timeline, flags); - if (ret) - return ret; - } - return 0; -} - -void i915_gem_clflush_object(struct drm_i915_gem_object *obj, - bool force) -{ - /* If we don't have a page list set up, then we're not pinned - * to GPU, and we can ignore the cache flush because it'll happen - * again at bind time. - */ - if (!obj->mm.pages) - return; + i915_gem_retire_requests(i915); + GEM_BUG_ON(i915->gt.active_requests); - /* - * Stolen memory is always coherent with the GPU as it is explicitly - * marked as wc by the system, or the system is cache-coherent. - */ - if (obj->stolen || obj->phys_handle) - return; - - /* If the GPU is snooping the contents of the CPU cache, - * we do not need to manually clear the CPU cache lines. However, - * the caches are only snooped when the render cache is - * flushed/invalidated. As we always have to emit invalidations - * and flushes when moving into and out of the RENDER domain, correct - * snooping behaviour occurs naturally as the result of our domain - * tracking. - */ - if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { - obj->cache_dirty = true; - return; + ret = wait_for_engines(i915); + } else { + ret = wait_for_timeline(&i915->gt.global_timeline, flags); } - trace_i915_gem_object_clflush(obj); - drm_clflush_sg(obj->mm.pages); - obj->cache_dirty = false; + return ret; } /** Flushes the GTT write domain for the object if it's dirty. 
*/ @@ -3239,15 +3345,18 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) * system agents we cannot reproduce this behaviour). */ wmb(); - if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) - POSTING_READ(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); + if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) { + if (intel_runtime_pm_get_if_in_use(dev_priv)) { + spin_lock_irq(&dev_priv->uncore.lock); + POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); + spin_unlock_irq(&dev_priv->uncore.lock); + intel_runtime_pm_put(dev_priv); + } + } - intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT)); + intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT)); obj->base.write_domain = 0; - trace_i915_gem_object_change_domain(obj, - obj->base.read_domains, - I915_GEM_DOMAIN_GTT); } /** Flushes the CPU write domain for the object if it's dirty. */ @@ -3257,13 +3366,27 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) return; - i915_gem_clflush_object(obj, obj->pin_display); - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); + obj->base.write_domain = 0; +} + +static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) +{ + if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty) + return; + i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); obj->base.write_domain = 0; - trace_i915_gem_object_change_domain(obj, - obj->base.read_domains, - I915_GEM_DOMAIN_CPU); +} + +void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) +{ + if (!READ_ONCE(obj->pin_display)) + return; + + mutex_lock(&obj->base.dev->struct_mutex); + __i915_gem_object_flush_for_display(obj); + mutex_unlock(&obj->base.dev->struct_mutex); } /** @@ -3277,7 +3400,6 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) int i915_gem_object_set_to_gtt_domain(struct 
drm_i915_gem_object *obj, bool write) { - uint32_t old_write_domain, old_read_domains; int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); @@ -3315,9 +3437,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) mb(); - old_write_domain = obj->base.write_domain; - old_read_domains = obj->base.read_domains; - /* It should now be out of any other write domains, and we can update * the domain values for our changes. */ @@ -3329,10 +3448,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) obj->mm.dirty = true; } - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); - i915_gem_object_unpin_pages(obj); return 0; } @@ -3457,7 +3572,7 @@ restart: } if (obj->base.write_domain == I915_GEM_DOMAIN_CPU && - cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + i915_gem_object_is_coherent(obj)) obj->cache_dirty = true; list_for_each_entry(vma, &obj->vma_list, obj_link) @@ -3569,7 +3684,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view) { struct i915_vma *vma; - u32 old_read_domains, old_write_domain; int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); @@ -3629,24 +3743,14 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, vma->display_alignment = max_t(u64, vma->display_alignment, alignment); /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */ - if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) { - i915_gem_clflush_object(obj, true); - intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB); - } - - old_write_domain = obj->base.write_domain; - old_read_domains = obj->base.read_domains; + __i915_gem_object_flush_for_display(obj); + intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); /* It should now be out of any other write domains, and we can update * the domain values for our changes. 
*/ - obj->base.write_domain = 0; obj->base.read_domains |= I915_GEM_DOMAIN_GTT; - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); - return vma; err_unpin_display: @@ -3682,7 +3786,6 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) { - uint32_t old_write_domain, old_read_domains; int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); @@ -3701,13 +3804,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) i915_gem_object_flush_gtt_write_domain(obj); - old_write_domain = obj->base.write_domain; - old_read_domains = obj->base.read_domains; - /* Flush the CPU cache if it's still invalid. */ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { - i915_gem_clflush_object(obj, false); - + i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); obj->base.read_domains |= I915_GEM_DOMAIN_CPU; } @@ -3724,10 +3823,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) obj->base.write_domain = I915_GEM_DOMAIN_CPU; } - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); - return 0; } @@ -3755,16 +3850,14 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) return -EIO; spin_lock(&file_priv->mm.lock); - list_for_each_entry(request, &file_priv->mm.request_list, client_list) { + list_for_each_entry(request, &file_priv->mm.request_list, client_link) { if (time_after_eq(request->emitted_jiffies, recent_enough)) break; - /* - * Note that the request might not have been submitted yet. - * In which case emitted_jiffies will be zero. 
- */ - if (!request->emitted_jiffies) - continue; + if (target) { + list_del(&target->client_link); + target->file_priv = NULL; + } target = request; } @@ -4050,7 +4143,7 @@ frontbuffer_retire(struct i915_gem_active *active, struct drm_i915_gem_object *obj = container_of(active, typeof(*obj), frontbuffer_write); - intel_fb_obj_flush(obj, true, ORIGIN_CS); + intel_fb_obj_flush(obj, ORIGIN_CS); } void i915_gem_object_init(struct drm_i915_gem_object *obj, @@ -4314,11 +4407,29 @@ static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv) !i915_gem_context_is_kernel(engine->last_retired_context)); } +void i915_gem_sanitize(struct drm_i915_private *i915) +{ + /* + * If we inherit context state from the BIOS or earlier occupants + * of the GPU, the GPU may be in an inconsistent state when we + * try to take over. The only way to remove the earlier state + * is by resetting. However, resetting on earlier gen is tricky as + * it may impact the display and we are uncertain about the stability + * of the reset, so we only reset recent machines with logical + * context support (that must be reset to remove any stray contexts). 
+ */ + if (HAS_HW_CONTEXTS(i915)) { + int reset = intel_gpu_reset(i915, ALL_ENGINES); + WARN_ON(reset && reset != -ENODEV); + } +} + int i915_gem_suspend(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; int ret; + intel_runtime_pm_get(dev_priv); intel_suspend_gt_powersave(dev_priv); mutex_lock(&dev->struct_mutex); @@ -4333,16 +4444,13 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) */ ret = i915_gem_switch_to_kernel_context(dev_priv); if (ret) - goto err; + goto err_unlock; ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED); if (ret) - goto err; - - i915_gem_retire_requests(dev_priv); - GEM_BUG_ON(dev_priv->gt.active_requests); + goto err_unlock; assert_kernel_context_is_current(dev_priv); i915_gem_context_lost(dev_priv); @@ -4365,7 +4473,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) * reset the GPU back to its idle, low power state. */ WARN_ON(dev_priv->gt.awake); - WARN_ON(!intel_execlists_idle(dev_priv)); + WARN_ON(!intel_engines_are_idle(dev_priv)); /* * Neither the BIOS, ourselves or any other kernel @@ -4386,15 +4494,13 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) * machines is a good idea, we don't - just in case it leaves the * machine in an unusable condition. 
*/ - if (HAS_HW_CONTEXTS(dev_priv)) { - int reset = intel_gpu_reset(dev_priv, ALL_ENGINES); - WARN_ON(reset && reset != -ENODEV); - } - - return 0; + i915_gem_sanitize(dev_priv); + goto out_rpm_put; -err: +err_unlock: mutex_unlock(&dev->struct_mutex); +out_rpm_put: + intel_runtime_pm_put(dev_priv); return ret; } @@ -4464,11 +4570,24 @@ static void init_unused_rings(struct drm_i915_private *dev_priv) } } -int -i915_gem_init_hw(struct drm_i915_private *dev_priv) +static int __i915_gem_restart_engines(void *data) { + struct drm_i915_private *i915 = data; struct intel_engine_cs *engine; enum intel_engine_id id; + int err; + + for_each_engine(engine, i915, id) { + err = engine->init_hw(engine); + if (err) + return err; + } + + return 0; +} + +int i915_gem_init_hw(struct drm_i915_private *dev_priv) +{ int ret; dev_priv->gt.last_init_time = ktime_get(); @@ -4514,16 +4633,14 @@ i915_gem_init_hw(struct drm_i915_private *dev_priv) } /* Need to do basic initialisation of all rings first: */ - for_each_engine(engine, dev_priv, id) { - ret = engine->init_hw(engine); - if (ret) - goto out; - } + ret = __i915_gem_restart_engines(dev_priv); + if (ret) + goto out; intel_mocs_init_l3cc_table(dev_priv); /* We can't enable contexts until all firmware is loaded */ - ret = intel_guc_setup(dev_priv); + ret = intel_uc_init_hw(dev_priv); if (ret) goto out; @@ -4559,6 +4676,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); + i915_gem_clflush_init(dev_priv); + if (!i915.enable_execlists) { dev_priv->gt.resume = intel_legacy_submission_resume; dev_priv->gt.cleanup_engine = intel_engine_cleanup; @@ -4607,6 +4726,11 @@ out_unlock: return ret; } +void i915_gem_init_mmio(struct drm_i915_private *i915) +{ + i915_gem_sanitize(i915); +} + void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv) { @@ -4720,7 +4844,9 @@ err_out: void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) { + i915_gem_drain_freed_objects(dev_priv); 
WARN_ON(!llist_empty(&dev_priv->mm.free_list)); + WARN_ON(dev_priv->mm.object_count); mutex_lock(&dev_priv->drm.struct_mutex); i915_gem_timeline_fini(&dev_priv->gt.global_timeline); @@ -4738,14 +4864,10 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) int i915_gem_freeze(struct drm_i915_private *dev_priv) { - intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->drm.struct_mutex); i915_gem_shrink_all(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_put(dev_priv); - return 0; } @@ -4796,7 +4918,7 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file) * file_priv. */ spin_lock(&file_priv->mm.lock); - list_for_each_entry(request, &file_priv->mm.request_list, client_list) + list_for_each_entry(request, &file_priv->mm.request_list, client_link) request->file_priv = NULL; spin_unlock(&file_priv->mm.lock); @@ -4874,38 +4996,49 @@ i915_gem_object_create_from_data(struct drm_i915_private *dev_priv, const void *data, size_t size) { struct drm_i915_gem_object *obj; - struct sg_table *sg; - size_t bytes; - int ret; + struct file *file; + size_t offset; + int err; obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE)); if (IS_ERR(obj)) return obj; - ret = i915_gem_object_set_to_cpu_domain(obj, true); - if (ret) - goto fail; + GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU); - ret = i915_gem_object_pin_pages(obj); - if (ret) - goto fail; + file = obj->base.filp; + offset = 0; + do { + unsigned int len = min_t(typeof(size), size, PAGE_SIZE); + struct page *page; + void *pgdata, *vaddr; - sg = obj->mm.pages; - bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size); - obj->mm.dirty = true; /* Backing store is now out of date */ - i915_gem_object_unpin_pages(obj); + err = pagecache_write_begin(file, file->f_mapping, + offset, len, 0, + &page, &pgdata); + if (err < 0) + goto fail; - if (WARN_ON(bytes != size)) { - DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size); - ret = -EFAULT; - 
goto fail; - } + vaddr = kmap(page); + memcpy(vaddr, data, len); + kunmap(page); + + err = pagecache_write_end(file, file->f_mapping, + offset, len, len, + page, pgdata); + if (err < 0) + goto fail; + + size -= len; + data += len; + offset += len; + } while (size); return obj; fail: i915_gem_object_put(obj); - return ERR_PTR(ret); + return ERR_PTR(err); } struct scatterlist * @@ -5060,3 +5193,11 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, sg = i915_gem_object_get_sg(obj, n, &offset); return sg_dma_address(sg) + (offset << PAGE_SHIFT); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/scatterlist.c" +#include "selftests/mock_gem_device.c" +#include "selftests/huge_gem_object.c" +#include "selftests/i915_gem_object.c" +#include "selftests/i915_gem_coherency.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index a585d47c420a..5a49487368ca 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -28,9 +28,18 @@ #ifdef CONFIG_DRM_I915_DEBUG_GEM #define GEM_BUG_ON(expr) BUG_ON(expr) #define GEM_WARN_ON(expr) WARN_ON(expr) + +#define GEM_DEBUG_DECL(var) var +#define GEM_DEBUG_EXEC(expr) expr +#define GEM_DEBUG_BUG_ON(expr) GEM_BUG_ON(expr) + #else #define GEM_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr) #define GEM_WARN_ON(expr) (BUILD_BUG_ON_INVALID(expr), 0) + +#define GEM_DEBUG_DECL(var) +#define GEM_DEBUG_EXEC(expr) do { } while (0) +#define GEM_DEBUG_BUG_ON(expr) #endif #define I915_NUM_ENGINES 5 diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index b3bc119ec1bb..41aa598c4f3b 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -96,8 +96,7 @@ struct drm_i915_gem_object * i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size) { - struct drm_i915_gem_object *obj = NULL; - struct drm_i915_gem_object *tmp; + struct drm_i915_gem_object *obj; 
struct list_head *list; int n, ret; @@ -112,31 +111,29 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, n = ARRAY_SIZE(pool->cache_list) - 1; list = &pool->cache_list[n]; - list_for_each_entry(tmp, list, batch_pool_link) { + list_for_each_entry(obj, list, batch_pool_link) { /* The batches are strictly LRU ordered */ - if (i915_gem_object_is_active(tmp)) - break; + if (i915_gem_object_is_active(obj)) { + if (!reservation_object_test_signaled_rcu(obj->resv, + true)) + break; - GEM_BUG_ON(!reservation_object_test_signaled_rcu(tmp->resv, - true)); + i915_gem_retire_requests(pool->engine->i915); + GEM_BUG_ON(i915_gem_object_is_active(obj)); + } - if (tmp->base.size >= size) { - /* Clear the set of shared fences early */ - ww_mutex_lock(&tmp->resv->lock, NULL); - reservation_object_add_excl_fence(tmp->resv, NULL); - ww_mutex_unlock(&tmp->resv->lock); + GEM_BUG_ON(!reservation_object_test_signaled_rcu(obj->resv, + true)); - obj = tmp; - break; - } + if (obj->base.size >= size) + goto found; } - if (obj == NULL) { - obj = i915_gem_object_create_internal(pool->engine->i915, size); - if (IS_ERR(obj)) - return obj; - } + obj = i915_gem_object_create_internal(pool->engine->i915, size); + if (IS_ERR(obj)) + return obj; +found: ret = i915_gem_object_pin_pages(obj); if (ret) return ERR_PTR(ret); diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c new file mode 100644 index 000000000000..ffd01e02fe94 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_clflush.c @@ -0,0 +1,189 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished 
to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "i915_drv.h" +#include "intel_frontbuffer.h" +#include "i915_gem_clflush.h" + +static DEFINE_SPINLOCK(clflush_lock); +static u64 clflush_context; + +struct clflush { + struct dma_fence dma; /* Must be first for dma_fence_free() */ + struct i915_sw_fence wait; + struct work_struct work; + struct drm_i915_gem_object *obj; +}; + +static const char *i915_clflush_get_driver_name(struct dma_fence *fence) +{ + return DRIVER_NAME; +} + +static const char *i915_clflush_get_timeline_name(struct dma_fence *fence) +{ + return "clflush"; +} + +static bool i915_clflush_enable_signaling(struct dma_fence *fence) +{ + return true; +} + +static void i915_clflush_release(struct dma_fence *fence) +{ + struct clflush *clflush = container_of(fence, typeof(*clflush), dma); + + i915_sw_fence_fini(&clflush->wait); + + BUILD_BUG_ON(offsetof(typeof(*clflush), dma)); + dma_fence_free(&clflush->dma); +} + +static const struct dma_fence_ops i915_clflush_ops = { + .get_driver_name = i915_clflush_get_driver_name, + .get_timeline_name = i915_clflush_get_timeline_name, + .enable_signaling = i915_clflush_enable_signaling, + .wait = dma_fence_default_wait, + .release = i915_clflush_release, +}; + +static void __i915_do_clflush(struct drm_i915_gem_object *obj) +{ + 
drm_clflush_sg(obj->mm.pages); + obj->cache_dirty = false; + + intel_fb_obj_flush(obj, ORIGIN_CPU); +} + +static void i915_clflush_work(struct work_struct *work) +{ + struct clflush *clflush = container_of(work, typeof(*clflush), work); + struct drm_i915_gem_object *obj = clflush->obj; + + if (!obj->cache_dirty) + goto out; + + if (i915_gem_object_pin_pages(obj)) { + DRM_ERROR("Failed to acquire obj->pages for clflushing\n"); + goto out; + } + + __i915_do_clflush(obj); + + i915_gem_object_unpin_pages(obj); + +out: + i915_gem_object_put(obj); + + dma_fence_signal(&clflush->dma); + dma_fence_put(&clflush->dma); +} + +static int __i915_sw_fence_call +i915_clflush_notify(struct i915_sw_fence *fence, + enum i915_sw_fence_notify state) +{ + struct clflush *clflush = container_of(fence, typeof(*clflush), wait); + + switch (state) { + case FENCE_COMPLETE: + schedule_work(&clflush->work); + break; + + case FENCE_FREE: + dma_fence_put(&clflush->dma); + break; + } + + return NOTIFY_DONE; +} + +void i915_gem_clflush_object(struct drm_i915_gem_object *obj, + unsigned int flags) +{ + struct clflush *clflush; + + /* + * Stolen memory is always coherent with the GPU as it is explicitly + * marked as wc by the system, or the system is cache-coherent. + * Similarly, we only access struct pages through the CPU cache, so + * anything not backed by physical memory we consider to be always + * coherent and not need clflushing. + */ + if (!i915_gem_object_has_struct_page(obj)) + return; + + obj->cache_dirty = true; + + /* If the GPU is snooping the contents of the CPU cache, + * we do not need to manually clear the CPU cache lines. However, + * the caches are only snooped when the render cache is + * flushed/invalidated. As we always have to emit invalidations + * and flushes when moving into and out of the RENDER domain, correct + * snooping behaviour occurs naturally as the result of our domain + * tracking. 
+ */ + if (!(flags & I915_CLFLUSH_FORCE) && i915_gem_object_is_coherent(obj)) + return; + + trace_i915_gem_object_clflush(obj); + + clflush = NULL; + if (!(flags & I915_CLFLUSH_SYNC)) + clflush = kmalloc(sizeof(*clflush), GFP_KERNEL); + if (clflush) { + dma_fence_init(&clflush->dma, + &i915_clflush_ops, + &clflush_lock, + clflush_context, + 0); + i915_sw_fence_init(&clflush->wait, i915_clflush_notify); + + clflush->obj = i915_gem_object_get(obj); + INIT_WORK(&clflush->work, i915_clflush_work); + + dma_fence_get(&clflush->dma); + + i915_sw_fence_await_reservation(&clflush->wait, + obj->resv, NULL, + true, I915_FENCE_TIMEOUT, + GFP_KERNEL); + + reservation_object_lock(obj->resv, NULL); + reservation_object_add_excl_fence(obj->resv, &clflush->dma); + reservation_object_unlock(obj->resv); + + i915_sw_fence_commit(&clflush->wait); + } else if (obj->mm.pages) { + __i915_do_clflush(obj); + } else { + GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU); + } +} + +void i915_gem_clflush_init(struct drm_i915_private *i915) +{ + clflush_context = dma_fence_context_alloc(1); +} diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.h b/drivers/gpu/drm/i915/i915_gem_clflush.h new file mode 100644 index 000000000000..b62d61a2d15f --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_clflush.h @@ -0,0 +1,37 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_GEM_CLFLUSH_H__ +#define __I915_GEM_CLFLUSH_H__ + +struct drm_i915_private; +struct drm_i915_gem_object; + +void i915_gem_clflush_init(struct drm_i915_private *i915); +void i915_gem_clflush_object(struct drm_i915_gem_object *obj, + unsigned int flags); +#define I915_CLFLUSH_FORCE BIT(0) +#define I915_CLFLUSH_SYNC BIT(1) + +#endif /* __I915_GEM_CLFLUSH_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index e2d83b6d376b..8bd0c4966913 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -92,21 +92,6 @@ #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 -/* This is a HW constraint. The value below is the largest known requirement - * I've seen in a spec to date, and that was a workaround for a non-shipping - * part. It should be safe to decrease this, but it's more future proof as is. 
- */ -#define GEN6_CONTEXT_ALIGN (64<<10) -#define GEN7_CONTEXT_ALIGN I915_GTT_MIN_ALIGNMENT - -static size_t get_context_alignment(struct drm_i915_private *dev_priv) -{ - if (IS_GEN6(dev_priv)) - return GEN6_CONTEXT_ALIGN; - - return GEN7_CONTEXT_ALIGN; -} - static int get_context_size(struct drm_i915_private *dev_priv) { int ret; @@ -236,6 +221,30 @@ static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out) return 0; } +static u32 default_desc_template(const struct drm_i915_private *i915, + const struct i915_hw_ppgtt *ppgtt) +{ + u32 address_mode; + u32 desc; + + desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE; + + address_mode = INTEL_LEGACY_32B_CONTEXT; + if (ppgtt && i915_vm_is_48bit(&ppgtt->base)) + address_mode = INTEL_LEGACY_64B_CONTEXT; + desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; + + if (IS_GEN8(i915)) + desc |= GEN8_CTX_L3LLC_COHERENT; + + /* TODO: WaDisableLiteRestore when we start using semaphore + * signalling between Command Streamers + * ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; + */ + + return desc; +} + static struct i915_gem_context * __create_hw_context(struct drm_i915_private *dev_priv, struct drm_i915_file_private *file_priv) @@ -257,8 +266,6 @@ __create_hw_context(struct drm_i915_private *dev_priv, list_add_tail(&ctx->link, &dev_priv->context_list); ctx->i915 = dev_priv; - ctx->ggtt_alignment = get_context_alignment(dev_priv); - if (dev_priv->hw_context_size) { struct drm_i915_gem_object *obj; struct i915_vma *vma; @@ -309,8 +316,8 @@ __create_hw_context(struct drm_i915_private *dev_priv, i915_gem_context_set_bannable(ctx); ctx->ring_size = 4 * PAGE_SIZE; - ctx->desc_template = GEN8_CTX_ADDRESSING_MODE(dev_priv) << - GEN8_CTX_ADDRESSING_MODE_SHIFT; + ctx->desc_template = + default_desc_template(dev_priv, dev_priv->mm.aliasing_ppgtt); /* GuC requires the ring to be placed above GUC_WOPCM_TOP. 
If GuC is not * present or not in use we still need a small bias as ring wraparound @@ -331,6 +338,13 @@ err_out: return ERR_PTR(ret); } +static void __destroy_hw_context(struct i915_gem_context *ctx, + struct drm_i915_file_private *file_priv) +{ + idr_remove(&file_priv->context_idr, ctx->user_handle); + context_close(ctx); +} + /** * The default context needs to exist per ring that uses contexts. It stores the * context state of the GPU for applications that don't utilize HW contexts, as @@ -355,12 +369,12 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, if (IS_ERR(ppgtt)) { DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); - idr_remove(&file_priv->context_idr, ctx->user_handle); - context_close(ctx); + __destroy_hw_context(ctx, file_priv); return ERR_CAST(ppgtt); } ctx->ppgtt = ppgtt; + ctx->desc_template = default_desc_template(dev_priv, ppgtt); } trace_i915_context_create(ctx); @@ -399,7 +413,8 @@ i915_gem_context_create_gvt(struct drm_device *dev) i915_gem_context_set_closed(ctx); /* not user accessible */ i915_gem_context_clear_bannable(ctx); i915_gem_context_set_force_single_submission(ctx); - ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */ + if (!i915.enable_guc_submission) + ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */ GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); out: @@ -450,6 +465,11 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv) return PTR_ERR(ctx); } + /* For easy recognisablity, we want the kernel context to be 0 and then + * all user contexts will have non-zero hw_id. 
+ */ + GEM_BUG_ON(ctx->hw_id); + i915_gem_context_clear_bannable(ctx); ctx->priority = I915_PRIORITY_MIN; /* lowest priority; idle task */ dev_priv->kernel_context = ctx; @@ -556,135 +576,116 @@ void i915_gem_context_close(struct drm_device *dev, struct drm_file *file) } static inline int -mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) +mi_set_context(struct drm_i915_gem_request *req, u32 flags) { struct drm_i915_private *dev_priv = req->i915; - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; enum intel_engine_id id; - u32 flags = hw_flags | MI_MM_SPACE_GTT; const int num_rings = - /* Use an extended w/a on ivb+ if signalling from other rings */ - i915.semaphores ? + /* Use an extended w/a on gen7 if signalling from other rings */ + (i915.semaphores && INTEL_GEN(dev_priv) == 7) ? INTEL_INFO(dev_priv)->num_rings - 1 : 0; - int len, ret; - - /* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB - * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value - * explicitly, so we rely on the value at ring init, stored in - * itlb_before_ctx_switch. - */ - if (IS_GEN6(dev_priv)) { - ret = engine->emit_flush(req, EMIT_INVALIDATE); - if (ret) - return ret; - } + int len; + u32 *cs; - /* These flags are for resource streamer on HSW+ */ + flags |= MI_MM_SPACE_GTT; if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8) - flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN); - else if (INTEL_GEN(dev_priv) < 8) - flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN); - + /* These flags are for resource streamer on HSW+ */ + flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN; + else + flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN; len = 4; if (INTEL_GEN(dev_priv) >= 7) len += 2 + (num_rings ? 
4*num_rings + 6 : 0); - ret = intel_ring_begin(req, len); - if (ret) - return ret; + cs = intel_ring_begin(req, len); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ if (INTEL_GEN(dev_priv) >= 7) { - intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE); + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; if (num_rings) { struct intel_engine_cs *signaller; - intel_ring_emit(ring, - MI_LOAD_REGISTER_IMM(num_rings)); + *cs++ = MI_LOAD_REGISTER_IMM(num_rings); for_each_engine(signaller, dev_priv, id) { if (signaller == engine) continue; - intel_ring_emit_reg(ring, - RING_PSMI_CTL(signaller->mmio_base)); - intel_ring_emit(ring, - _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE)); + *cs++ = i915_mmio_reg_offset( + RING_PSMI_CTL(signaller->mmio_base)); + *cs++ = _MASKED_BIT_ENABLE( + GEN6_PSMI_SLEEP_MSG_DISABLE); } } } - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_SET_CONTEXT); - intel_ring_emit(ring, - i915_ggtt_offset(req->ctx->engine[RCS].state) | flags); + *cs++ = MI_NOOP; + *cs++ = MI_SET_CONTEXT; + *cs++ = i915_ggtt_offset(req->ctx->engine[RCS].state) | flags; /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv */ - intel_ring_emit(ring, MI_NOOP); + *cs++ = MI_NOOP; if (INTEL_GEN(dev_priv) >= 7) { if (num_rings) { struct intel_engine_cs *signaller; i915_reg_t last_reg = {}; /* keep gcc quiet */ - intel_ring_emit(ring, - MI_LOAD_REGISTER_IMM(num_rings)); + *cs++ = MI_LOAD_REGISTER_IMM(num_rings); for_each_engine(signaller, dev_priv, id) { if (signaller == engine) continue; last_reg = RING_PSMI_CTL(signaller->mmio_base); - intel_ring_emit_reg(ring, last_reg); - intel_ring_emit(ring, - _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE)); + *cs++ = i915_mmio_reg_offset(last_reg); + *cs++ = _MASKED_BIT_DISABLE( + GEN6_PSMI_SLEEP_MSG_DISABLE); } /* Insert a delay before the next switch! 
*/ - intel_ring_emit(ring, - MI_STORE_REGISTER_MEM | - MI_SRM_LRM_GLOBAL_GTT); - intel_ring_emit_reg(ring, last_reg); - intel_ring_emit(ring, - i915_ggtt_offset(engine->scratch)); - intel_ring_emit(ring, MI_NOOP); + *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + *cs++ = i915_mmio_reg_offset(last_reg); + *cs++ = i915_ggtt_offset(engine->scratch); + *cs++ = MI_NOOP; } - intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE); + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; } - intel_ring_advance(ring); + intel_ring_advance(req, cs); - return ret; + return 0; } static int remap_l3(struct drm_i915_gem_request *req, int slice) { - u32 *remap_info = req->i915->l3_parity.remap_info[slice]; - struct intel_ring *ring = req->ring; - int i, ret; + u32 *cs, *remap_info = req->i915->l3_parity.remap_info[slice]; + int i; if (!remap_info) return 0; - ret = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2); - if (ret) - return ret; + cs = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* * Note: We do not worry about the concurrent register cacheline hang * here because no other code should access these registers other than * at initialization time. 
*/ - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4)); + *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4); for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) { - intel_ring_emit_reg(ring, GEN7_L3LOG(slice, i)); - intel_ring_emit(ring, remap_info[i]); + *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i)); + *cs++ = remap_info[i]; } - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -932,7 +933,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) } ret = i915_switch_context(req); - i915_add_request_no_flush(req); + i915_add_request(req); if (ret) return ret; } @@ -1013,8 +1014,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, return PTR_ERR(ctx); } - idr_remove(&file_priv->context_idr, ctx->user_handle); - context_close(ctx); + __destroy_hw_context(ctx, file_priv); mutex_unlock(&dev->struct_mutex); DRM_DEBUG("HW context %d destroyed\n", args->ctx_id); @@ -1163,3 +1163,8 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, return 0; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_context.c" +#include "selftests/i915_gem_context.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index e9c008fe14b1..4af2ab94558b 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -140,8 +140,6 @@ struct i915_gem_context { */ int priority; - /** ggtt_alignment: alignment restriction for context objects */ - u32 ggtt_alignment; /** ggtt_offset_bias: placement restriction for context objects */ u32 ggtt_offset_bias; diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 29bb8011dbc4..f225bf680b6d 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -200,10 +200,10 @@ static const struct dma_buf_ops i915_dmabuf_ops = { .map_dma_buf = 
i915_gem_map_dma_buf, .unmap_dma_buf = i915_gem_unmap_dma_buf, .release = drm_gem_dmabuf_release, - .kmap = i915_gem_dmabuf_kmap, - .kmap_atomic = i915_gem_dmabuf_kmap_atomic, - .kunmap = i915_gem_dmabuf_kunmap, - .kunmap_atomic = i915_gem_dmabuf_kunmap_atomic, + .map = i915_gem_dmabuf_kmap, + .map_atomic = i915_gem_dmabuf_kmap_atomic, + .unmap = i915_gem_dmabuf_kunmap, + .unmap_atomic = i915_gem_dmabuf_kunmap_atomic, .mmap = i915_gem_dmabuf_mmap, .vmap = i915_gem_dmabuf_vmap, .vunmap = i915_gem_dmabuf_vunmap, @@ -307,3 +307,8 @@ fail_detach: return ERR_PTR(ret); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_dmabuf.c" +#include "selftests/i915_gem_dmabuf.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 3be2503aa042..51e365f70464 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -196,7 +196,6 @@ search_again: if (ret) return ret; - i915_gem_retire_requests(dev_priv); goto search_again; found: @@ -258,6 +257,9 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, int ret = 0; lockdep_assert_held(&vm->i915->drm.struct_mutex); + GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE)); + GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); + trace_i915_gem_evict_node(vm, target, flags); /* Retire before we search the active list. Although we have @@ -271,11 +273,13 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, check_color = vm->mm.color_adjust; if (check_color) { /* Expand search to cover neighbouring guard pages (or lack!) 
*/ - if (start > vm->start) + if (start) start -= I915_GTT_PAGE_SIZE; - if (end < vm->start + vm->total) - end += I915_GTT_PAGE_SIZE; + + /* Always look at the page afterwards to avoid the end-of-GTT */ + end += I915_GTT_PAGE_SIZE; } + GEM_BUG_ON(start >= end); drm_mm_for_each_node_in_range(node, &vm->mm, start, end) { /* If we find any non-objects (!vma), we cannot evict them */ @@ -284,6 +288,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, break; } + GEM_BUG_ON(!node->allocated); vma = container_of(node, typeof(*vma), node); /* If we are using coloring to insert guard pages between @@ -377,7 +382,6 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle) if (ret) return ret; - i915_gem_retire_requests(dev_priv); WARN_ON(!list_empty(&vm->active_list)); } @@ -387,3 +391,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle) return 0; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_gem_evict.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 15a15d00a6bf..a3e59c8ef27b 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -28,12 +28,14 @@ #include <linux/dma_remapping.h> #include <linux/reservation.h> +#include <linux/sync_file.h> #include <linux/uaccess.h> #include <drm/drmP.h> #include <drm/i915_drm.h> #include "i915_drv.h" +#include "i915_gem_clflush.h" #include "i915_trace.h" #include "intel_drv.h" #include "intel_frontbuffer.h" @@ -1112,13 +1114,18 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; + if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC) + continue; + + if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) { + i915_gem_clflush_object(obj, 0); + obj->base.write_domain = 0; + } + ret = i915_gem_request_await_object (req, obj, obj->base.pending_write_domain); if (ret) 
return ret; - - if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) - i915_gem_clflush_object(obj, false); } /* Unconditionally flush any chipset caches (for streaming writes). */ @@ -1299,12 +1306,12 @@ static void eb_export_fence(struct drm_i915_gem_object *obj, * handle an error right now. Worst case should be missed * synchronisation leading to rendering corruption. */ - ww_mutex_lock(&resv->lock, NULL); + reservation_object_lock(resv, NULL); if (flags & EXEC_OBJECT_WRITE) reservation_object_add_excl_fence(resv, &req->fence); else if (reservation_object_reserve_shared(resv) == 0) reservation_object_add_shared_fence(resv, &req->fence); - ww_mutex_unlock(&resv->lock); + reservation_object_unlock(resv); } static void @@ -1315,8 +1322,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; - u32 old_read = obj->base.read_domains; - u32 old_write = obj->base.write_domain; obj->base.write_domain = obj->base.pending_write_domain; if (obj->base.write_domain) @@ -1327,32 +1332,31 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, i915_vma_move_to_active(vma, req, vma->exec_entry->flags); eb_export_fence(obj, req, vma->exec_entry->flags); - trace_i915_gem_object_change_domain(obj, old_read, old_write); } } static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; - int ret, i; + u32 *cs; + int i; if (!IS_GEN7(req->i915) || req->engine->id != RCS) { DRM_DEBUG("sol reset is gen7/rcs only\n"); return -EINVAL; } - ret = intel_ring_begin(req, 4 * 3); - if (ret) - return ret; + cs = intel_ring_begin(req, 4 * 3); + if (IS_ERR(cs)) + return PTR_ERR(cs); for (i = 0; i < 4; i++) { - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i)); - intel_ring_emit(ring, 0); + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i)); + *cs++ = 0; } - 
intel_ring_advance(ring); + intel_ring_advance(req, cs); return 0; } @@ -1405,6 +1409,14 @@ out: return vma; } +static void +add_to_client(struct drm_i915_gem_request *req, + struct drm_file *file) +{ + req->file_priv = file->driver_priv; + list_add_tail(&req->client_link, &req->file_priv->mm.request_list); +} + static int execbuf_submit(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, @@ -1445,8 +1457,6 @@ execbuf_submit(struct i915_execbuffer_params *params, if (ret) return ret; - trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); - i915_gem_execbuffer_move_to_active(vmas, params->request); return 0; @@ -1545,6 +1555,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct i915_execbuffer_params *params = ¶ms_master; const u32 ctx_id = i915_execbuffer2_get_context_id(*args); u32 dispatch_flags; + struct dma_fence *in_fence = NULL; + struct sync_file *out_fence = NULL; + int out_fence_fd = -1; int ret; bool need_relocs; @@ -1588,6 +1601,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, dispatch_flags |= I915_DISPATCH_RS; } + if (args->flags & I915_EXEC_FENCE_IN) { + in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); + if (!in_fence) + return -EINVAL; + } + + if (args->flags & I915_EXEC_FENCE_OUT) { + out_fence_fd = get_unused_fd_flags(O_CLOEXEC); + if (out_fence_fd < 0) { + ret = out_fence_fd; + goto err_in_fence; + } + } + /* Take a local wakeref for preparing to dispatch the execbuf as * we expect to access the hardware fairly frequently in the * process. 
Upon first dispatch, we acquire another prolonged @@ -1732,6 +1759,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err_batch_unpin; } + if (in_fence) { + ret = i915_gem_request_await_dma_fence(params->request, + in_fence); + if (ret < 0) + goto err_request; + } + + if (out_fence_fd != -1) { + out_fence = sync_file_create(&params->request->fence); + if (!out_fence) { + ret = -ENOMEM; + goto err_request; + } + } + /* Whilst this request exists, batch_obj will be on the * active_list, and so will hold the active reference. Only when this * request is retired will the the batch_obj be moved onto the @@ -1740,10 +1782,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, */ params->request->batch = params->batch; - ret = i915_gem_request_add_to_client(params->request, file); - if (ret) - goto err_request; - /* * Save assorted stuff away to pass through to *_submission(). * NB: This data should be 'persistent' and not local as it will @@ -1756,9 +1794,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, params->dispatch_flags = dispatch_flags; params->ctx = ctx; + trace_i915_gem_request_queue(params->request, dispatch_flags); + ret = execbuf_submit(params, args, &eb->vmas); err_request: __i915_add_request(params->request, ret == 0); + add_to_client(params->request, file); + + if (out_fence) { + if (ret == 0) { + fd_install(out_fence_fd, out_fence->file); + args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */ + args->rsvd2 |= (u64)out_fence_fd << 32; + out_fence_fd = -1; + } else { + fput(out_fence->file); + } + } err_batch_unpin: /* @@ -1780,6 +1832,10 @@ pre_mutex_err: /* intel_gpu_busy should also get a ref, so it will free when the device * is really idle. 
*/ intel_runtime_pm_put(dev_priv); + if (out_fence_fd != -1) + put_unused_fd(out_fence_fd); +err_in_fence: + dma_fence_put(in_fence); return ret; } @@ -1887,11 +1943,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, return -EINVAL; } - if (args->rsvd2 != 0) { - DRM_DEBUG("dirty rvsd2 field\n"); - return -EINVAL; - } - exec2_list = drm_malloc_gfp(args->buffer_count, sizeof(*exec2_list), GFP_TEMPORARY); diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index fadbe8f4c745..5fe2cd8c8f28 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -248,7 +248,14 @@ static int fence_update(struct drm_i915_fence_reg *fence, list_move(&fence->link, &fence->i915->mm.fence_list); } - fence_write(fence, vma); + /* We only need to update the register itself if the device is awake. + * If the device is currently powered down, we will defer the write + * to the runtime resume, see i915_gem_restore_fences(). + */ + if (intel_runtime_pm_get_if_in_use(fence->i915)) { + fence_write(fence, vma); + intel_runtime_pm_put(fence->i915); + } if (vma) { if (fence->vma != vma) { @@ -278,8 +285,6 @@ i915_vma_put_fence(struct i915_vma *vma) { struct drm_i915_fence_reg *fence = vma->fence; - assert_rpm_wakelock_held(vma->vm->i915); - if (!fence) return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 96e45a4d5441..8bab4aea63e6 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -23,6 +23,9 @@ * */ +#include <linux/slab.h> /* fault-inject.h is not standalone! 
*/ + +#include <linux/fault-inject.h> #include <linux/log2.h> #include <linux/random.h> #include <linux/seq_file.h> @@ -187,11 +190,17 @@ static int ppgtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, u32 unused) { - u32 pte_flags = 0; + u32 pte_flags; + int ret; + + ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->size); + if (ret) + return ret; vma->pages = vma->obj->mm.pages; /* Currently applicable only to VLV */ + pte_flags = 0; if (vma->obj->gt_ro) pte_flags |= PTE_READ_ONLY; @@ -203,9 +212,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma, static void ppgtt_unbind_vma(struct i915_vma *vma) { - vma->vm->clear_range(vma->vm, - vma->node.start, - vma->size); + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); } static gen8_pte_t gen8_pte_encode(dma_addr_t addr, @@ -340,268 +347,229 @@ static gen6_pte_t iris_pte_encode(dma_addr_t addr, return pte; } -static int __setup_page_dma(struct drm_i915_private *dev_priv, - struct i915_page_dma *p, gfp_t flags) +static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp) { - struct device *kdev = &dev_priv->drm.pdev->dev; + struct page *page; - p->page = alloc_page(flags); - if (!p->page) - return -ENOMEM; + if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1))) + i915_gem_shrink_all(vm->i915); - p->daddr = dma_map_page(kdev, - p->page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + if (vm->free_pages.nr) + return vm->free_pages.pages[--vm->free_pages.nr]; - if (dma_mapping_error(kdev, p->daddr)) { - __free_page(p->page); - return -EINVAL; - } + page = alloc_page(gfp); + if (!page) + return NULL; - return 0; + if (vm->pt_kmap_wc) + set_pages_array_wc(&page, 1); + + return page; } -static int setup_page_dma(struct drm_i915_private *dev_priv, - struct i915_page_dma *p) +static void vm_free_pages_release(struct i915_address_space *vm) { - return __setup_page_dma(dev_priv, p, I915_GFP_DMA); + GEM_BUG_ON(!pagevec_count(&vm->free_pages)); + + if (vm->pt_kmap_wc) + 
set_pages_array_wb(vm->free_pages.pages, + pagevec_count(&vm->free_pages)); + + __pagevec_release(&vm->free_pages); } -static void cleanup_page_dma(struct drm_i915_private *dev_priv, - struct i915_page_dma *p) +static void vm_free_page(struct i915_address_space *vm, struct page *page) { - struct pci_dev *pdev = dev_priv->drm.pdev; + if (!pagevec_add(&vm->free_pages, page)) + vm_free_pages_release(vm); +} - if (WARN_ON(!p->page)) - return; +static int __setup_page_dma(struct i915_address_space *vm, + struct i915_page_dma *p, + gfp_t gfp) +{ + p->page = vm_alloc_page(vm, gfp | __GFP_NOWARN | __GFP_NORETRY); + if (unlikely(!p->page)) + return -ENOMEM; + + p->daddr = dma_map_page(vm->dma, p->page, 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(vm->dma, p->daddr))) { + vm_free_page(vm, p->page); + return -ENOMEM; + } - dma_unmap_page(&pdev->dev, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - __free_page(p->page); - memset(p, 0, sizeof(*p)); + return 0; } -static void *kmap_page_dma(struct i915_page_dma *p) +static int setup_page_dma(struct i915_address_space *vm, + struct i915_page_dma *p) { - return kmap_atomic(p->page); + return __setup_page_dma(vm, p, I915_GFP_DMA); } -/* We use the flushing unmap only with ppgtt structures: - * page directories, page tables and scratch pages. - */ -static void kunmap_page_dma(struct drm_i915_private *dev_priv, void *vaddr) +static void cleanup_page_dma(struct i915_address_space *vm, + struct i915_page_dma *p) { - /* There are only few exceptions for gen >=6. chv and bxt. - * And we are not sure about the latter so play safe for now. 
- */ - if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv)) - drm_clflush_virt_range(vaddr, PAGE_SIZE); - - kunmap_atomic(vaddr); + dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + vm_free_page(vm, p->page); } -#define kmap_px(px) kmap_page_dma(px_base(px)) -#define kunmap_px(ppgtt, vaddr) \ - kunmap_page_dma((ppgtt)->base.i915, (vaddr)) +#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page) -#define setup_px(dev_priv, px) setup_page_dma((dev_priv), px_base(px)) -#define cleanup_px(dev_priv, px) cleanup_page_dma((dev_priv), px_base(px)) -#define fill_px(dev_priv, px, v) fill_page_dma((dev_priv), px_base(px), (v)) -#define fill32_px(dev_priv, px, v) \ - fill_page_dma_32((dev_priv), px_base(px), (v)) +#define setup_px(vm, px) setup_page_dma((vm), px_base(px)) +#define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px)) +#define fill_px(ppgtt, px, v) fill_page_dma((vm), px_base(px), (v)) +#define fill32_px(ppgtt, px, v) fill_page_dma_32((vm), px_base(px), (v)) -static void fill_page_dma(struct drm_i915_private *dev_priv, - struct i915_page_dma *p, const uint64_t val) +static void fill_page_dma(struct i915_address_space *vm, + struct i915_page_dma *p, + const u64 val) { + u64 * const vaddr = kmap_atomic(p->page); int i; - uint64_t * const vaddr = kmap_page_dma(p); for (i = 0; i < 512; i++) vaddr[i] = val; - kunmap_page_dma(dev_priv, vaddr); + kunmap_atomic(vaddr); } -static void fill_page_dma_32(struct drm_i915_private *dev_priv, - struct i915_page_dma *p, const uint32_t val32) +static void fill_page_dma_32(struct i915_address_space *vm, + struct i915_page_dma *p, + const u32 v) { - uint64_t v = val32; - - v = v << 32 | val32; - - fill_page_dma(dev_priv, p, v); + fill_page_dma(vm, p, (u64)v << 32 | v); } static int -setup_scratch_page(struct drm_i915_private *dev_priv, - struct i915_page_dma *scratch, - gfp_t gfp) +setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) { - return __setup_page_dma(dev_priv, scratch, gfp | __GFP_ZERO); 
+ return __setup_page_dma(vm, &vm->scratch_page, gfp | __GFP_ZERO); } -static void cleanup_scratch_page(struct drm_i915_private *dev_priv, - struct i915_page_dma *scratch) +static void cleanup_scratch_page(struct i915_address_space *vm) { - cleanup_page_dma(dev_priv, scratch); + cleanup_page_dma(vm, &vm->scratch_page); } -static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv) +static struct i915_page_table *alloc_pt(struct i915_address_space *vm) { struct i915_page_table *pt; - const size_t count = INTEL_GEN(dev_priv) >= 8 ? GEN8_PTES : GEN6_PTES; - int ret = -ENOMEM; - pt = kzalloc(sizeof(*pt), GFP_KERNEL); - if (!pt) + pt = kmalloc(sizeof(*pt), GFP_KERNEL | __GFP_NOWARN); + if (unlikely(!pt)) return ERR_PTR(-ENOMEM); - pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes), - GFP_KERNEL); - - if (!pt->used_ptes) - goto fail_bitmap; - - ret = setup_px(dev_priv, pt); - if (ret) - goto fail_page_m; + if (unlikely(setup_px(vm, pt))) { + kfree(pt); + return ERR_PTR(-ENOMEM); + } + pt->used_ptes = 0; return pt; - -fail_page_m: - kfree(pt->used_ptes); -fail_bitmap: - kfree(pt); - - return ERR_PTR(ret); } -static void free_pt(struct drm_i915_private *dev_priv, - struct i915_page_table *pt) +static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt) { - cleanup_px(dev_priv, pt); - kfree(pt->used_ptes); + cleanup_px(vm, pt); kfree(pt); } static void gen8_initialize_pt(struct i915_address_space *vm, struct i915_page_table *pt) { - gen8_pte_t scratch_pte; - - scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC); - - fill_px(vm->i915, pt, scratch_pte); + fill_px(vm, pt, + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC)); } static void gen6_initialize_pt(struct i915_address_space *vm, struct i915_page_table *pt) { - gen6_pte_t scratch_pte; - - WARN_ON(vm->scratch_page.daddr == 0); - - scratch_pte = vm->pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC, 0); - - fill32_px(vm->i915, pt, 
scratch_pte); + fill32_px(vm, pt, + vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0)); } -static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv) +static struct i915_page_directory *alloc_pd(struct i915_address_space *vm) { struct i915_page_directory *pd; - int ret = -ENOMEM; - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) + pd = kzalloc(sizeof(*pd), GFP_KERNEL | __GFP_NOWARN); + if (unlikely(!pd)) return ERR_PTR(-ENOMEM); - pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES), - sizeof(*pd->used_pdes), GFP_KERNEL); - if (!pd->used_pdes) - goto fail_bitmap; - - ret = setup_px(dev_priv, pd); - if (ret) - goto fail_page_m; + if (unlikely(setup_px(vm, pd))) { + kfree(pd); + return ERR_PTR(-ENOMEM); + } + pd->used_pdes = 0; return pd; - -fail_page_m: - kfree(pd->used_pdes); -fail_bitmap: - kfree(pd); - - return ERR_PTR(ret); } -static void free_pd(struct drm_i915_private *dev_priv, +static void free_pd(struct i915_address_space *vm, struct i915_page_directory *pd) { - if (px_page(pd)) { - cleanup_px(dev_priv, pd); - kfree(pd->used_pdes); - kfree(pd); - } + cleanup_px(vm, pd); + kfree(pd); } static void gen8_initialize_pd(struct i915_address_space *vm, struct i915_page_directory *pd) { - gen8_pde_t scratch_pde; - - scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC); + unsigned int i; - fill_px(vm->i915, pd, scratch_pde); + fill_px(vm, pd, + gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC)); + for (i = 0; i < I915_PDES; i++) + pd->page_table[i] = vm->scratch_pt; } -static int __pdp_init(struct drm_i915_private *dev_priv, +static int __pdp_init(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp) { - size_t pdpes = I915_PDPES_PER_PDP(dev_priv); + const unsigned int pdpes = i915_pdpes_per_pdp(vm); + unsigned int i; - pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes), - sizeof(unsigned long), - GFP_KERNEL); - if (!pdp->used_pdpes) + pdp->page_directory = kmalloc_array(pdpes, 
sizeof(*pdp->page_directory), + GFP_KERNEL | __GFP_NOWARN); + if (unlikely(!pdp->page_directory)) return -ENOMEM; - pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory), - GFP_KERNEL); - if (!pdp->page_directory) { - kfree(pdp->used_pdpes); - /* the PDP might be the statically allocated top level. Keep it - * as clean as possible */ - pdp->used_pdpes = NULL; - return -ENOMEM; - } + for (i = 0; i < pdpes; i++) + pdp->page_directory[i] = vm->scratch_pd; return 0; } static void __pdp_fini(struct i915_page_directory_pointer *pdp) { - kfree(pdp->used_pdpes); kfree(pdp->page_directory); pdp->page_directory = NULL; } -static struct -i915_page_directory_pointer *alloc_pdp(struct drm_i915_private *dev_priv) +static inline bool use_4lvl(const struct i915_address_space *vm) +{ + return i915_vm_is_48bit(vm); +} + +static struct i915_page_directory_pointer * +alloc_pdp(struct i915_address_space *vm) { struct i915_page_directory_pointer *pdp; int ret = -ENOMEM; - WARN_ON(!USES_FULL_48BIT_PPGTT(dev_priv)); + WARN_ON(!use_4lvl(vm)); pdp = kzalloc(sizeof(*pdp), GFP_KERNEL); if (!pdp) return ERR_PTR(-ENOMEM); - ret = __pdp_init(dev_priv, pdp); + ret = __pdp_init(vm, pdp); if (ret) goto fail_bitmap; - ret = setup_px(dev_priv, pdp); + ret = setup_px(vm, pdp); if (ret) goto fail_page_m; @@ -615,14 +583,16 @@ fail_bitmap: return ERR_PTR(ret); } -static void free_pdp(struct drm_i915_private *dev_priv, +static void free_pdp(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp) { __pdp_fini(pdp); - if (USES_FULL_48BIT_PPGTT(dev_priv)) { - cleanup_px(dev_priv, pdp); - kfree(pdp); - } + + if (!use_4lvl(vm)) + return; + + cleanup_px(vm, pdp); + kfree(pdp); } static void gen8_initialize_pdp(struct i915_address_space *vm, @@ -632,47 +602,18 @@ static void gen8_initialize_pdp(struct i915_address_space *vm, scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC); - fill_px(vm->i915, pdp, scratch_pdpe); + fill_px(vm, pdp, scratch_pdpe); } static void 
gen8_initialize_pml4(struct i915_address_space *vm, struct i915_pml4 *pml4) { - gen8_ppgtt_pml4e_t scratch_pml4e; - - scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp), - I915_CACHE_LLC); - - fill_px(vm->i915, pml4, scratch_pml4e); -} - -static void -gen8_setup_pdpe(struct i915_hw_ppgtt *ppgtt, - struct i915_page_directory_pointer *pdp, - struct i915_page_directory *pd, - int index) -{ - gen8_ppgtt_pdpe_t *page_directorypo; + unsigned int i; - if (!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev))) - return; - - page_directorypo = kmap_px(pdp); - page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); - kunmap_px(ppgtt, page_directorypo); -} - -static void -gen8_setup_pml4e(struct i915_hw_ppgtt *ppgtt, - struct i915_pml4 *pml4, - struct i915_page_directory_pointer *pdp, - int index) -{ - gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4); - - WARN_ON(!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev))); - pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); - kunmap_px(ppgtt, pagemap); + fill_px(vm, pml4, + gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC)); + for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) + pml4->pdps[i] = vm->scratch_pdp; } /* Broadwell Page Directory Pointer Descriptors */ @@ -680,33 +621,32 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req, unsigned entry, dma_addr_t addr) { - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; - int ret; + u32 *cs; BUG_ON(entry >= 4); - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, entry)); - intel_ring_emit(ring, upper_32_bits(addr)); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, entry)); - intel_ring_emit(ring, lower_32_bits(addr)); - intel_ring_advance(ring); + *cs++ = MI_LOAD_REGISTER_IMM(1); + 
*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, entry)); + *cs++ = upper_32_bits(addr); + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, entry)); + *cs++ = lower_32_bits(addr); + intel_ring_advance(req, cs); return 0; } -static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req) +static int gen8_mm_switch_3lvl(struct i915_hw_ppgtt *ppgtt, + struct drm_i915_gem_request *req) { int i, ret; - for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { + for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) { const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); ret = gen8_write_pdp(req, i, pd_daddr); @@ -717,8 +657,8 @@ static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt, return 0; } -static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req) +static int gen8_mm_switch_4lvl(struct i915_hw_ppgtt *ppgtt, + struct drm_i915_gem_request *req) { return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4)); } @@ -738,70 +678,80 @@ static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) */ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, struct i915_page_table *pt, - uint64_t start, - uint64_t length) + u64 start, u64 length) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); unsigned int num_entries = gen8_pte_count(start, length); unsigned int pte = gen8_pte_index(start); unsigned int pte_end = pte + num_entries; - gen8_pte_t *pt_vaddr; - gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC); + const gen8_pte_t scratch_pte = + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); + gen8_pte_t *vaddr; - if (WARN_ON(!px_page(pt))) - return false; + GEM_BUG_ON(num_entries > pt->used_ptes); - GEM_BUG_ON(pte_end > GEN8_PTES); + pt->used_ptes -= num_entries; + if (!pt->used_ptes) + return true; - bitmap_clear(pt->used_ptes, pte, num_entries); - if (USES_FULL_PPGTT(vm->i915)) { - if (bitmap_empty(pt->used_ptes, GEN8_PTES)) 
- return true; - } + vaddr = kmap_atomic_px(pt); + while (pte < pte_end) + vaddr[pte++] = scratch_pte; + kunmap_atomic(vaddr); - pt_vaddr = kmap_px(pt); + return false; +} - while (pte < pte_end) - pt_vaddr[pte++] = scratch_pte; +static void gen8_ppgtt_set_pde(struct i915_address_space *vm, + struct i915_page_directory *pd, + struct i915_page_table *pt, + unsigned int pde) +{ + gen8_pde_t *vaddr; - kunmap_px(ppgtt, pt_vaddr); + pd->page_table[pde] = pt; - return false; + vaddr = kmap_atomic_px(pd); + vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC); + kunmap_atomic(vaddr); } -/* Removes entries from a single page dir, releasing it if it's empty. - * Caller can use the return value to update higher-level entries - */ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, struct i915_page_directory *pd, - uint64_t start, - uint64_t length) + u64 start, u64 length) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_table *pt; - uint64_t pde; - gen8_pde_t *pde_vaddr; - gen8_pde_t scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), - I915_CACHE_LLC); + u32 pde; gen8_for_each_pde(pt, pd, start, length, pde) { - if (WARN_ON(!pd->page_table[pde])) - break; - - if (gen8_ppgtt_clear_pt(vm, pt, start, length)) { - __clear_bit(pde, pd->used_pdes); - pde_vaddr = kmap_px(pd); - pde_vaddr[pde] = scratch_pde; - kunmap_px(ppgtt, pde_vaddr); - free_pt(vm->i915, pt); - } + GEM_BUG_ON(pt == vm->scratch_pt); + + if (!gen8_ppgtt_clear_pt(vm, pt, start, length)) + continue; + + gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde); + GEM_BUG_ON(!pd->used_pdes); + pd->used_pdes--; + + free_pt(vm, pt); } - if (bitmap_empty(pd->used_pdes, I915_PDES)) - return true; + return !pd->used_pdes; +} - return false; +static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm, + struct i915_page_directory_pointer *pdp, + struct i915_page_directory *pd, + unsigned int pdpe) +{ + gen8_ppgtt_pdpe_t *vaddr; + + pdp->page_directory[pdpe] = pd; + if (!use_4lvl(vm)) + 
return; + + vaddr = kmap_atomic_px(pdp); + vaddr[pdpe] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); + kunmap_atomic(vaddr); } /* Removes entries from a single page dir pointer, releasing it if it's empty. @@ -809,138 +759,189 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, */ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp, - uint64_t start, - uint64_t length) + u64 start, u64 length) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_directory *pd; - uint64_t pdpe; + unsigned int pdpe; gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - if (WARN_ON(!pdp->page_directory[pdpe])) - break; + GEM_BUG_ON(pd == vm->scratch_pd); - if (gen8_ppgtt_clear_pd(vm, pd, start, length)) { - __clear_bit(pdpe, pdp->used_pdpes); - gen8_setup_pdpe(ppgtt, pdp, vm->scratch_pd, pdpe); - free_pd(vm->i915, pd); - } + if (!gen8_ppgtt_clear_pd(vm, pd, start, length)) + continue; + + gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); + GEM_BUG_ON(!pdp->used_pdpes); + pdp->used_pdpes--; + + free_pd(vm, pd); } - mark_tlbs_dirty(ppgtt); + return !pdp->used_pdpes; +} - if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv))) - return true; +static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm, + u64 start, u64 length) +{ + gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length); +} - return false; +static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4, + struct i915_page_directory_pointer *pdp, + unsigned int pml4e) +{ + gen8_ppgtt_pml4e_t *vaddr; + + pml4->pdps[pml4e] = pdp; + + vaddr = kmap_atomic_px(pml4); + vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); + kunmap_atomic(vaddr); } /* Removes entries from a single pml4. * This is the top-level structure in 4-level page tables used on gen8+. * Empty entries are always scratch pml4e. 
*/ -static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm, - struct i915_pml4 *pml4, - uint64_t start, - uint64_t length) +static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, + u64 start, u64 length) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + struct i915_pml4 *pml4 = &ppgtt->pml4; struct i915_page_directory_pointer *pdp; - uint64_t pml4e; + unsigned int pml4e; - GEM_BUG_ON(!USES_FULL_48BIT_PPGTT(vm->i915)); + GEM_BUG_ON(!use_4lvl(vm)); gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - if (WARN_ON(!pml4->pdps[pml4e])) - break; + GEM_BUG_ON(pdp == vm->scratch_pdp); - if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) { - __clear_bit(pml4e, pml4->used_pml4es); - gen8_setup_pml4e(ppgtt, pml4, vm->scratch_pdp, pml4e); - free_pdp(vm->i915, pdp); - } + if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length)) + continue; + + gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); + + free_pdp(vm, pdp); } } -static void gen8_ppgtt_clear_range(struct i915_address_space *vm, - uint64_t start, uint64_t length) -{ - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); +struct sgt_dma { + struct scatterlist *sg; + dma_addr_t dma, max; +}; - if (USES_FULL_48BIT_PPGTT(vm->i915)) - gen8_ppgtt_clear_pml4(vm, &ppgtt->pml4, start, length); - else - gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length); +struct gen8_insert_pte { + u16 pml4e; + u16 pdpe; + u16 pde; + u16 pte; +}; + +static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start) +{ + return (struct gen8_insert_pte) { + gen8_pml4e_index(start), + gen8_pdpe_index(start), + gen8_pde_index(start), + gen8_pte_index(start), + }; } -static void -gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm, +static __always_inline bool +gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, struct i915_page_directory_pointer *pdp, - struct sg_page_iter *sg_iter, - uint64_t start, + struct sgt_dma *iter, + struct gen8_insert_pte *idx, enum i915_cache_level cache_level) { - struct 
i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - gen8_pte_t *pt_vaddr; - unsigned pdpe = gen8_pdpe_index(start); - unsigned pde = gen8_pde_index(start); - unsigned pte = gen8_pte_index(start); - - pt_vaddr = NULL; - - while (__sg_page_iter_next(sg_iter)) { - if (pt_vaddr == NULL) { - struct i915_page_directory *pd = pdp->page_directory[pdpe]; - struct i915_page_table *pt = pd->page_table[pde]; - pt_vaddr = kmap_px(pt); + struct i915_page_directory *pd; + const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level); + gen8_pte_t *vaddr; + bool ret; + + GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base)); + pd = pdp->page_directory[idx->pdpe]; + vaddr = kmap_atomic_px(pd->page_table[idx->pde]); + do { + vaddr[idx->pte] = pte_encode | iter->dma; + + iter->dma += PAGE_SIZE; + if (iter->dma >= iter->max) { + iter->sg = __sg_next(iter->sg); + if (!iter->sg) { + ret = false; + break; + } + + iter->dma = sg_dma_address(iter->sg); + iter->max = iter->dma + iter->sg->length; } - pt_vaddr[pte] = - gen8_pte_encode(sg_page_iter_dma_address(sg_iter), - cache_level); - if (++pte == GEN8_PTES) { - kunmap_px(ppgtt, pt_vaddr); - pt_vaddr = NULL; - if (++pde == I915_PDES) { - if (++pdpe == I915_PDPES_PER_PDP(vm->i915)) + if (++idx->pte == GEN8_PTES) { + idx->pte = 0; + + if (++idx->pde == I915_PDES) { + idx->pde = 0; + + /* Limited by sg length for 3lvl */ + if (++idx->pdpe == GEN8_PML4ES_PER_PML4) { + idx->pdpe = 0; + ret = true; break; - pde = 0; + } + + GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base)); + pd = pdp->page_directory[idx->pdpe]; } - pte = 0; + + kunmap_atomic(vaddr); + vaddr = kmap_atomic_px(pd->page_table[idx->pde]); } - } + } while (1); + kunmap_atomic(vaddr); - if (pt_vaddr) - kunmap_px(ppgtt, pt_vaddr); + return ret; } -static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, - struct sg_table *pages, - uint64_t start, - enum i915_cache_level cache_level, - u32 unused) +static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, + 
struct sg_table *pages, + u64 start, + enum i915_cache_level cache_level, + u32 unused) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct sg_page_iter sg_iter; + struct sgt_dma iter = { + .sg = pages->sgl, + .dma = sg_dma_address(iter.sg), + .max = iter.dma + iter.sg->length, + }; + struct gen8_insert_pte idx = gen8_insert_pte(start); - __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0); + gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx, + cache_level); +} - if (!USES_FULL_48BIT_PPGTT(vm->i915)) { - gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start, - cache_level); - } else { - struct i915_page_directory_pointer *pdp; - uint64_t pml4e; - uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT; +static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, + struct sg_table *pages, + u64 start, + enum i915_cache_level cache_level, + u32 unused) +{ + struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + struct sgt_dma iter = { + .sg = pages->sgl, + .dma = sg_dma_address(iter.sg), + .max = iter.dma + iter.sg->length, + }; + struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps; + struct gen8_insert_pte idx = gen8_insert_pte(start); - gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) { - gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter, - start, cache_level); - } - } + while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++], &iter, + &idx, cache_level)) + GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4); } -static void gen8_free_page_tables(struct drm_i915_private *dev_priv, +static void gen8_free_page_tables(struct i915_address_space *vm, struct i915_page_directory *pd) { int i; @@ -948,38 +949,34 @@ static void gen8_free_page_tables(struct drm_i915_private *dev_priv, if (!px_page(pd)) return; - for_each_set_bit(i, pd->used_pdes, I915_PDES) { - if (WARN_ON(!pd->page_table[i])) - continue; - - free_pt(dev_priv, pd->page_table[i]); - pd->page_table[i] = NULL; + for (i = 0; i < 
I915_PDES; i++) { + if (pd->page_table[i] != vm->scratch_pt) + free_pt(vm, pd->page_table[i]); } } static int gen8_init_scratch(struct i915_address_space *vm) { - struct drm_i915_private *dev_priv = vm->i915; int ret; - ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA); + ret = setup_scratch_page(vm, I915_GFP_DMA); if (ret) return ret; - vm->scratch_pt = alloc_pt(dev_priv); + vm->scratch_pt = alloc_pt(vm); if (IS_ERR(vm->scratch_pt)) { ret = PTR_ERR(vm->scratch_pt); goto free_scratch_page; } - vm->scratch_pd = alloc_pd(dev_priv); + vm->scratch_pd = alloc_pd(vm); if (IS_ERR(vm->scratch_pd)) { ret = PTR_ERR(vm->scratch_pd); goto free_pt; } - if (USES_FULL_48BIT_PPGTT(dev_priv)) { - vm->scratch_pdp = alloc_pdp(dev_priv); + if (use_4lvl(vm)) { + vm->scratch_pdp = alloc_pdp(vm); if (IS_ERR(vm->scratch_pdp)) { ret = PTR_ERR(vm->scratch_pdp); goto free_pd; @@ -988,29 +985,30 @@ static int gen8_init_scratch(struct i915_address_space *vm) gen8_initialize_pt(vm, vm->scratch_pt); gen8_initialize_pd(vm, vm->scratch_pd); - if (USES_FULL_48BIT_PPGTT(dev_priv)) + if (use_4lvl(vm)) gen8_initialize_pdp(vm, vm->scratch_pdp); return 0; free_pd: - free_pd(dev_priv, vm->scratch_pd); + free_pd(vm, vm->scratch_pd); free_pt: - free_pt(dev_priv, vm->scratch_pt); + free_pt(vm, vm->scratch_pt); free_scratch_page: - cleanup_scratch_page(dev_priv, &vm->scratch_page); + cleanup_scratch_page(vm); return ret; } static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) { + struct i915_address_space *vm = &ppgtt->base; + struct drm_i915_private *dev_priv = vm->i915; enum vgt_g2v_type msg; - struct drm_i915_private *dev_priv = ppgtt->base.i915; int i; - if (USES_FULL_48BIT_PPGTT(dev_priv)) { - u64 daddr = px_dma(&ppgtt->pml4); + if (use_4lvl(vm)) { + const u64 daddr = px_dma(&ppgtt->pml4); I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); @@ -1018,8 +1016,8 @@ static int gen8_ppgtt_notify_vgt(struct 
i915_hw_ppgtt *ppgtt, bool create) msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY); } else { - for (i = 0; i < GEN8_LEGACY_PDPES; i++) { - u64 daddr = i915_page_dir_dma_addr(ppgtt, i); + for (i = 0; i < GEN8_3LVL_PDPES; i++) { + const u64 daddr = i915_page_dir_dma_addr(ppgtt, i); I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr)); I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr)); @@ -1036,44 +1034,42 @@ static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) static void gen8_free_scratch(struct i915_address_space *vm) { - struct drm_i915_private *dev_priv = vm->i915; - - if (USES_FULL_48BIT_PPGTT(dev_priv)) - free_pdp(dev_priv, vm->scratch_pdp); - free_pd(dev_priv, vm->scratch_pd); - free_pt(dev_priv, vm->scratch_pt); - cleanup_scratch_page(dev_priv, &vm->scratch_page); + if (use_4lvl(vm)) + free_pdp(vm, vm->scratch_pdp); + free_pd(vm, vm->scratch_pd); + free_pt(vm, vm->scratch_pt); + cleanup_scratch_page(vm); } -static void gen8_ppgtt_cleanup_3lvl(struct drm_i915_private *dev_priv, +static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp) { + const unsigned int pdpes = i915_pdpes_per_pdp(vm); int i; - for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)) { - if (WARN_ON(!pdp->page_directory[i])) + for (i = 0; i < pdpes; i++) { + if (pdp->page_directory[i] == vm->scratch_pd) continue; - gen8_free_page_tables(dev_priv, pdp->page_directory[i]); - free_pd(dev_priv, pdp->page_directory[i]); + gen8_free_page_tables(vm, pdp->page_directory[i]); + free_pd(vm, pdp->page_directory[i]); } - free_pdp(dev_priv, pdp); + free_pdp(vm, pdp); } static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt) { - struct drm_i915_private *dev_priv = ppgtt->base.i915; int i; - for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { - if (WARN_ON(!ppgtt->pml4.pdps[i])) + for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) { + if (ppgtt->pml4.pdps[i] 
== ppgtt->base.scratch_pdp) continue; - gen8_ppgtt_cleanup_3lvl(dev_priv, ppgtt->pml4.pdps[i]); + gen8_ppgtt_cleanup_3lvl(&ppgtt->base, ppgtt->pml4.pdps[i]); } - cleanup_px(dev_priv, &ppgtt->pml4); + cleanup_px(&ppgtt->base, &ppgtt->pml4); } static void gen8_ppgtt_cleanup(struct i915_address_space *vm) @@ -1084,414 +1080,162 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) if (intel_vgpu_active(dev_priv)) gen8_ppgtt_notify_vgt(ppgtt, false); - if (!USES_FULL_48BIT_PPGTT(dev_priv)) - gen8_ppgtt_cleanup_3lvl(dev_priv, &ppgtt->pdp); - else + if (use_4lvl(vm)) gen8_ppgtt_cleanup_4lvl(ppgtt); + else + gen8_ppgtt_cleanup_3lvl(&ppgtt->base, &ppgtt->pdp); gen8_free_scratch(vm); } -/** - * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range. - * @vm: Master vm structure. - * @pd: Page directory for this address range. - * @start: Starting virtual address to begin allocations. - * @length: Size of the allocations. - * @new_pts: Bitmap set by function with new allocations. Likely used by the - * caller to free on error. - * - * Allocate the required number of page tables. Extremely similar to - * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by - * the page directory boundary (instead of the page directory pointer). That - * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is - * possible, and likely that the caller will need to use multiple calls of this - * function to achieve the appropriate allocation. - * - * Return: 0 if success; negative error code otherwise. 
- */ -static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm, - struct i915_page_directory *pd, - uint64_t start, - uint64_t length, - unsigned long *new_pts) +static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, + struct i915_page_directory *pd, + u64 start, u64 length) { - struct drm_i915_private *dev_priv = vm->i915; struct i915_page_table *pt; - uint32_t pde; + u64 from = start; + unsigned int pde; gen8_for_each_pde(pt, pd, start, length, pde) { - /* Don't reallocate page tables */ - if (test_bit(pde, pd->used_pdes)) { - /* Scratch is never allocated this way */ - WARN_ON(pt == vm->scratch_pt); - continue; - } - - pt = alloc_pt(dev_priv); - if (IS_ERR(pt)) - goto unwind_out; - - gen8_initialize_pt(vm, pt); - pd->page_table[pde] = pt; - __set_bit(pde, new_pts); - trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT); - } - - return 0; - -unwind_out: - for_each_set_bit(pde, new_pts, I915_PDES) - free_pt(dev_priv, pd->page_table[pde]); - - return -ENOMEM; -} - -/** - * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range. - * @vm: Master vm structure. - * @pdp: Page directory pointer for this address range. - * @start: Starting virtual address to begin allocations. - * @length: Size of the allocations. - * @new_pds: Bitmap set by function with new allocations. Likely used by the - * caller to free on error. - * - * Allocate the required number of page directories starting at the pde index of - * @start, and ending at the pde index @start + @length. This function will skip - * over already allocated page directories within the range, and only allocate - * new ones, setting the appropriate pointer within the pdp as well as the - * correct position in the bitmap @new_pds. - * - * The function will only allocate the pages within the range for a give page - * directory pointer. 
In other words, if @start + @length straddles a virtually - * addressed PDP boundary (512GB for 4k pages), there will be more allocations - * required by the caller, This is not currently possible, and the BUG in the - * code will prevent it. - * - * Return: 0 if success; negative error code otherwise. - */ -static int -gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm, - struct i915_page_directory_pointer *pdp, - uint64_t start, - uint64_t length, - unsigned long *new_pds) -{ - struct drm_i915_private *dev_priv = vm->i915; - struct i915_page_directory *pd; - uint32_t pdpe; - uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv); - - WARN_ON(!bitmap_empty(new_pds, pdpes)); - - gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - if (test_bit(pdpe, pdp->used_pdpes)) - continue; - - pd = alloc_pd(dev_priv); - if (IS_ERR(pd)) - goto unwind_out; - - gen8_initialize_pd(vm, pd); - pdp->page_directory[pdpe] = pd; - __set_bit(pdpe, new_pds); - trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT); - } + if (pt == vm->scratch_pt) { + pt = alloc_pt(vm); + if (IS_ERR(pt)) + goto unwind; - return 0; - -unwind_out: - for_each_set_bit(pdpe, new_pds, pdpes) - free_pd(dev_priv, pdp->page_directory[pdpe]); - - return -ENOMEM; -} - -/** - * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range. - * @vm: Master vm structure. - * @pml4: Page map level 4 for this address range. - * @start: Starting virtual address to begin allocations. - * @length: Size of the allocations. - * @new_pdps: Bitmap set by function with new allocations. Likely used by the - * caller to free on error. - * - * Allocate the required number of page directory pointers. Extremely similar to - * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs(). - * The main difference is here we are limited by the pml4 boundary (instead of - * the page directory pointer). - * - * Return: 0 if success; negative error code otherwise. 
- */ -static int -gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, - struct i915_pml4 *pml4, - uint64_t start, - uint64_t length, - unsigned long *new_pdps) -{ - struct drm_i915_private *dev_priv = vm->i915; - struct i915_page_directory_pointer *pdp; - uint32_t pml4e; - - WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4)); - - gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - if (!test_bit(pml4e, pml4->used_pml4es)) { - pdp = alloc_pdp(dev_priv); - if (IS_ERR(pdp)) - goto unwind_out; + gen8_initialize_pt(vm, pt); - gen8_initialize_pdp(vm, pdp); - pml4->pdps[pml4e] = pdp; - __set_bit(pml4e, new_pdps); - trace_i915_page_directory_pointer_entry_alloc(vm, - pml4e, - start, - GEN8_PML4E_SHIFT); + gen8_ppgtt_set_pde(vm, pd, pt, pde); + pd->used_pdes++; + GEM_BUG_ON(pd->used_pdes > I915_PDES); } - } - - return 0; - -unwind_out: - for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) - free_pdp(dev_priv, pml4->pdps[pml4e]); - - return -ENOMEM; -} - -static void -free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts) -{ - kfree(new_pts); - kfree(new_pds); -} - -/* Fills in the page directory bitmap, and the array of page tables bitmap. Both - * of these are based on the number of PDPEs in the system. 
- */ -static -int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, - unsigned long **new_pts, - uint32_t pdpes) -{ - unsigned long *pds; - unsigned long *pts; - - pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY); - if (!pds) - return -ENOMEM; - - pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long), - GFP_TEMPORARY); - if (!pts) - goto err_out; - - *new_pds = pds; - *new_pts = pts; + pt->used_ptes += gen8_pte_count(start, length); + } return 0; -err_out: - free_gen8_temp_bitmaps(pds, pts); +unwind: + gen8_ppgtt_clear_pd(vm, pd, from, start - from); return -ENOMEM; } -static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, - struct i915_page_directory_pointer *pdp, - uint64_t start, - uint64_t length) +static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, + struct i915_page_directory_pointer *pdp, + u64 start, u64 length) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - unsigned long *new_page_dirs, *new_page_tables; - struct drm_i915_private *dev_priv = vm->i915; struct i915_page_directory *pd; - const uint64_t orig_start = start; - const uint64_t orig_length = length; - uint32_t pdpe; - uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv); + u64 from = start; + unsigned int pdpe; int ret; - ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); - if (ret) - return ret; - - /* Do the allocations first so we can easily bail out */ - ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length, - new_page_dirs); - if (ret) { - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); - return ret; - } - - /* For every page directory referenced, allocate page tables */ gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length, - new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES)); - if (ret) - goto err_out; - } + if (pd == vm->scratch_pd) { + pd = alloc_pd(vm); + if (IS_ERR(pd)) + goto unwind; - start = orig_start; - length = 
orig_length; + gen8_initialize_pd(vm, pd); + gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); + pdp->used_pdpes++; + GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm)); - /* Allocations have completed successfully, so set the bitmaps, and do - * the mappings. */ - gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - gen8_pde_t *const page_directory = kmap_px(pd); - struct i915_page_table *pt; - uint64_t pd_len = length; - uint64_t pd_start = start; - uint32_t pde; - - /* Every pd should be allocated, we just did that above. */ - WARN_ON(!pd); - - gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { - /* Same reasoning as pd */ - WARN_ON(!pt); - WARN_ON(!pd_len); - WARN_ON(!gen8_pte_count(pd_start, pd_len)); - - /* Set our used ptes within the page table */ - bitmap_set(pt->used_ptes, - gen8_pte_index(pd_start), - gen8_pte_count(pd_start, pd_len)); - - /* Our pde is now pointing to the pagetable, pt */ - __set_bit(pde, pd->used_pdes); - - /* Map the PDE to the page table */ - page_directory[pde] = gen8_pde_encode(px_dma(pt), - I915_CACHE_LLC); - trace_i915_page_table_entry_map(&ppgtt->base, pde, pt, - gen8_pte_index(start), - gen8_pte_count(start, length), - GEN8_PTES); - - /* NB: We haven't yet mapped ptes to pages. 
At this - * point we're still relying on insert_entries() */ + mark_tlbs_dirty(i915_vm_to_ppgtt(vm)); } - kunmap_px(ppgtt, page_directory); - __set_bit(pdpe, pdp->used_pdpes); - gen8_setup_pdpe(ppgtt, pdp, pd, pdpe); + ret = gen8_ppgtt_alloc_pd(vm, pd, start, length); + if (unlikely(ret)) + goto unwind_pd; } - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); - mark_tlbs_dirty(ppgtt); return 0; -err_out: - while (pdpe--) { - unsigned long temp; - - for_each_set_bit(temp, new_page_tables + pdpe * - BITS_TO_LONGS(I915_PDES), I915_PDES) - free_pt(dev_priv, - pdp->page_directory[pdpe]->page_table[temp]); +unwind_pd: + if (!pd->used_pdes) { + gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); + GEM_BUG_ON(!pdp->used_pdpes); + pdp->used_pdpes--; + free_pd(vm, pd); } +unwind: + gen8_ppgtt_clear_pdp(vm, pdp, from, start - from); + return -ENOMEM; +} - for_each_set_bit(pdpe, new_page_dirs, pdpes) - free_pd(dev_priv, pdp->page_directory[pdpe]); - - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); - mark_tlbs_dirty(ppgtt); - return ret; +static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm, + u64 start, u64 length) +{ + return gen8_ppgtt_alloc_pdp(vm, + &i915_vm_to_ppgtt(vm)->pdp, start, length); } -static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, - struct i915_pml4 *pml4, - uint64_t start, - uint64_t length) +static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, + u64 start, u64 length) { - DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4); struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + struct i915_pml4 *pml4 = &ppgtt->pml4; struct i915_page_directory_pointer *pdp; - uint64_t pml4e; - int ret = 0; - - /* Do the pml4 allocations first, so we don't need to track the newly - * allocated tables below the pdp */ - bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4); - - /* The pagedirectory and pagetable allocations are done in the shared 3 - * and 4 level code. Just allocate the pdps. 
- */ - ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length, - new_pdps); - if (ret) - return ret; - - WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2, - "The allocation has spanned more than 512GB. " - "It is highly likely this is incorrect."); + u64 from = start; + u32 pml4e; + int ret; gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - WARN_ON(!pdp); + if (pml4->pdps[pml4e] == vm->scratch_pdp) { + pdp = alloc_pdp(vm); + if (IS_ERR(pdp)) + goto unwind; - ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length); - if (ret) - goto err_out; + gen8_initialize_pdp(vm, pdp); + gen8_ppgtt_set_pml4e(pml4, pdp, pml4e); + } - gen8_setup_pml4e(ppgtt, pml4, pdp, pml4e); + ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length); + if (unlikely(ret)) + goto unwind_pdp; } - bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, - GEN8_PML4ES_PER_PML4); - return 0; -err_out: - for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) - gen8_ppgtt_cleanup_3lvl(vm->i915, pml4->pdps[pml4e]); - - return ret; -} - -static int gen8_alloc_va_range(struct i915_address_space *vm, - uint64_t start, uint64_t length) -{ - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - - if (USES_FULL_48BIT_PPGTT(vm->i915)) - return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length); - else - return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); +unwind_pdp: + if (!pdp->used_pdpes) { + gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); + free_pdp(vm, pdp); + } +unwind: + gen8_ppgtt_clear_4lvl(vm, from, start - from); + return -ENOMEM; } -static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, - uint64_t start, uint64_t length, +static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, + struct i915_page_directory_pointer *pdp, + u64 start, u64 length, gen8_pte_t scratch_pte, struct seq_file *m) { + struct i915_address_space *vm = &ppgtt->base; struct i915_page_directory *pd; - uint32_t pdpe; + u32 pdpe; gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 
struct i915_page_table *pt; - uint64_t pd_len = length; - uint64_t pd_start = start; - uint32_t pde; + u64 pd_len = length; + u64 pd_start = start; + u32 pde; - if (!test_bit(pdpe, pdp->used_pdpes)) + if (pdp->page_directory[pdpe] == ppgtt->base.scratch_pd) continue; seq_printf(m, "\tPDPE #%d\n", pdpe); gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { - uint32_t pte; + u32 pte; gen8_pte_t *pt_vaddr; - if (!test_bit(pde, pd->used_pdes)) + if (pd->page_table[pde] == ppgtt->base.scratch_pt) continue; - pt_vaddr = kmap_px(pt); + pt_vaddr = kmap_atomic_px(pt); for (pte = 0; pte < GEN8_PTES; pte += 4) { - uint64_t va = - (pdpe << GEN8_PDPE_SHIFT) | - (pde << GEN8_PDE_SHIFT) | - (pte << GEN8_PTE_SHIFT); + u64 va = (pdpe << GEN8_PDPE_SHIFT | + pde << GEN8_PDE_SHIFT | + pte << GEN8_PTE_SHIFT); int i; bool found = false; @@ -1510,9 +1254,6 @@ static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, } seq_puts(m, "\n"); } - /* don't use kunmap_px, it could trigger - * an unnecessary flush. 
- */ kunmap_atomic(pt_vaddr); } } @@ -1521,53 +1262,57 @@ static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) { struct i915_address_space *vm = &ppgtt->base; - uint64_t start = ppgtt->base.start; - uint64_t length = ppgtt->base.total; - gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC); + const gen8_pte_t scratch_pte = + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); + u64 start = 0, length = ppgtt->base.total; - if (!USES_FULL_48BIT_PPGTT(vm->i915)) { - gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m); - } else { - uint64_t pml4e; + if (use_4lvl(vm)) { + u64 pml4e; struct i915_pml4 *pml4 = &ppgtt->pml4; struct i915_page_directory_pointer *pdp; gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - if (!test_bit(pml4e, pml4->used_pml4es)) + if (pml4->pdps[pml4e] == ppgtt->base.scratch_pdp) continue; seq_printf(m, " PML4E #%llu\n", pml4e); - gen8_dump_pdp(pdp, start, length, scratch_pte, m); + gen8_dump_pdp(ppgtt, pdp, start, length, scratch_pte, m); } + } else { + gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m); } } -static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) +static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt) { - unsigned long *new_page_dirs, *new_page_tables; - uint32_t pdpes = I915_PDPES_PER_PDP(to_i915(ppgtt->base.dev)); - int ret; + struct i915_address_space *vm = &ppgtt->base; + struct i915_page_directory_pointer *pdp = &ppgtt->pdp; + struct i915_page_directory *pd; + u64 start = 0, length = ppgtt->base.total; + u64 from = start; + unsigned int pdpe; - /* We allocate temp bitmap for page tables for no gain - * but as this is for init only, lets keep the things simple - */ - ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); - if (ret) - return ret; + gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { + pd = alloc_pd(vm); + if 
(IS_ERR(pd)) + goto unwind; - /* Allocate for all pdps regardless of how the ppgtt - * was defined. - */ - ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp, - 0, 1ULL << 32, - new_page_dirs); - if (!ret) - *ppgtt->pdp.used_pdpes = *new_page_dirs; + gen8_initialize_pd(vm, pd); + gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); + pdp->used_pdpes++; + } - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); + pdp->used_pdpes++; /* never remove */ + return 0; - return ret; +unwind: + start -= from; + gen8_for_each_pdpe(pd, pdp, from, start, pdpe) { + gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); + free_pd(vm, pd); + } + pdp->used_pdpes = 0; + return -ENOMEM; } /* @@ -1579,52 +1324,64 @@ static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) */ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) { - struct drm_i915_private *dev_priv = ppgtt->base.i915; + struct i915_address_space *vm = &ppgtt->base; + struct drm_i915_private *dev_priv = vm->i915; int ret; + ppgtt->base.total = USES_FULL_48BIT_PPGTT(dev_priv) ? + 1ULL << 48 : + 1ULL << 32; + ret = gen8_init_scratch(&ppgtt->base); - if (ret) + if (ret) { + ppgtt->base.total = 0; return ret; + } - ppgtt->base.start = 0; - ppgtt->base.cleanup = gen8_ppgtt_cleanup; - ppgtt->base.allocate_va_range = gen8_alloc_va_range; - ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; - ppgtt->base.clear_range = gen8_ppgtt_clear_range; - ppgtt->base.unbind_vma = ppgtt_unbind_vma; - ppgtt->base.bind_vma = ppgtt_bind_vma; - ppgtt->debug_dump = gen8_dump_ppgtt; + /* There are only few exceptions for gen >=6. chv and bxt. + * And we are not sure about the latter so play safe for now. 
+ */ + if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) + ppgtt->base.pt_kmap_wc = true; - if (USES_FULL_48BIT_PPGTT(dev_priv)) { - ret = setup_px(dev_priv, &ppgtt->pml4); + if (use_4lvl(vm)) { + ret = setup_px(&ppgtt->base, &ppgtt->pml4); if (ret) goto free_scratch; gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4); - ppgtt->base.total = 1ULL << 48; - ppgtt->switch_mm = gen8_48b_mm_switch; + ppgtt->switch_mm = gen8_mm_switch_4lvl; + ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl; + ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl; + ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl; } else { - ret = __pdp_init(dev_priv, &ppgtt->pdp); + ret = __pdp_init(&ppgtt->base, &ppgtt->pdp); if (ret) goto free_scratch; - ppgtt->base.total = 1ULL << 32; - ppgtt->switch_mm = gen8_legacy_mm_switch; - trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, - 0, 0, - GEN8_PML4E_SHIFT); - if (intel_vgpu_active(dev_priv)) { - ret = gen8_preallocate_top_level_pdps(ppgtt); - if (ret) + ret = gen8_preallocate_top_level_pdp(ppgtt); + if (ret) { + __pdp_fini(&ppgtt->pdp); goto free_scratch; + } } + + ppgtt->switch_mm = gen8_mm_switch_3lvl; + ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_3lvl; + ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl; + ppgtt->base.clear_range = gen8_ppgtt_clear_3lvl; } if (intel_vgpu_active(dev_priv)) gen8_ppgtt_notify_vgt(ppgtt, true); + ppgtt->base.cleanup = gen8_ppgtt_cleanup; + ppgtt->base.unbind_vma = ppgtt_unbind_vma; + ppgtt->base.bind_vma = ppgtt_bind_vma; + ppgtt->debug_dump = gen8_dump_ppgtt; + return 0; free_scratch: @@ -1637,9 +1394,8 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) struct i915_address_space *vm = &ppgtt->base; struct i915_page_table *unused; gen6_pte_t scratch_pte; - uint32_t pd_entry; - uint32_t pte, pde; - uint32_t start = ppgtt->base.start, length = ppgtt->base.total; + u32 pd_entry, pte, pde; + u32 start = 0, length = ppgtt->base.total; scratch_pte = 
vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0); @@ -1658,7 +1414,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) expected); seq_printf(m, "\tPDE: %x\n", pd_entry); - pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]); + pt_vaddr = kmap_atomic_px(ppgtt->pd.page_table[pde]); for (pte = 0; pte < GEN6_PTES; pte+=4) { unsigned long va = @@ -1681,73 +1437,59 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) } seq_puts(m, "\n"); } - kunmap_px(ppgtt, pt_vaddr); + kunmap_atomic(pt_vaddr); } } /* Write pde (index) from the page directory @pd to the page table @pt */ -static void gen6_write_pde(struct i915_page_directory *pd, - const int pde, struct i915_page_table *pt) +static inline void gen6_write_pde(const struct i915_hw_ppgtt *ppgtt, + const unsigned int pde, + const struct i915_page_table *pt) { /* Caller needs to make sure the write completes if necessary */ - struct i915_hw_ppgtt *ppgtt = - container_of(pd, struct i915_hw_ppgtt, pd); - u32 pd_entry; - - pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt)); - pd_entry |= GEN6_PDE_VALID; - - writel(pd_entry, ppgtt->pd_addr + pde); + writel_relaxed(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID, + ppgtt->pd_addr + pde); } /* Write all the page tables found in the ppgtt structure to incrementing page * directories. */ -static void gen6_write_page_range(struct drm_i915_private *dev_priv, - struct i915_page_directory *pd, - uint32_t start, uint32_t length) +static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt, + u32 start, u32 length) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; struct i915_page_table *pt; - uint32_t pde; + unsigned int pde; - gen6_for_each_pde(pt, pd, start, length, pde) - gen6_write_pde(pd, pde, pt); + gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) + gen6_write_pde(ppgtt, pde, pt); - /* Make sure write is complete before other code can use this page - * table. 
Also require for WC mapped PTEs */ - readl(ggtt->gsm); + mark_tlbs_dirty(ppgtt); + wmb(); } -static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) +static inline u32 get_pd_offset(struct i915_hw_ppgtt *ppgtt) { - BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); - - return (ppgtt->pd.base.ggtt_offset / 64) << 16; + GEM_BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); + return ppgtt->pd.base.ggtt_offset << 10; } static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; - int ret; + u32 *cs; /* NB: TLBs must be flushed and invalidated before a switch */ - ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); - if (ret) - return ret; - - ret = intel_ring_begin(req, 6); - if (ret) - return ret; - - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); - intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); - intel_ring_emit(ring, PP_DIR_DCLV_2G); - intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); - intel_ring_emit(ring, get_pd_offset(ppgtt)); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(2); + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine)); + *cs++ = PP_DIR_DCLV_2G; + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); + *cs++ = get_pd_offset(ppgtt); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1755,33 +1497,21 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; - int ret; + u32 *cs; /* NB: TLBs must be flushed and invalidated before a switch */ - ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); - if (ret) - return ret; - - ret = intel_ring_begin(req, 6); - if (ret) - return ret; - - intel_ring_emit(ring, 
MI_LOAD_REGISTER_IMM(2)); - intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); - intel_ring_emit(ring, PP_DIR_DCLV_2G); - intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); - intel_ring_emit(ring, get_pd_offset(ppgtt)); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); - - /* XXX: RCS is the only one to auto invalidate the TLBs? */ - if (engine->id != RCS) { - ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); - if (ret) - return ret; - } + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(2); + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine)); + *cs++ = PP_DIR_DCLV_2G; + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); + *cs++ = get_pd_offset(ppgtt); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1813,7 +1543,7 @@ static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv) static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; - uint32_t ecochk, ecobits; + u32 ecochk, ecobits; enum intel_engine_id id; ecobits = I915_READ(GAC_ECO_BITS); @@ -1837,7 +1567,7 @@ static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv) static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv) { - uint32_t ecochk, gab_ctl, ecobits; + u32 ecochk, gab_ctl, ecobits; ecobits = I915_READ(GAC_ECO_BITS); I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | @@ -1854,168 +1584,124 @@ static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv) /* PPGTT support for Sandybdrige/Gen6 and later */ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, - uint64_t start, - uint64_t length) + u64 start, u64 length) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - gen6_pte_t *pt_vaddr, scratch_pte; - unsigned first_entry = start >> PAGE_SHIFT; - unsigned num_entries = length >> PAGE_SHIFT; - unsigned act_pt = first_entry / GEN6_PTES; - unsigned first_pte = first_entry % GEN6_PTES; - unsigned last_pte, i; - - 
scratch_pte = vm->pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC, 0); + unsigned int first_entry = start >> PAGE_SHIFT; + unsigned int pde = first_entry / GEN6_PTES; + unsigned int pte = first_entry % GEN6_PTES; + unsigned int num_entries = length >> PAGE_SHIFT; + gen6_pte_t scratch_pte = + vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0); while (num_entries) { - last_pte = first_pte + num_entries; - if (last_pte > GEN6_PTES) - last_pte = GEN6_PTES; + struct i915_page_table *pt = ppgtt->pd.page_table[pde++]; + unsigned int end = min(pte + num_entries, GEN6_PTES); + gen6_pte_t *vaddr; - pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); + num_entries -= end - pte; - for (i = first_pte; i < last_pte; i++) - pt_vaddr[i] = scratch_pte; + /* Note that the hw doesn't support removing PDE on the fly + * (they are cached inside the context with no means to + * invalidate the cache), so we can only reset the PTE + * entries back to scratch. + */ - kunmap_px(ppgtt, pt_vaddr); + vaddr = kmap_atomic_px(pt); + do { + vaddr[pte++] = scratch_pte; + } while (pte < end); + kunmap_atomic(vaddr); - num_entries -= last_pte - first_pte; - first_pte = 0; - act_pt++; + pte = 0; } } static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, struct sg_table *pages, - uint64_t start, - enum i915_cache_level cache_level, u32 flags) + u64 start, + enum i915_cache_level cache_level, + u32 flags) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); unsigned first_entry = start >> PAGE_SHIFT; unsigned act_pt = first_entry / GEN6_PTES; unsigned act_pte = first_entry % GEN6_PTES; - gen6_pte_t *pt_vaddr = NULL; - struct sgt_iter sgt_iter; - dma_addr_t addr; + const u32 pte_encode = vm->pte_encode(0, cache_level, flags); + struct sgt_dma iter; + gen6_pte_t *vaddr; + + vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]); + iter.sg = pages->sgl; + iter.dma = sg_dma_address(iter.sg); + iter.max = iter.dma + iter.sg->length; + do { + vaddr[act_pte] = pte_encode | 
GEN6_PTE_ADDR_ENCODE(iter.dma); - for_each_sgt_dma(addr, sgt_iter, pages) { - if (pt_vaddr == NULL) - pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); + iter.dma += PAGE_SIZE; + if (iter.dma == iter.max) { + iter.sg = __sg_next(iter.sg); + if (!iter.sg) + break; - pt_vaddr[act_pte] = - vm->pte_encode(addr, cache_level, flags); + iter.dma = sg_dma_address(iter.sg); + iter.max = iter.dma + iter.sg->length; + } if (++act_pte == GEN6_PTES) { - kunmap_px(ppgtt, pt_vaddr); - pt_vaddr = NULL; - act_pt++; + kunmap_atomic(vaddr); + vaddr = kmap_atomic_px(ppgtt->pd.page_table[++act_pt]); act_pte = 0; } - } - - if (pt_vaddr) - kunmap_px(ppgtt, pt_vaddr); + } while (1); + kunmap_atomic(vaddr); } static int gen6_alloc_va_range(struct i915_address_space *vm, - uint64_t start_in, uint64_t length_in) + u64 start, u64 length) { - DECLARE_BITMAP(new_page_tables, I915_PDES); - struct drm_i915_private *dev_priv = vm->i915; - struct i915_ggtt *ggtt = &dev_priv->ggtt; struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_table *pt; - uint32_t start, length, start_save, length_save; - uint32_t pde; - int ret; + u64 from = start; + unsigned int pde; + bool flush = false; - start = start_save = start_in; - length = length_save = length_in; - - bitmap_zero(new_page_tables, I915_PDES); - - /* The allocation is done in two stages so that we can bail out with - * minimal amount of pain. The first stage finds new page tables that - * need allocation. The second stage marks use ptes within the page - * tables. 
- */ gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { - if (pt != vm->scratch_pt) { - WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES)); - continue; - } - - /* We've already allocated a page table */ - WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES)); + if (pt == vm->scratch_pt) { + pt = alloc_pt(vm); + if (IS_ERR(pt)) + goto unwind_out; - pt = alloc_pt(dev_priv); - if (IS_ERR(pt)) { - ret = PTR_ERR(pt); - goto unwind_out; + gen6_initialize_pt(vm, pt); + ppgtt->pd.page_table[pde] = pt; + gen6_write_pde(ppgtt, pde, pt); + flush = true; } - - gen6_initialize_pt(vm, pt); - - ppgtt->pd.page_table[pde] = pt; - __set_bit(pde, new_page_tables); - trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT); } - start = start_save; - length = length_save; - - gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { - DECLARE_BITMAP(tmp_bitmap, GEN6_PTES); - - bitmap_zero(tmp_bitmap, GEN6_PTES); - bitmap_set(tmp_bitmap, gen6_pte_index(start), - gen6_pte_count(start, length)); - - if (__test_and_clear_bit(pde, new_page_tables)) - gen6_write_pde(&ppgtt->pd, pde, pt); - - trace_i915_page_table_entry_map(vm, pde, pt, - gen6_pte_index(start), - gen6_pte_count(start, length), - GEN6_PTES); - bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes, - GEN6_PTES); + if (flush) { + mark_tlbs_dirty(ppgtt); + wmb(); } - WARN_ON(!bitmap_empty(new_page_tables, I915_PDES)); - - /* Make sure write is complete before other code can use this page - * table. 
Also require for WC mapped PTEs */ - readl(ggtt->gsm); - - mark_tlbs_dirty(ppgtt); return 0; unwind_out: - for_each_set_bit(pde, new_page_tables, I915_PDES) { - struct i915_page_table *pt = ppgtt->pd.page_table[pde]; - - ppgtt->pd.page_table[pde] = vm->scratch_pt; - free_pt(dev_priv, pt); - } - - mark_tlbs_dirty(ppgtt); - return ret; + gen6_ppgtt_clear_range(vm, from, start); + return -ENOMEM; } static int gen6_init_scratch(struct i915_address_space *vm) { - struct drm_i915_private *dev_priv = vm->i915; int ret; - ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA); + ret = setup_scratch_page(vm, I915_GFP_DMA); if (ret) return ret; - vm->scratch_pt = alloc_pt(dev_priv); + vm->scratch_pt = alloc_pt(vm); if (IS_ERR(vm->scratch_pt)) { - cleanup_scratch_page(dev_priv, &vm->scratch_page); + cleanup_scratch_page(vm); return PTR_ERR(vm->scratch_pt); } @@ -2026,25 +1712,22 @@ static int gen6_init_scratch(struct i915_address_space *vm) static void gen6_free_scratch(struct i915_address_space *vm) { - struct drm_i915_private *dev_priv = vm->i915; - - free_pt(dev_priv, vm->scratch_pt); - cleanup_scratch_page(dev_priv, &vm->scratch_page); + free_pt(vm, vm->scratch_pt); + cleanup_scratch_page(vm); } static void gen6_ppgtt_cleanup(struct i915_address_space *vm) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_directory *pd = &ppgtt->pd; - struct drm_i915_private *dev_priv = vm->i915; struct i915_page_table *pt; - uint32_t pde; + u32 pde; drm_mm_remove_node(&ppgtt->node); gen6_for_all_pdes(pt, pd, pde) if (pt != vm->scratch_pt) - free_pt(dev_priv, pt); + free_pt(vm, pt); gen6_free_scratch(vm); } @@ -2077,6 +1760,12 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) if (ppgtt->node.start < ggtt->mappable_end) DRM_DEBUG("Forced to use aperture for PDEs\n"); + ppgtt->pd.base.ggtt_offset = + ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); + + ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + + 
ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t); + return 0; err_out: @@ -2090,10 +1779,10 @@ static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) } static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt, - uint64_t start, uint64_t length) + u64 start, u64 length) { struct i915_page_table *unused; - uint32_t pde; + u32 pde; gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt; @@ -2119,32 +1808,30 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (ret) return ret; - ppgtt->base.allocate_va_range = gen6_alloc_va_range; + ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; + + gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); + gen6_write_page_range(ppgtt, 0, ppgtt->base.total); + + ret = gen6_alloc_va_range(&ppgtt->base, 0, ppgtt->base.total); + if (ret) { + gen6_ppgtt_cleanup(&ppgtt->base); + return ret; + } + ppgtt->base.clear_range = gen6_ppgtt_clear_range; ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; ppgtt->base.unbind_vma = ppgtt_unbind_vma; ppgtt->base.bind_vma = ppgtt_bind_vma; ppgtt->base.cleanup = gen6_ppgtt_cleanup; - ppgtt->base.start = 0; - ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; ppgtt->debug_dump = gen6_dump_ppgtt; - ppgtt->pd.base.ggtt_offset = - ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); - - ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + - ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t); - - gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); - - gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total); - DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n", ppgtt->node.size >> 20, ppgtt->node.start / PAGE_SIZE); - DRM_DEBUG("Adding PPGTT at offset %x\n", - ppgtt->pd.base.ggtt_offset << 10); + DRM_DEBUG_DRIVER("Adding PPGTT at offset %x\n", + ppgtt->pd.base.ggtt_offset << 10); return 0; } @@ -2153,6 +1840,7 @@ static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt, struct drm_i915_private *dev_priv) { 
ppgtt->base.i915 = dev_priv; + ppgtt->base.dma = &dev_priv->drm.pdev->dev; if (INTEL_INFO(dev_priv)->gen < 8) return gen6_ppgtt_init(ppgtt); @@ -2165,15 +1853,23 @@ static void i915_address_space_init(struct i915_address_space *vm, const char *name) { i915_gem_timeline_init(dev_priv, &vm->timeline, name); - drm_mm_init(&vm->mm, vm->start, vm->total); + + drm_mm_init(&vm->mm, 0, vm->total); + vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; + INIT_LIST_HEAD(&vm->active_list); INIT_LIST_HEAD(&vm->inactive_list); INIT_LIST_HEAD(&vm->unbound_list); + list_add_tail(&vm->global_link, &dev_priv->vm_list); + pagevec_init(&vm->free_pages, false); } static void i915_address_space_fini(struct i915_address_space *vm) { + if (pagevec_count(&vm->free_pages)) + vm_free_pages_release(vm); + i915_gem_timeline_fini(&vm->timeline); drm_mm_takedown(&vm->mm); list_del(&vm->global_link); @@ -2185,34 +1881,17 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv) * called on driver load and after a GPU reset, so you can place * workarounds here even if they get overwritten by GPU reset. 
*/ - /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */ + /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk */ if (IS_BROADWELL(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); else if (IS_CHERRYVIEW(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); - else if (IS_SKYLAKE(dev_priv)) + else if (IS_GEN9_BC(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); - else if (IS_BROXTON(dev_priv)) + else if (IS_GEN9_LP(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); } -static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_private *dev_priv, - struct drm_i915_file_private *file_priv, - const char *name) -{ - int ret; - - ret = __hw_ppgtt_init(ppgtt, dev_priv); - if (ret == 0) { - kref_init(&ppgtt->ref); - i915_address_space_init(&ppgtt->base, dev_priv, name); - ppgtt->base.file = file_priv; - } - - return ret; -} - int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv) { gtt_write_workarounds(dev_priv); @@ -2250,12 +1929,16 @@ i915_ppgtt_create(struct drm_i915_private *dev_priv, if (!ppgtt) return ERR_PTR(-ENOMEM); - ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv, name); + ret = __hw_ppgtt_init(ppgtt, dev_priv); if (ret) { kfree(ppgtt); return ERR_PTR(ret); } + kref_init(&ppgtt->ref); + i915_address_space_init(&ppgtt->base, dev_priv, name); + ppgtt->base.file = fpriv; + trace_i915_ppgtt_create(&ppgtt->base); return ppgtt; @@ -2294,9 +1977,8 @@ void i915_ppgtt_release(struct kref *kref) WARN_ON(!list_empty(&ppgtt->base.inactive_list)); WARN_ON(!list_empty(&ppgtt->base.unbound_list)); - i915_address_space_fini(&ppgtt->base); - ppgtt->base.cleanup(&ppgtt->base); + i915_address_space_fini(&ppgtt->base); kfree(ppgtt); } @@ -2358,7 +2040,7 @@ void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv) i915_check_and_clear_faults(dev_priv); - ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total); + 
ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total); i915_ggtt_invalidate(dev_priv); } @@ -2395,7 +2077,7 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) static void gen8_ggtt_insert_page(struct i915_address_space *vm, dma_addr_t addr, - uint64_t offset, + u64 offset, enum i915_cache_level level, u32 unused) { @@ -2410,32 +2092,22 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm, static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, - uint64_t start, - enum i915_cache_level level, u32 unused) + u64 start, + enum i915_cache_level level, + u32 unused) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); struct sgt_iter sgt_iter; gen8_pte_t __iomem *gtt_entries; - gen8_pte_t gtt_entry; + const gen8_pte_t pte_encode = gen8_pte_encode(0, level); dma_addr_t addr; - int i = 0; - gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); + gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm; + gtt_entries += start >> PAGE_SHIFT; + for_each_sgt_dma(addr, sgt_iter, st) + gen8_set_pte(gtt_entries++, pte_encode | addr); - for_each_sgt_dma(addr, sgt_iter, st) { - gtt_entry = gen8_pte_encode(addr, level); - gen8_set_pte(&gtt_entries[i++], gtt_entry); - } - - /* - * XXX: This serves as a posting read to make sure that the PTE has - * actually been updated. There is some concern that even though - * registers and PTEs are within the same BAR that they are potentially - * of NUMA access patterns. Therefore, even with the way we assume - * hardware should work, we must keep this posting read for paranoia. - */ - if (i != 0) - WARN_ON(readq(&gtt_entries[i-1]) != gtt_entry); + wmb(); /* This next bit makes the above posting read even more important. 
We * want to flush the TLBs only after we're certain all the PTE updates @@ -2444,35 +2116,9 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, ggtt->invalidate(vm->i915); } -struct insert_entries { - struct i915_address_space *vm; - struct sg_table *st; - uint64_t start; - enum i915_cache_level level; - u32 flags; -}; - -static int gen8_ggtt_insert_entries__cb(void *_arg) -{ - struct insert_entries *arg = _arg; - gen8_ggtt_insert_entries(arg->vm, arg->st, - arg->start, arg->level, arg->flags); - return 0; -} - -static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm, - struct sg_table *st, - uint64_t start, - enum i915_cache_level level, - u32 flags) -{ - struct insert_entries arg = { vm, st, start, level, flags }; - stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL); -} - static void gen6_ggtt_insert_page(struct i915_address_space *vm, dma_addr_t addr, - uint64_t offset, + u64 offset, enum i915_cache_level level, u32 flags) { @@ -2493,31 +2139,18 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm, */ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, - uint64_t start, - enum i915_cache_level level, u32 flags) + u64 start, + enum i915_cache_level level, + u32 flags) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - struct sgt_iter sgt_iter; - gen6_pte_t __iomem *gtt_entries; - gen6_pte_t gtt_entry; + gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm; + unsigned int i = start >> PAGE_SHIFT; + struct sgt_iter iter; dma_addr_t addr; - int i = 0; - - gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); - - for_each_sgt_dma(addr, sgt_iter, st) { - gtt_entry = vm->pte_encode(addr, level, flags); - iowrite32(gtt_entry, &gtt_entries[i++]); - } - - /* XXX: This serves as a posting read to make sure that the PTE has - * actually been updated. 
There is some concern that even though - * registers and PTEs are within the same BAR that they are potentially - * of NUMA access patterns. Therefore, even with the way we assume - * hardware should work, we must keep this posting read for paranoia. - */ - if (i != 0) - WARN_ON(readl(&gtt_entries[i-1]) != gtt_entry); + for_each_sgt_dma(addr, iter, st) + iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]); + wmb(); /* This next bit makes the above posting read even more important. We * want to flush the TLBs only after we're certain all the PTE updates @@ -2527,17 +2160,19 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, } static void nop_clear_range(struct i915_address_space *vm, - uint64_t start, uint64_t length) + u64 start, u64 length) { } static void gen8_ggtt_clear_range(struct i915_address_space *vm, - uint64_t start, uint64_t length) + u64 start, u64 length) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); unsigned first_entry = start >> PAGE_SHIFT; unsigned num_entries = length >> PAGE_SHIFT; - gen8_pte_t scratch_pte, __iomem *gtt_base = + const gen8_pte_t scratch_pte = + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); + gen8_pte_t __iomem *gtt_base = (gen8_pte_t __iomem *)ggtt->gsm + first_entry; const int max_entries = ggtt_total_entries(ggtt) - first_entry; int i; @@ -2547,16 +2182,12 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, first_entry, num_entries, max_entries)) num_entries = max_entries; - scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC); for (i = 0; i < num_entries; i++) gen8_set_pte(&gtt_base[i], scratch_pte); - readl(gtt_base); } static void gen6_ggtt_clear_range(struct i915_address_space *vm, - uint64_t start, - uint64_t length) + u64 start, u64 length) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); unsigned first_entry = start >> PAGE_SHIFT; @@ -2576,12 +2207,11 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, for (i = 0; i < num_entries; 
i++) iowrite32(scratch_pte, &gtt_base[i]); - readl(gtt_base); } static void i915_ggtt_insert_page(struct i915_address_space *vm, dma_addr_t addr, - uint64_t offset, + u64 offset, enum i915_cache_level cache_level, u32 unused) { @@ -2593,19 +2223,18 @@ static void i915_ggtt_insert_page(struct i915_address_space *vm, static void i915_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *pages, - uint64_t start, - enum i915_cache_level cache_level, u32 unused) + u64 start, + enum i915_cache_level cache_level, + u32 unused) { unsigned int flags = (cache_level == I915_CACHE_NONE) ? AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags); - } static void i915_ggtt_clear_range(struct i915_address_space *vm, - uint64_t start, - uint64_t length) + u64 start, u64 length) { intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); } @@ -2616,14 +2245,16 @@ static int ggtt_bind_vma(struct i915_vma *vma, { struct drm_i915_private *i915 = vma->vm->i915; struct drm_i915_gem_object *obj = vma->obj; - u32 pte_flags = 0; - int ret; + u32 pte_flags; - ret = i915_get_ggtt_vma_pages(vma); - if (ret) - return ret; + if (unlikely(!vma->pages)) { + int ret = i915_get_ggtt_vma_pages(vma); + if (ret) + return ret; + } /* Currently applicable only to VLV */ + pte_flags = 0; if (obj->gt_ro) pte_flags |= PTE_READ_ONLY; @@ -2642,6 +2273,15 @@ static int ggtt_bind_vma(struct i915_vma *vma, return 0; } +static void ggtt_unbind_vma(struct i915_vma *vma) +{ + struct drm_i915_private *i915 = vma->vm->i915; + + intel_runtime_pm_get(i915); + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); + intel_runtime_pm_put(i915); +} + static int aliasing_gtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) @@ -2650,15 +2290,32 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, u32 pte_flags; int ret; - ret = i915_get_ggtt_vma_pages(vma); - if (ret) - return ret; + if (unlikely(!vma->pages)) { + ret = 
i915_get_ggtt_vma_pages(vma); + if (ret) + return ret; + } /* Currently applicable only to VLV */ pte_flags = 0; if (vma->obj->gt_ro) pte_flags |= PTE_READ_ONLY; + if (flags & I915_VMA_LOCAL_BIND) { + struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; + + if (appgtt->base.allocate_va_range) { + ret = appgtt->base.allocate_va_range(&appgtt->base, + vma->node.start, + vma->node.size); + if (ret) + goto err_pages; + } + + appgtt->base.insert_entries(&appgtt->base, + vma->pages, vma->node.start, + cache_level, pte_flags); + } if (flags & I915_VMA_GLOBAL_BIND) { intel_runtime_pm_get(i915); @@ -2668,32 +2325,35 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, intel_runtime_pm_put(i915); } - if (flags & I915_VMA_LOCAL_BIND) { - struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; - appgtt->base.insert_entries(&appgtt->base, - vma->pages, vma->node.start, - cache_level, pte_flags); - } - return 0; + +err_pages: + if (!(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND))) { + if (vma->pages != vma->obj->mm.pages) { + GEM_BUG_ON(!vma->pages); + sg_free_table(vma->pages); + kfree(vma->pages); + } + vma->pages = NULL; + } + return ret; } -static void ggtt_unbind_vma(struct i915_vma *vma) +static void aliasing_gtt_unbind_vma(struct i915_vma *vma) { struct drm_i915_private *i915 = vma->vm->i915; - struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; - const u64 size = min(vma->size, vma->node.size); if (vma->flags & I915_VMA_GLOBAL_BIND) { intel_runtime_pm_get(i915); - vma->vm->clear_range(vma->vm, - vma->node.start, size); + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); intel_runtime_pm_put(i915); } - if (vma->flags & I915_VMA_LOCAL_BIND && appgtt) - appgtt->base.clear_range(&appgtt->base, - vma->node.start, size); + if (vma->flags & I915_VMA_LOCAL_BIND) { + struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->base; + + vm->clear_range(vm, vma->node.start, vma->size); + } } void i915_gem_gtt_finish_pages(struct drm_i915_gem_object 
*obj, @@ -2719,14 +2379,76 @@ static void i915_gtt_color_adjust(const struct drm_mm_node *node, u64 *start, u64 *end) { - if (node->color != color) + if (node->allocated && node->color != color) *start += I915_GTT_PAGE_SIZE; + /* Also leave a space between the unallocated reserved node after the + * GTT and any objects within the GTT, i.e. we use the color adjustment + * to insert a guard page to prevent prefetches crossing over the + * GTT boundary. + */ node = list_next_entry(node, node_list); - if (node->allocated && node->color != color) + if (node->color != color) *end -= I915_GTT_PAGE_SIZE; } +int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_hw_ppgtt *ppgtt; + int err; + + ppgtt = i915_ppgtt_create(i915, ERR_PTR(-EPERM), "[alias]"); + if (IS_ERR(ppgtt)) + return PTR_ERR(ppgtt); + + if (WARN_ON(ppgtt->base.total < ggtt->base.total)) { + err = -ENODEV; + goto err_ppgtt; + } + + if (ppgtt->base.allocate_va_range) { + /* Note we only pre-allocate as far as the end of the global + * GTT. On 48b / 4-level page-tables, the difference is very, + * very significant! We have to preallocate as GVT/vgpu does + * not like the page directory disappearing. 
+ */ + err = ppgtt->base.allocate_va_range(&ppgtt->base, + 0, ggtt->base.total); + if (err) + goto err_ppgtt; + } + + i915->mm.aliasing_ppgtt = ppgtt; + + WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); + ggtt->base.bind_vma = aliasing_gtt_bind_vma; + + WARN_ON(ggtt->base.unbind_vma != ggtt_unbind_vma); + ggtt->base.unbind_vma = aliasing_gtt_unbind_vma; + + return 0; + +err_ppgtt: + i915_ppgtt_put(ppgtt); + return err; +} + +void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_hw_ppgtt *ppgtt; + + ppgtt = fetch_and_zero(&i915->mm.aliasing_ppgtt); + if (!ppgtt) + return; + + i915_ppgtt_put(ppgtt); + + ggtt->base.bind_vma = ggtt_bind_vma; + ggtt->base.unbind_vma = ggtt_unbind_vma; +} + int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) { /* Let GEM Manage all of the aperture. @@ -2740,7 +2462,6 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) */ struct i915_ggtt *ggtt = &dev_priv->ggtt; unsigned long hole_start, hole_end; - struct i915_hw_ppgtt *ppgtt; struct drm_mm_node *entry; int ret; @@ -2769,38 +2490,13 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) ggtt->base.total - PAGE_SIZE, PAGE_SIZE); if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) { - ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); - if (!ppgtt) { - ret = -ENOMEM; - goto err; - } - - ret = __hw_ppgtt_init(ppgtt, dev_priv); + ret = i915_gem_init_aliasing_ppgtt(dev_priv); if (ret) - goto err_ppgtt; - - if (ppgtt->base.allocate_va_range) { - ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0, - ppgtt->base.total); - if (ret) - goto err_ppgtt_cleanup; - } - - ppgtt->base.clear_range(&ppgtt->base, - ppgtt->base.start, - ppgtt->base.total); - - dev_priv->mm.aliasing_ppgtt = ppgtt; - WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); - ggtt->base.bind_vma = aliasing_gtt_bind_vma; + goto err; } return 0; -err_ppgtt_cleanup: - ppgtt->base.cleanup(&ppgtt->base); -err_ppgtt: - kfree(ppgtt); err: 
drm_mm_remove_node(&ggtt->error_capture); return ret; @@ -2813,27 +2509,31 @@ err: void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) { struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct i915_vma *vma, *vn; - if (dev_priv->mm.aliasing_ppgtt) { - struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; - ppgtt->base.cleanup(&ppgtt->base); - kfree(ppgtt); - } + ggtt->base.closed = true; + + mutex_lock(&dev_priv->drm.struct_mutex); + WARN_ON(!list_empty(&ggtt->base.active_list)); + list_for_each_entry_safe(vma, vn, &ggtt->base.inactive_list, vm_link) + WARN_ON(i915_vma_unbind(vma)); + mutex_unlock(&dev_priv->drm.struct_mutex); i915_gem_cleanup_stolen(&dev_priv->drm); + mutex_lock(&dev_priv->drm.struct_mutex); + i915_gem_fini_aliasing_ppgtt(dev_priv); + if (drm_mm_node_allocated(&ggtt->error_capture)) drm_mm_remove_node(&ggtt->error_capture); if (drm_mm_initialized(&ggtt->base.mm)) { intel_vgt_deballoon(dev_priv); - - mutex_lock(&dev_priv->drm.struct_mutex); i915_address_space_fini(&ggtt->base); - mutex_unlock(&dev_priv->drm.struct_mutex); } ggtt->base.cleanup(&ggtt->base); + mutex_unlock(&dev_priv->drm.struct_mutex); arch_phys_wc_del(ggtt->mtrr); io_mapping_fini(&ggtt->mappable); @@ -2943,7 +2643,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) return -ENOMEM; } - ret = setup_scratch_page(dev_priv, &ggtt->base.scratch_page, GFP_DMA32); + ret = setup_scratch_page(&ggtt->base, GFP_DMA32); if (ret) { DRM_ERROR("Scratch setup failed\n"); /* iounmap will also get called at remove, but meh */ @@ -2959,7 +2659,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) * writing this data shouldn't be harmful even in those cases. */ static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) { - uint64_t pat; + u64 pat; pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? 
*/ @@ -2994,7 +2694,7 @@ static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) { - uint64_t pat; + u64 pat; /* * Map WB on BDW to snooped on CHV. @@ -3032,7 +2732,7 @@ static void gen6_gmch_remove(struct i915_address_space *vm) struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); iounmap(ggtt->gsm); - cleanup_scratch_page(vm->i915, &vm->scratch_page); + cleanup_scratch_page(vm); } static int gen8_gmch_probe(struct i915_ggtt *ggtt) @@ -3078,8 +2778,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->base.clear_range = gen8_ggtt_clear_range; ggtt->base.insert_entries = gen8_ggtt_insert_entries; - if (IS_CHERRYVIEW(dev_priv)) - ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL; ggtt->invalidate = gen6_ggtt_invalidate; @@ -3183,6 +2881,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) int ret; ggtt->base.i915 = dev_priv; + ggtt->base.dma = &dev_priv->drm.pdev->dev; if (INTEL_GEN(dev_priv) <= 5) ret = i915_gmch_probe(ggtt); @@ -3242,14 +2941,14 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) INIT_LIST_HEAD(&dev_priv->vm_list); - /* Subtract the guard page before address space initialization to - * shrink the range used by drm_mm. + /* Note that we use page colouring to enforce a guard page at the + * end of the address space. This is required as the CS may prefetch + * beyond the end of the batch buffer, across the page boundary, + * and beyond the end of the GTT if we do not provide a guard. 
*/ mutex_lock(&dev_priv->drm.struct_mutex); - ggtt->base.total -= PAGE_SIZE; i915_address_space_init(&ggtt->base, dev_priv, "[global]"); - ggtt->base.total += PAGE_SIZE; - if (!HAS_LLC(dev_priv)) + if (!HAS_LLC(dev_priv) && !USES_PPGTT(dev_priv)) ggtt->base.mm.color_adjust = i915_gtt_color_adjust; mutex_unlock(&dev_priv->drm.struct_mutex); @@ -3303,7 +3002,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) i915_check_and_clear_faults(dev_priv); /* First fill our portion of the GTT with scratch pages */ - ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total); + ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total); ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */ @@ -3344,8 +3043,6 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) struct i915_address_space *vm; list_for_each_entry(vm, &dev_priv->vm_list, global_link) { - /* TODO: Perhaps it shouldn't be gen6 specific */ - struct i915_hw_ppgtt *ppgtt; if (i915_is_ggtt(vm)) @@ -3353,8 +3050,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) else ppgtt = i915_vm_to_ppgtt(vm); - gen6_write_page_range(dev_priv, &ppgtt->pd, - 0, ppgtt->base.total); + gen6_write_page_range(ppgtt, 0, ppgtt->base.total); } } @@ -3389,11 +3085,11 @@ rotate_pages(const dma_addr_t *in, unsigned int offset, return sg; } -static struct sg_table * -intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info, - struct drm_i915_gem_object *obj) +static noinline struct sg_table * +intel_rotate_pages(struct intel_rotation_info *rot_info, + struct drm_i915_gem_object *obj) { - const size_t n_pages = obj->base.size / PAGE_SIZE; + const unsigned long n_pages = obj->base.size / PAGE_SIZE; unsigned int size = intel_rotation_info_size(rot_info); struct sgt_iter sgt_iter; dma_addr_t dma_addr; @@ -3452,7 +3148,7 @@ err_st_alloc: return ERR_PTR(ret); } -static struct sg_table * +static noinline struct sg_table * intel_partial_pages(const struct 
i915_ggtt_view *view, struct drm_i915_gem_object *obj) { @@ -3506,7 +3202,7 @@ err_st_alloc: static int i915_get_ggtt_vma_pages(struct i915_vma *vma) { - int ret = 0; + int ret; /* The vma->pages are only valid within the lifespan of the borrowed * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so @@ -3515,32 +3211,33 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) */ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); - if (vma->pages) + switch (vma->ggtt_view.type) { + case I915_GGTT_VIEW_NORMAL: + vma->pages = vma->obj->mm.pages; return 0; - if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) - vma->pages = vma->obj->mm.pages; - else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED) + case I915_GGTT_VIEW_ROTATED: vma->pages = - intel_rotate_fb_obj_pages(&vma->ggtt_view.rotated, - vma->obj); - else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL) + intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj); + break; + + case I915_GGTT_VIEW_PARTIAL: vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj); - else + break; + + default: WARN_ONCE(1, "GGTT view %u not implemented!\n", vma->ggtt_view.type); + return -EINVAL; + } - if (!vma->pages) { - DRM_ERROR("Failed to get pages for GGTT view type %u!\n", - vma->ggtt_view.type); - ret = -EINVAL; - } else if (IS_ERR(vma->pages)) { + ret = 0; + if (unlikely(IS_ERR(vma->pages))) { ret = PTR_ERR(vma->pages); vma->pages = NULL; DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", vma->ggtt_view.type, ret); } - return ret; } @@ -3743,3 +3440,8 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, size, alignment, color, start, end, DRM_MM_INSERT_EVICT); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_gtt.c" +#include "selftests/i915_gem_gtt.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 3c5ef5358cef..fb15684c1d83 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ 
-36,9 +36,11 @@ #include <linux/io-mapping.h> #include <linux/mm.h> +#include <linux/pagevec.h> #include "i915_gem_timeline.h" #include "i915_gem_request.h" +#include "i915_selftest.h" #define I915_GTT_PAGE_SIZE 4096UL #define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE @@ -51,11 +53,11 @@ struct drm_i915_file_private; struct drm_i915_fence_reg; -typedef uint32_t gen6_pte_t; -typedef uint64_t gen8_pte_t; -typedef uint64_t gen8_pde_t; -typedef uint64_t gen8_ppgtt_pdpe_t; -typedef uint64_t gen8_ppgtt_pml4e_t; +typedef u32 gen6_pte_t; +typedef u64 gen8_pte_t; +typedef u64 gen8_pde_t; +typedef u64 gen8_ppgtt_pdpe_t; +typedef u64 gen8_ppgtt_pml4e_t; #define ggtt_total_entries(ggtt) ((ggtt)->base.total >> PAGE_SHIFT) @@ -67,7 +69,7 @@ typedef uint64_t gen8_ppgtt_pml4e_t; #define GEN6_PTE_UNCACHED (1 << 1) #define GEN6_PTE_VALID (1 << 0) -#define I915_PTES(pte_len) (PAGE_SIZE / (pte_len)) +#define I915_PTES(pte_len) ((unsigned int)(PAGE_SIZE / (pte_len))) #define I915_PTE_MASK(pte_len) (I915_PTES(pte_len) - 1) #define I915_PDES 512 #define I915_PDE_MASK (I915_PDES - 1) @@ -99,13 +101,20 @@ typedef uint64_t gen8_ppgtt_pml4e_t; #define HSW_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0x7f0)) #define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr) -/* GEN8 legacy style address is defined as a 3 level page table: +/* GEN8 32b style address is defined as a 3 level page table: * 31:30 | 29:21 | 20:12 | 11:0 * PDPE | PDE | PTE | offset * The difference as compared to normal x86 3 level page table is the PDPEs are * programmed via register. 
- * - * GEN8 48b legacy style address is defined as a 4 level page table: + */ +#define GEN8_3LVL_PDPES 4 +#define GEN8_PDE_SHIFT 21 +#define GEN8_PDE_MASK 0x1ff +#define GEN8_PTE_SHIFT 12 +#define GEN8_PTE_MASK 0x1ff +#define GEN8_PTES I915_PTES(sizeof(gen8_pte_t)) + +/* GEN8 48b style address is defined as a 4 level page table: * 47:39 | 38:30 | 29:21 | 20:12 | 11:0 * PML4E | PDPE | PDE | PTE | offset */ @@ -116,15 +125,6 @@ typedef uint64_t gen8_ppgtt_pml4e_t; /* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page * tables */ #define GEN8_PDPE_MASK 0x1ff -#define GEN8_PDE_SHIFT 21 -#define GEN8_PDE_MASK 0x1ff -#define GEN8_PTE_SHIFT 12 -#define GEN8_PTE_MASK 0x1ff -#define GEN8_LEGACY_PDPES 4 -#define GEN8_PTES I915_PTES(sizeof(gen8_pte_t)) - -#define I915_PDPES_PER_PDP(dev_priv) (USES_FULL_48BIT_PPGTT(dev_priv) ?\ - GEN8_PML4ES_PER_PML4 : GEN8_LEGACY_PDPES) #define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD) #define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */ @@ -141,7 +141,7 @@ typedef uint64_t gen8_ppgtt_pml4e_t; #define GEN8_PPAT_WC (1<<0) #define GEN8_PPAT_UC (0<<0) #define GEN8_PPAT_ELLC_OVERRIDE (0<<2) -#define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8)) +#define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8)) struct sg_table; @@ -208,7 +208,7 @@ struct i915_page_dma { /* For gen6/gen7 only. 
This is the offset in the GGTT * where the page directory entries for PPGTT begin */ - uint32_t ggtt_offset; + u32 ggtt_offset; }; }; @@ -218,28 +218,24 @@ struct i915_page_dma { struct i915_page_table { struct i915_page_dma base; - - unsigned long *used_ptes; + unsigned int used_ptes; }; struct i915_page_directory { struct i915_page_dma base; - unsigned long *used_pdes; struct i915_page_table *page_table[I915_PDES]; /* PDEs */ + unsigned int used_pdes; }; struct i915_page_directory_pointer { struct i915_page_dma base; - - unsigned long *used_pdpes; struct i915_page_directory **page_directory; + unsigned int used_pdpes; }; struct i915_pml4 { struct i915_page_dma base; - - DECLARE_BITMAP(used_pml4es, GEN8_PML4ES_PER_PML4); struct i915_page_directory_pointer *pdps[GEN8_PML4ES_PER_PML4]; }; @@ -247,6 +243,7 @@ struct i915_address_space { struct drm_mm mm; struct i915_gem_timeline timeline; struct drm_i915_private *i915; + struct device *dma; /* Every address space belongs to a struct file - except for the global * GTT that is owned by the driver (and so @file is set to NULL). In * principle, no information should leak from one context to another @@ -257,7 +254,6 @@ struct i915_address_space { */ struct drm_i915_file_private *file; struct list_head global_link; - u64 start; /* Start offset always 0 for dri2 */ u64 total; /* size addr space maps (ex. 
2GB for ggtt) */ bool closed; @@ -297,6 +293,9 @@ struct i915_address_space { */ struct list_head unbound_list; + struct pagevec free_pages; + bool pt_kmap_wc; + /* FIXME: Need a more generic return type */ gen6_pte_t (*pte_encode)(dma_addr_t addr, enum i915_cache_level level, @@ -304,20 +303,19 @@ struct i915_address_space { /* flags for pte_encode */ #define PTE_READ_ONLY (1<<0) int (*allocate_va_range)(struct i915_address_space *vm, - uint64_t start, - uint64_t length); + u64 start, u64 length); void (*clear_range)(struct i915_address_space *vm, - uint64_t start, - uint64_t length); + u64 start, u64 length); void (*insert_page)(struct i915_address_space *vm, dma_addr_t addr, - uint64_t offset, + u64 offset, enum i915_cache_level cache_level, u32 flags); void (*insert_entries)(struct i915_address_space *vm, struct sg_table *st, - uint64_t start, - enum i915_cache_level cache_level, u32 flags); + u64 start, + enum i915_cache_level cache_level, + u32 flags); void (*cleanup)(struct i915_address_space *vm); /** Unmap an object from an address space. This usually consists of * setting the valid PTE entries to a reserved scratch page. */ @@ -326,10 +324,18 @@ struct i915_address_space { int (*bind_vma)(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); + + I915_SELFTEST_DECLARE(struct fault_attr fault_attr); }; #define i915_is_ggtt(V) (!(V)->file) +static inline bool +i915_vm_is_48bit(const struct i915_address_space *vm) +{ + return (vm->total - 1) >> 32; +} + /* The Graphics Translation Table is the way in which GEN hardware translates a * Graphics Virtual Address into a Physical Address. 
In addition to the normal * collateral associated with any va->pa translations GEN hardware also has a @@ -381,7 +387,6 @@ struct i915_hw_ppgtt { gen6_pte_t __iomem *pd_addr; - int (*enable)(struct i915_hw_ppgtt *ppgtt); int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req); void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m); @@ -409,9 +414,9 @@ struct i915_hw_ppgtt { (pt = (pd)->page_table[iter], true); \ ++iter) -static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift) +static inline u32 i915_pte_index(u64 address, unsigned int pde_shift) { - const uint32_t mask = NUM_PTE(pde_shift) - 1; + const u32 mask = NUM_PTE(pde_shift) - 1; return (address >> PAGE_SHIFT) & mask; } @@ -420,11 +425,10 @@ static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift) * does not cross a page table boundary, so the max value would be * GEN6_PTES for GEN6, and GEN8_PTES for GEN8. */ -static inline uint32_t i915_pte_count(uint64_t addr, size_t length, - uint32_t pde_shift) +static inline u32 i915_pte_count(u64 addr, u64 length, unsigned int pde_shift) { - const uint64_t mask = ~((1ULL << pde_shift) - 1); - uint64_t end; + const u64 mask = ~((1ULL << pde_shift) - 1); + u64 end; WARN_ON(length == 0); WARN_ON(offset_in_page(addr|length)); @@ -437,26 +441,35 @@ static inline uint32_t i915_pte_count(uint64_t addr, size_t length, return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift); } -static inline uint32_t i915_pde_index(uint64_t addr, uint32_t shift) +static inline u32 i915_pde_index(u64 addr, u32 shift) { return (addr >> shift) & I915_PDE_MASK; } -static inline uint32_t gen6_pte_index(uint32_t addr) +static inline u32 gen6_pte_index(u32 addr) { return i915_pte_index(addr, GEN6_PDE_SHIFT); } -static inline size_t gen6_pte_count(uint32_t addr, uint32_t length) +static inline u32 gen6_pte_count(u32 addr, u32 length) { return i915_pte_count(addr, length, GEN6_PDE_SHIFT); } -static inline 
uint32_t gen6_pde_index(uint32_t addr) +static inline u32 gen6_pde_index(u32 addr) { return i915_pde_index(addr, GEN6_PDE_SHIFT); } +static inline unsigned int +i915_pdpes_per_pdp(const struct i915_address_space *vm) +{ + if (i915_vm_is_48bit(vm)) + return GEN8_PML4ES_PER_PML4; + + return GEN8_3LVL_PDPES; +} + /* Equivalent to the gen6 version, For each pde iterates over every pde * between from start until start + length. On gen8+ it simply iterates * over every page directory entry in a page directory. @@ -471,7 +484,7 @@ static inline uint32_t gen6_pde_index(uint32_t addr) #define gen8_for_each_pdpe(pd, pdp, start, length, iter) \ for (iter = gen8_pdpe_index(start); \ - length > 0 && iter < I915_PDPES_PER_PDP(dev) && \ + length > 0 && iter < i915_pdpes_per_pdp(vm) && \ (pd = (pdp)->page_directory[iter], true); \ ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT); \ temp = min(temp - start, length); \ @@ -485,27 +498,27 @@ static inline uint32_t gen6_pde_index(uint32_t addr) temp = min(temp - start, length); \ start += temp, length -= temp; }), ++iter) -static inline uint32_t gen8_pte_index(uint64_t address) +static inline u32 gen8_pte_index(u64 address) { return i915_pte_index(address, GEN8_PDE_SHIFT); } -static inline uint32_t gen8_pde_index(uint64_t address) +static inline u32 gen8_pde_index(u64 address) { return i915_pde_index(address, GEN8_PDE_SHIFT); } -static inline uint32_t gen8_pdpe_index(uint64_t address) +static inline u32 gen8_pdpe_index(u64 address) { return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK; } -static inline uint32_t gen8_pml4e_index(uint64_t address) +static inline u32 gen8_pml4e_index(u64 address) { return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK; } -static inline size_t gen8_pte_count(uint64_t address, uint64_t length) +static inline u64 gen8_pte_count(u64 address, u64 length) { return i915_pte_count(address, length, GEN8_PDE_SHIFT); } @@ -513,9 +526,7 @@ static inline size_t gen8_pte_count(uint64_t address, uint64_t length) 
static inline dma_addr_t i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n) { - return test_bit(n, ppgtt->pdp.used_pdpes) ? - px_dma(ppgtt->pdp.page_directory[n]) : - px_dma(ppgtt->base.scratch_pd); + return px_dma(ppgtt->pdp.page_directory[n]); } static inline struct i915_ggtt * @@ -525,6 +536,9 @@ i915_vm_to_ggtt(struct i915_address_space *vm) return container_of(vm, struct i915_ggtt, base); } +int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915); +void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915); + int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv); int i915_ggtt_init_hw(struct drm_i915_private *dev_priv); int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index 933019e1b206..fc950abbe400 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -35,8 +35,10 @@ static void internal_free_pages(struct sg_table *st) { struct scatterlist *sg; - for (sg = st->sgl; sg; sg = __sg_next(sg)) - __free_pages(sg_page(sg), get_order(sg->length)); + for (sg = st->sgl; sg; sg = __sg_next(sg)) { + if (sg_page(sg)) + __free_pages(sg_page(sg), get_order(sg->length)); + } sg_free_table(st); kfree(st); @@ -133,6 +135,7 @@ create_st: return st; err: + sg_set_page(sg, NULL, 0, 0); sg_mark_end(sg); internal_free_pages(st); return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 76b80a0be797..174cf923c236 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -33,6 +33,8 @@ #include <drm/i915_drm.h> +#include "i915_selftest.h" + struct drm_i915_gem_object_ops { unsigned int flags; #define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1 @@ -87,6 +89,7 @@ struct drm_i915_gem_object { struct list_head obj_exec_link; struct list_head batch_pool_link; + I915_SELFTEST_DECLARE(struct 
list_head st_link); unsigned long flags; @@ -165,19 +168,23 @@ struct drm_i915_gem_object { struct reservation_object *resv; /** References from framebuffers, locks out tiling changes. */ - unsigned long framebuffer_references; + unsigned int framebuffer_references; /** Record of address bit 17 of each page at last unbind. */ unsigned long *bit_17; - struct i915_gem_userptr { - uintptr_t ptr; - unsigned read_only :1; + union { + struct i915_gem_userptr { + uintptr_t ptr; + unsigned read_only :1; + + struct i915_mm_struct *mm; + struct i915_mmu_object *mmu_object; + struct work_struct *work; + } userptr; - struct i915_mm_struct *mm; - struct i915_mmu_object *mmu_object; - struct work_struct *work; - } userptr; + unsigned long scratch; + }; /** for phys allocated objects */ struct drm_dma_handle *phys_handle; @@ -256,10 +263,14 @@ extern void drm_gem_object_unreference(struct drm_gem_object *); __deprecated extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *); -static inline bool -i915_gem_object_is_dead(const struct drm_i915_gem_object *obj) +static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj) { - return kref_read(&obj->base.refcount) == 0; + reservation_object_lock(obj->resv, NULL); +} + +static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj) +{ + reservation_object_unlock(obj->resv); } static inline bool @@ -302,6 +313,12 @@ i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj) void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj); +static inline bool +i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj) +{ + return READ_ONCE(obj->framebuffer_references); +} + static inline unsigned int i915_gem_object_get_tiling(struct drm_i915_gem_object *obj) { @@ -360,5 +377,7 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) return engine; } +void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj); + #endif diff --git 
a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index da70bfe97ec5..5ddbc9499775 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -83,7 +83,6 @@ static void i915_fence_release(struct dma_fence *fence) * caught trying to reuse dead objects. */ i915_sw_fence_fini(&req->submit); - i915_sw_fence_fini(&req->execute); kmem_cache_free(req->i915->requests, req); } @@ -97,42 +96,20 @@ const struct dma_fence_ops i915_fence_ops = { .release = i915_fence_release, }; -int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, - struct drm_file *file) -{ - struct drm_i915_private *dev_private; - struct drm_i915_file_private *file_priv; - - WARN_ON(!req || !file || req->file_priv); - - if (!req || !file) - return -EINVAL; - - if (req->file_priv) - return -EINVAL; - - dev_private = req->i915; - file_priv = file->driver_priv; - - spin_lock(&file_priv->mm.lock); - req->file_priv = file_priv; - list_add_tail(&req->client_list, &file_priv->mm.request_list); - spin_unlock(&file_priv->mm.lock); - - return 0; -} - static inline void i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) { - struct drm_i915_file_private *file_priv = request->file_priv; + struct drm_i915_file_private *file_priv; + file_priv = request->file_priv; if (!file_priv) return; spin_lock(&file_priv->mm.lock); - list_del(&request->client_list); - request->file_priv = NULL; + if (request->file_priv) { + list_del(&request->client_link); + request->file_priv = NULL; + } spin_unlock(&file_priv->mm.lock); } @@ -212,6 +189,82 @@ i915_priotree_init(struct i915_priotree *pt) pt->priority = INT_MIN; } +static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int ret; + + /* Carefully retire all requests without writing to the rings */ + ret = i915_gem_wait_for_idle(i915, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED); + if 
(ret) + return ret; + + /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ + for_each_engine(engine, i915, id) { + struct i915_gem_timeline *timeline; + struct intel_timeline *tl = engine->timeline; + + if (!i915_seqno_passed(seqno, tl->seqno)) { + /* spin until threads are complete */ + while (intel_breadcrumbs_busy(engine)) + cond_resched(); + } + + /* Finally reset hw state */ + tl->seqno = seqno; + intel_engine_init_global_seqno(engine, seqno); + + list_for_each_entry(timeline, &i915->gt.timelines, link) + memset(timeline->engine[id].sync_seqno, 0, + sizeof(timeline->engine[id].sync_seqno)); + } + + return 0; +} + +int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + if (seqno == 0) + return -EINVAL; + + /* HWS page needs to be set less than what we + * will inject to ring + */ + return reset_all_global_seqno(dev_priv, seqno - 1); +} + +static int reserve_seqno(struct intel_engine_cs *engine) +{ + u32 active = ++engine->timeline->inflight_seqnos; + u32 seqno = engine->timeline->seqno; + int ret; + + /* Reservation is fine until we need to wrap around */ + if (likely(!add_overflows(seqno, active))) + return 0; + + ret = reset_all_global_seqno(engine->i915, 0); + if (ret) { + engine->timeline->inflight_seqnos--; + return ret; + } + + return 0; +} + +static void unreserve_seqno(struct intel_engine_cs *engine) +{ + GEM_BUG_ON(!engine->timeline->inflight_seqnos); + engine->timeline->inflight_seqnos--; +} + void i915_gem_retire_noop(struct i915_gem_active *active, struct drm_i915_gem_request *request) { @@ -225,7 +278,6 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) lockdep_assert_held(&request->i915->drm.struct_mutex); GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); - GEM_BUG_ON(!i915_sw_fence_signaled(&request->execute)); GEM_BUG_ON(!i915_gem_request_completed(request)); 
GEM_BUG_ON(!request->i915->gt.active_requests); @@ -244,13 +296,14 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) * completion order. */ list_del(&request->ring_link); - request->ring->last_retired_head = request->postfix; + request->ring->head = request->postfix; if (!--request->i915->gt.active_requests) { GEM_BUG_ON(!request->i915->gt.awake); mod_delayed_work(request->i915->wq, &request->i915->gt.idle_work, msecs_to_jiffies(100)); } + unreserve_seqno(request->engine); /* Walk through the active list, calling retire on each. This allows * objects to track their GPU activity and mark themselves as idle @@ -321,88 +374,9 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) } while (tmp != req); } -static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno) -{ - struct i915_gem_timeline *timeline = &i915->gt.global_timeline; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int ret; - - /* Carefully retire all requests without writing to the rings */ - ret = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED); - if (ret) - return ret; - - i915_gem_retire_requests(i915); - GEM_BUG_ON(i915->gt.active_requests > 1); - - /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ - if (!i915_seqno_passed(seqno, atomic_read(&timeline->seqno))) { - while (intel_breadcrumbs_busy(i915)) - cond_resched(); /* spin until threads are complete */ - } - atomic_set(&timeline->seqno, seqno); - - /* Finally reset hw state */ - for_each_engine(engine, i915, id) - intel_engine_init_global_seqno(engine, seqno); - - list_for_each_entry(timeline, &i915->gt.timelines, link) { - for_each_engine(engine, i915, id) { - struct intel_timeline *tl = &timeline->engine[id]; - - memset(tl->sync_seqno, 0, sizeof(tl->sync_seqno)); - } - } - - return 0; -} - -int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - - 
lockdep_assert_held(&dev_priv->drm.struct_mutex); - - if (seqno == 0) - return -EINVAL; - - /* HWS page needs to be set less than what we - * will inject to ring - */ - return i915_gem_init_global_seqno(dev_priv, seqno - 1); -} - -static int reserve_global_seqno(struct drm_i915_private *i915) -{ - u32 active_requests = ++i915->gt.active_requests; - u32 seqno = atomic_read(&i915->gt.global_timeline.seqno); - int ret; - - /* Reservation is fine until we need to wrap around */ - if (likely(seqno + active_requests > seqno)) - return 0; - - ret = i915_gem_init_global_seqno(i915, 0); - if (ret) { - i915->gt.active_requests--; - return ret; - } - - return 0; -} - -static u32 __timeline_get_seqno(struct i915_gem_timeline *tl) -{ - /* seqno only incremented under a mutex */ - return ++tl->seqno.counter; -} - -static u32 timeline_get_seqno(struct i915_gem_timeline *tl) +static u32 timeline_get_seqno(struct intel_timeline *tl) { - return atomic_inc_return(&tl->seqno); + return ++tl->seqno; } void __i915_gem_request_submit(struct drm_i915_gem_request *request) @@ -411,19 +385,19 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) struct intel_timeline *timeline; u32 seqno; + GEM_BUG_ON(!irqs_disabled()); + lockdep_assert_held(&engine->timeline->lock); + + trace_i915_gem_request_execute(request); + /* Transfer from per-context onto the global per-engine timeline */ timeline = engine->timeline; GEM_BUG_ON(timeline == request->timeline); - assert_spin_locked(&timeline->lock); - seqno = timeline_get_seqno(timeline->common); + seqno = timeline_get_seqno(timeline); GEM_BUG_ON(!seqno); GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno)); - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, seqno)); - request->previous_seqno = timeline->last_submitted_seqno; - timeline->last_submitted_seqno = seqno; - /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); 
request->global_seqno = seqno; @@ -431,7 +405,6 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) intel_engine_enable_signaling(request); spin_unlock(&request->lock); - GEM_BUG_ON(!request->global_seqno); engine->emit_breadcrumb(request, request->ring->vaddr + request->postfix); @@ -439,7 +412,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) list_move_tail(&request->link, &timeline->requests); spin_unlock(&request->timeline->lock); - i915_sw_fence_commit(&request->execute); + wake_up_all(&request->execute); } void i915_gem_request_submit(struct drm_i915_gem_request *request) @@ -455,33 +428,66 @@ void i915_gem_request_submit(struct drm_i915_gem_request *request) spin_unlock_irqrestore(&engine->timeline->lock, flags); } -static int __i915_sw_fence_call -submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request) { - struct drm_i915_gem_request *request = - container_of(fence, typeof(*request), submit); + struct intel_engine_cs *engine = request->engine; + struct intel_timeline *timeline; - switch (state) { - case FENCE_COMPLETE: - request->engine->submit_request(request); - break; + GEM_BUG_ON(!irqs_disabled()); + lockdep_assert_held(&engine->timeline->lock); - case FENCE_FREE: - i915_gem_request_put(request); - break; - } + /* Only unwind in reverse order, required so that the per-context list + * is kept in seqno/ring order. 
+ */ + GEM_BUG_ON(request->global_seqno != engine->timeline->seqno); + engine->timeline->seqno--; - return NOTIFY_DONE; + /* We may be recursing from the signal callback of another i915 fence */ + spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + request->global_seqno = 0; + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) + intel_engine_cancel_signaling(request); + spin_unlock(&request->lock); + + /* Transfer back from the global per-engine timeline to per-context */ + timeline = request->timeline; + GEM_BUG_ON(timeline == engine->timeline); + + spin_lock(&timeline->lock); + list_move(&request->link, &timeline->requests); + spin_unlock(&timeline->lock); + + /* We don't need to wake_up any waiters on request->execute, they + * will get woken by any other event or us re-adding this request + * to the engine timeline (__i915_gem_request_submit()). The waiters + * should be quite adept at finding that the request now has a new + * global_seqno to the one they went to sleep on. + */ +} + +void i915_gem_request_unsubmit(struct drm_i915_gem_request *request) +{ + struct intel_engine_cs *engine = request->engine; + unsigned long flags; + + /* Will be called from irq-context when using foreign fences. 
*/ + spin_lock_irqsave(&engine->timeline->lock, flags); + + __i915_gem_request_unsubmit(request); + + spin_unlock_irqrestore(&engine->timeline->lock, flags); } static int __i915_sw_fence_call -execute_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { struct drm_i915_gem_request *request = - container_of(fence, typeof(*request), execute); + container_of(fence, typeof(*request), submit); switch (state) { case FENCE_COMPLETE: + trace_i915_gem_request_submit(request); + request->engine->submit_request(request); break; case FENCE_FREE: @@ -528,14 +534,14 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, if (ret) return ERR_PTR(ret); - ret = reserve_global_seqno(dev_priv); + ret = reserve_seqno(engine); if (ret) goto err_unpin; /* Move the oldest request to the slab-cache (if not in use!) */ req = list_first_entry_or_null(&engine->timeline->requests, typeof(*req), link); - if (req && __i915_gem_request_completed(req)) + if (req && i915_gem_request_completed(req)) i915_gem_request_retire(req); /* Beware: Dragons be flying overhead. @@ -580,17 +586,11 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, &i915_fence_ops, &req->lock, req->timeline->fence_context, - __timeline_get_seqno(req->timeline->common)); + timeline_get_seqno(req->timeline)); /* We bump the ref for the fence chain */ i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify); - i915_sw_fence_init(&i915_gem_request_get(req)->execute, execute_notify); - - /* Ensure that the execute fence completes after the submit fence - - * as we complete the execute fence from within the submit fence - * callback, its completion would otherwise be visible first. 
- */ - i915_sw_fence_await_sw_fence(&req->execute, &req->submit, &req->execq); + init_waitqueue_head(&req->execute); i915_priotree_init(&req->priotree); @@ -625,6 +625,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, */ req->head = req->ring->tail; + /* Check that we didn't interrupt ourselves with a new request */ + GEM_BUG_ON(req->timeline->seqno != req->fence.seqno); return req; err_ctx: @@ -635,7 +637,7 @@ err_ctx: kmem_cache_free(dev_priv->requests, req); err_unreserve: - dev_priv->gt.active_requests--; + unreserve_seqno(engine); err_unpin: engine->context_unpin(engine, ctx); return ERR_PTR(ret); @@ -645,10 +647,14 @@ static int i915_gem_request_await_request(struct drm_i915_gem_request *to, struct drm_i915_gem_request *from) { + u32 seqno; int ret; GEM_BUG_ON(to == from); + if (i915_gem_request_completed(from)) + return 0; + if (to->engine->schedule) { ret = i915_priotree_add_dependency(to->i915, &to->priotree, @@ -667,14 +673,15 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, return ret < 0 ? ret : 0; } - if (!from->global_seqno) { + seqno = i915_gem_request_global_seqno(from); + if (!seqno) { ret = i915_sw_fence_await_dma_fence(&to->submit, &from->fence, 0, GFP_KERNEL); return ret < 0 ? 
ret : 0; } - if (from->global_seqno <= to->timeline->sync_seqno[from->engine->id]) + if (seqno <= to->timeline->sync_seqno[from->engine->id]) return 0; trace_i915_gem_ring_sync_to(to, from); @@ -692,7 +699,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, return ret; } - to->timeline->sync_seqno[from->engine->id] = from->global_seqno; + to->timeline->sync_seqno[from->engine->id] = seqno; return 0; } @@ -838,6 +845,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) struct intel_ring *ring = request->ring; struct intel_timeline *timeline = request->timeline; struct drm_i915_gem_request *prev; + u32 *cs; int err; lockdep_assert_held(&request->i915->drm.struct_mutex); @@ -847,8 +855,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) * our i915_gem_request_alloc() and called __i915_add_request() before * us, the timeline will hold its seqno which is later than ours. */ - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, - request->fence.seqno)); + GEM_BUG_ON(timeline->seqno != request->fence.seqno); /* * To ensure that this call will not fail, space for its emissions @@ -876,10 +883,9 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) * GPU processing the request, we never over-estimate the * position of the ring's HEAD. */ - err = intel_ring_begin(request, engine->emit_breadcrumb_sz); - GEM_BUG_ON(err); - request->postfix = ring->tail; - ring->tail += engine->emit_breadcrumb_sz * sizeof(u32); + cs = intel_ring_begin(request, engine->emit_breadcrumb_sz); + GEM_BUG_ON(IS_ERR(cs)); + request->postfix = intel_ring_offset(request, cs); /* Seal the request and mark it as pending execution. 
Note that * we may inspect this state, without holding any locks, during @@ -903,16 +909,14 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) list_add_tail(&request->link, &timeline->requests); spin_unlock_irq(&timeline->lock); - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, - request->fence.seqno)); - - timeline->last_submitted_seqno = request->fence.seqno; + GEM_BUG_ON(timeline->seqno != request->fence.seqno); i915_gem_active_set(&timeline->last_request, request); list_add_tail(&request->ring_link, &ring->request_list); request->emitted_jiffies = jiffies; - i915_gem_mark_busy(engine); + if (!request->i915->gt.active_requests++) + i915_gem_mark_busy(engine); /* Let the backend know a new request has arrived that may need * to adjust the existing execution schedule due to a high priority @@ -932,16 +936,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ } -static void reset_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) -{ - unsigned long flags; - - spin_lock_irqsave(&q->lock, flags); - if (list_empty(&wait->task_list)) - __add_wait_queue(q, wait); - spin_unlock_irqrestore(&q->lock, flags); -} - static unsigned long local_clock_us(unsigned int *cpu) { unsigned long t; @@ -975,9 +969,10 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu) } bool __i915_spin_request(const struct drm_i915_gem_request *req, - int state, unsigned long timeout_us) + u32 seqno, int state, unsigned long timeout_us) { - unsigned int cpu; + struct intel_engine_cs *engine = req->engine; + unsigned int irq, cpu; /* When waiting for high frequency requests, e.g. during synchronous * rendering split between the CPU and GPU, the finite amount of time @@ -989,11 +984,24 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req, * takes to sleep on a request, on the order of a microsecond. 
*/ + irq = atomic_read(&engine->irq_count); timeout_us += local_clock_us(&cpu); do { - if (__i915_gem_request_completed(req)) + if (seqno != i915_gem_request_global_seqno(req)) + break; + + if (i915_seqno_passed(intel_engine_get_seqno(req->engine), + seqno)) return true; + /* Seqno are meant to be ordered *before* the interrupt. If + * we see an interrupt without a corresponding seqno advance, + * assume we won't see one in the near future but require + * the engine->seqno_barrier() to fixup coherency. + */ + if (atomic_read(&engine->irq_count) != irq) + break; + if (signal_pending_state(state, current)) break; @@ -1006,52 +1014,14 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req, return false; } -static long -__i915_request_wait_for_execute(struct drm_i915_gem_request *request, - unsigned int flags, - long timeout) +static bool __i915_wait_request_check_and_reset(struct drm_i915_gem_request *request) { - const int state = flags & I915_WAIT_INTERRUPTIBLE ? - TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; - wait_queue_head_t *q = &request->i915->gpu_error.wait_queue; - DEFINE_WAIT(reset); - DEFINE_WAIT(wait); - - if (flags & I915_WAIT_LOCKED) - add_wait_queue(q, &reset); - - do { - prepare_to_wait(&request->execute.wait, &wait, state); - - if (i915_sw_fence_done(&request->execute)) - break; - - if (flags & I915_WAIT_LOCKED && - i915_reset_in_progress(&request->i915->gpu_error)) { - __set_current_state(TASK_RUNNING); - i915_reset(request->i915); - reset_wait_queue(q, &reset); - continue; - } - - if (signal_pending_state(state, current)) { - timeout = -ERESTARTSYS; - break; - } - - if (!timeout) { - timeout = -ETIME; - break; - } - - timeout = io_schedule_timeout(timeout); - } while (1); - finish_wait(&request->execute.wait, &wait); - - if (flags & I915_WAIT_LOCKED) - remove_wait_queue(q, &reset); + if (likely(!i915_reset_handoff(&request->i915->gpu_error))) + return false; - return timeout; + __set_current_state(TASK_RUNNING); + 
i915_reset(request->i915); + return true; } /** @@ -1079,7 +1049,9 @@ long i915_wait_request(struct drm_i915_gem_request *req, { const int state = flags & I915_WAIT_INTERRUPTIBLE ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; - DEFINE_WAIT(reset); + wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue; + DEFINE_WAIT_FUNC(reset, default_wake_function); + DEFINE_WAIT_FUNC(exec, default_wake_function); struct intel_wait wait; might_sleep(); @@ -1096,27 +1068,45 @@ long i915_wait_request(struct drm_i915_gem_request *req, if (!timeout) return -ETIME; - trace_i915_gem_request_wait_begin(req); + trace_i915_gem_request_wait_begin(req, flags); + + add_wait_queue(&req->execute, &exec); + if (flags & I915_WAIT_LOCKED) + add_wait_queue(errq, &reset); + + intel_wait_init(&wait, req); - if (!i915_sw_fence_done(&req->execute)) { - timeout = __i915_request_wait_for_execute(req, flags, timeout); - if (timeout < 0) +restart: + do { + set_current_state(state); + if (intel_wait_update_request(&wait, req)) + break; + + if (flags & I915_WAIT_LOCKED && + __i915_wait_request_check_and_reset(req)) + continue; + + if (signal_pending_state(state, current)) { + timeout = -ERESTARTSYS; goto complete; + } - GEM_BUG_ON(!i915_sw_fence_done(&req->execute)); - } - GEM_BUG_ON(!i915_sw_fence_done(&req->submit)); - GEM_BUG_ON(!req->global_seqno); + if (!timeout) { + timeout = -ETIME; + goto complete; + } + + timeout = io_schedule_timeout(timeout); + } while (1); + + GEM_BUG_ON(!intel_wait_has_seqno(&wait)); + GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit)); /* Optimistic short spin before touching IRQs */ if (i915_spin_request(req, state, 5)) goto complete; set_current_state(state); - if (flags & I915_WAIT_LOCKED) - add_wait_queue(&req->i915->gpu_error.wait_queue, &reset); - - intel_wait_init(&wait, req->global_seqno); if (intel_engine_add_wait(req->engine, &wait)) /* In order to check that we haven't missed the interrupt * as we enabled it, we need to kick ourselves to do a @@ -1124,6 +1114,9 
@@ long i915_wait_request(struct drm_i915_gem_request *req, */ goto wakeup; + if (flags & I915_WAIT_LOCKED) + __i915_wait_request_check_and_reset(req); + for (;;) { if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; @@ -1137,7 +1130,8 @@ long i915_wait_request(struct drm_i915_gem_request *req, timeout = io_schedule_timeout(timeout); - if (intel_wait_complete(&wait)) + if (intel_wait_complete(&wait) && + intel_wait_check_request(&wait, req)) break; set_current_state(state); @@ -1162,25 +1156,25 @@ wakeup: * itself, or indirectly by recovering the GPU). */ if (flags & I915_WAIT_LOCKED && - i915_reset_in_progress(&req->i915->gpu_error)) { - __set_current_state(TASK_RUNNING); - i915_reset(req->i915); - reset_wait_queue(&req->i915->gpu_error.wait_queue, - &reset); + __i915_wait_request_check_and_reset(req)) continue; - } /* Only spin if we know the GPU is processing this request */ if (i915_spin_request(req, state, 2)) break; + + if (!intel_wait_check_request(&wait, req)) { + intel_engine_remove_wait(req->engine, &wait); + goto restart; + } } intel_engine_remove_wait(req->engine, &wait); - if (flags & I915_WAIT_LOCKED) - remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset); - __set_current_state(TASK_RUNNING); - complete: + __set_current_state(TASK_RUNNING); + if (flags & I915_WAIT_LOCKED) + remove_wait_queue(errq, &reset); + remove_wait_queue(&req->execute, &exec); trace_i915_gem_request_wait_end(req); return timeout; @@ -1189,14 +1183,21 @@ complete: static void engine_retire_requests(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request, *next; + u32 seqno = intel_engine_get_seqno(engine); + LIST_HEAD(retire); + spin_lock_irq(&engine->timeline->lock); list_for_each_entry_safe(request, next, &engine->timeline->requests, link) { - if (!__i915_gem_request_completed(request)) - return; + if (!i915_seqno_passed(seqno, request->global_seqno)) + break; - i915_gem_request_retire(request); + list_move_tail(&request->link, &retire); } + 
spin_unlock_irq(&engine->timeline->lock); + + list_for_each_entry_safe(request, next, &retire, link) + i915_gem_request_retire(request); } void i915_gem_retire_requests(struct drm_i915_private *dev_priv) @@ -1212,3 +1213,8 @@ void i915_gem_retire_requests(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) engine_retire_requests(engine); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_request.c" +#include "selftests/i915_gem_request.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index ea511f06efaf..a211c53c813f 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -32,10 +32,12 @@ struct drm_file; struct drm_i915_gem_object; +struct drm_i915_gem_request; struct intel_wait { struct rb_node node; struct task_struct *tsk; + struct drm_i915_gem_request *request; u32 seqno; }; @@ -119,18 +121,10 @@ struct drm_i915_gem_request { * The submit fence is used to await upon all of the request's * dependencies. When it is signaled, the request is ready to run. * It is used by the driver to then queue the request for execution. - * - * The execute fence is used to signal when the request has been - * sent to hardware. - * - * It is illegal for the submit fence of one request to wait upon the - * execute fence of an earlier request. It should be sufficient to - * wait upon the submit fence of the earlier request. */ struct i915_sw_fence submit; - struct i915_sw_fence execute; wait_queue_t submitq; - wait_queue_t execq; + wait_queue_head_t execute; /* A list of everyone we wait upon, and everyone who waits upon us. 
* Even though we will not be submitted to the hardware before the @@ -143,13 +137,12 @@ struct drm_i915_gem_request { struct i915_priotree priotree; struct i915_dependency dep; - u32 global_seqno; - - /** GEM sequence number associated with the previous request, - * when the HWS breadcrumb is equal to this the GPU is processing - * this request. + /** GEM sequence number associated with this request on the + * global execution timeline. It is zero when the request is not + * on the HW queue (i.e. not on the engine timeline list). + * Its value is guarded by the timeline spinlock. */ - u32 previous_seqno; + u32 global_seqno; /** Position in the ring of the start of the request */ u32 head; @@ -187,7 +180,7 @@ struct drm_i915_gem_request { struct drm_i915_file_private *file_priv; /** file_priv list entry for this request */ - struct list_head client_list; + struct list_head client_link; }; extern const struct dma_fence_ops i915_fence_ops; @@ -200,8 +193,6 @@ static inline bool dma_fence_is_i915(const struct dma_fence *fence) struct drm_i915_gem_request * __must_check i915_gem_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx); -int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, - struct drm_file *file); void i915_gem_request_retire_upto(struct drm_i915_gem_request *req); static inline struct drm_i915_gem_request * @@ -243,6 +234,30 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, *pdst = src; } +/** + * i915_gem_request_global_seqno - report the current global seqno + * @request: the request + * + * A request is assigned a global seqno only when it is on the hardware + * execution queue. The global seqno can be used to maintain a list of + * requests on the same engine in retirement order, for example for + * constructing a priority queue for waiting. Prior to its execution, or + * if it is subsequently removed in the event of preemption, its global + * seqno is zero. 
As both insertion and removal from the execution queue + * may operate in IRQ context, it is not guarded by the usual struct_mutex + * BKL. Instead those relying on the global seqno must be prepared for its + * value to change between reads. Only when the request is complete can + * the global seqno be stable (due to the memory barriers on submitting + * the commands to the hardware to write the breadcrumb, if the HWS shows + * that it has passed the global seqno and the global seqno is unchanged + * after the read, it is indeed complete). + */ +static u32 +i915_gem_request_global_seqno(const struct drm_i915_gem_request *request) +{ + return READ_ONCE(request->global_seqno); +} + int i915_gem_request_await_object(struct drm_i915_gem_request *to, struct drm_i915_gem_object *obj, @@ -252,13 +267,14 @@ int i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches); #define i915_add_request(req) \ - __i915_add_request(req, true) -#define i915_add_request_no_flush(req) \ __i915_add_request(req, false) void __i915_gem_request_submit(struct drm_i915_gem_request *request); void i915_gem_request_submit(struct drm_i915_gem_request *request); +void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request); +void i915_gem_request_unsubmit(struct drm_i915_gem_request *request); + struct intel_rps_client; #define NO_WAITBOOST ERR_PTR(-1) #define IS_RPS_CLIENT(p) (!IS_ERR(p)) @@ -283,46 +299,58 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2) } static inline bool -__i915_gem_request_started(const struct drm_i915_gem_request *req) +__i915_gem_request_started(const struct drm_i915_gem_request *req, u32 seqno) { - GEM_BUG_ON(!req->global_seqno); + GEM_BUG_ON(!seqno); return i915_seqno_passed(intel_engine_get_seqno(req->engine), - req->previous_seqno); + seqno - 1); } static inline bool i915_gem_request_started(const struct drm_i915_gem_request *req) { - if (!req->global_seqno) + 
u32 seqno; + + seqno = i915_gem_request_global_seqno(req); + if (!seqno) return false; - return __i915_gem_request_started(req); + return __i915_gem_request_started(req, seqno); } static inline bool -__i915_gem_request_completed(const struct drm_i915_gem_request *req) +__i915_gem_request_completed(const struct drm_i915_gem_request *req, u32 seqno) { - GEM_BUG_ON(!req->global_seqno); - return i915_seqno_passed(intel_engine_get_seqno(req->engine), - req->global_seqno); + GEM_BUG_ON(!seqno); + return i915_seqno_passed(intel_engine_get_seqno(req->engine), seqno) && + seqno == i915_gem_request_global_seqno(req); } static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req) { - if (!req->global_seqno) + u32 seqno; + + seqno = i915_gem_request_global_seqno(req); + if (!seqno) return false; - return __i915_gem_request_completed(req); + return __i915_gem_request_completed(req, seqno); } bool __i915_spin_request(const struct drm_i915_gem_request *request, - int state, unsigned long timeout_us); + u32 seqno, int state, unsigned long timeout_us); static inline bool i915_spin_request(const struct drm_i915_gem_request *request, int state, unsigned long timeout_us) { - return (__i915_gem_request_started(request) && - __i915_spin_request(request, state, timeout_us)); + u32 seqno; + + seqno = i915_gem_request_global_seqno(request); + if (!seqno) + return 0; + + return (__i915_gem_request_started(request, seqno) && + __i915_spin_request(request, seqno, state, timeout_us)); } /* We treat requests as fences. 
This is not be to confused with our diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 70b3832a79dd..129ed303a6c4 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -218,7 +218,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, if (!(flags & I915_SHRINK_ACTIVE) && (i915_gem_object_is_active(obj) || - obj->framebuffer_references)) + i915_gem_object_is_framebuffer(obj))) continue; if (!can_release_pages(obj)) @@ -267,10 +267,13 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) { unsigned long freed; + intel_runtime_pm_get(dev_priv); freed = i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_ACTIVE); + intel_runtime_pm_put(dev_priv); + synchronize_rcu(); /* wait for our earlier RCU delayed slab frees */ return freed; @@ -386,9 +389,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) if (!i915_gem_shrinker_lock_uninterruptible(dev_priv, &slu, 5000)) return NOTIFY_DONE; - intel_runtime_pm_get(dev_priv); freed_pages = i915_gem_shrink_all(dev_priv); - intel_runtime_pm_put(dev_priv); /* Because we may be allocating inside our own driver, we cannot * assert that there are no objects with pinned pages that are not diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 9673bcc3b6ad..f3abdc27c5dd 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -79,12 +79,12 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, mutex_unlock(&dev_priv->mm.stolen_lock); } -static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv) +static dma_addr_t i915_stolen_to_dma(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; struct i915_ggtt *ggtt = &dev_priv->ggtt; struct resource *r; - u32 base; + dma_addr_t base; /* Almost universally we can 
find the Graphics Base of Stolen Memory * at register BSM (0x5c) in the igfx configuration space. On a few @@ -189,14 +189,14 @@ static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv) base = tom - tseg_size - ggtt->stolen_size; } - if (base == 0) + if (base == 0 || add_overflows(base, ggtt->stolen_size)) return 0; /* make sure we don't clobber the GTT if it's within stolen memory */ if (INTEL_GEN(dev_priv) <= 4 && !IS_G33(dev_priv) && !IS_PINEVIEW(dev_priv) && !IS_G4X(dev_priv)) { struct { - u32 start, end; + dma_addr_t start, end; } stolen[2] = { { .start = base, .end = base + ggtt->stolen_size, }, { .start = base, .end = base + ggtt->stolen_size, }, @@ -228,11 +228,13 @@ static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv) if (stolen[0].start != stolen[1].start || stolen[0].end != stolen[1].end) { + dma_addr_t end = base + ggtt->stolen_size - 1; + DRM_DEBUG_KMS("GTT within stolen memory at 0x%llx-0x%llx\n", (unsigned long long)ggtt_start, (unsigned long long)ggtt_end - 1); - DRM_DEBUG_KMS("Stolen memory adjusted to 0x%x-0x%x\n", - base, base + (u32)ggtt->stolen_size - 1); + DRM_DEBUG_KMS("Stolen memory adjusted to %pad - %pad\n", + &base, &end); } } @@ -261,8 +263,10 @@ static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv) * range. Apparently this works. */ if (r == NULL && !IS_GEN3(dev_priv)) { - DRM_ERROR("conflict detected with stolen region: [0x%08x - 0x%08x]\n", - base, base + (uint32_t)ggtt->stolen_size); + dma_addr_t end = base + ggtt->stolen_size; + + DRM_ERROR("conflict detected with stolen region: [%pad - %pad]\n", + &base, &end); base = 0; } } @@ -281,13 +285,13 @@ void i915_gem_cleanup_stolen(struct drm_device *dev) } static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { struct i915_ggtt *ggtt = &dev_priv->ggtt; uint32_t reg_val = I915_READ(IS_GM45(dev_priv) ? 
CTG_STOLEN_RESERVED : ELK_STOLEN_RESERVED); - phys_addr_t stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; + dma_addr_t stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; *base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16; @@ -304,7 +308,7 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); @@ -330,7 +334,7 @@ static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); @@ -350,7 +354,7 @@ static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); @@ -376,11 +380,11 @@ static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { struct i915_ggtt *ggtt = &dev_priv->ggtt; uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); - phys_addr_t stolen_top; + dma_addr_t stolen_top; stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; @@ -399,7 +403,7 @@ static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, int i915_gem_init_stolen(struct drm_i915_private *dev_priv) { struct i915_ggtt *ggtt = &dev_priv->ggtt; - phys_addr_t reserved_base, stolen_top; + dma_addr_t reserved_base, stolen_top; u32 reserved_total, reserved_size; u32 stolen_usable_start; @@ -420,7 +424,7 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) if (ggtt->stolen_size == 0) return 0; - 
dev_priv->mm.stolen_base = i915_stolen_to_physical(dev_priv); + dev_priv->mm.stolen_base = i915_stolen_to_dma(dev_priv); if (dev_priv->mm.stolen_base == 0) return 0; @@ -469,8 +473,8 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) if (reserved_base < dev_priv->mm.stolen_base || reserved_base + reserved_size > stolen_top) { - phys_addr_t reserved_top = reserved_base + reserved_size; - DRM_DEBUG_KMS("Stolen reserved area [%pa - %pa] outside stolen memory [%pa - %pa]\n", + dma_addr_t reserved_top = reserved_base + reserved_size; + DRM_DEBUG_KMS("Stolen reserved area [%pad - %pad] outside stolen memory [%pad - %pad]\n", &reserved_base, &reserved_top, &dev_priv->mm.stolen_base, &stolen_top); return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 974ac08df473..a0d6d4317a49 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -158,13 +158,8 @@ i915_tiling_ok(struct drm_i915_gem_object *obj, if (stride > 8192) return false; - if (IS_GEN3(i915)) { - if (obj->base.size > I830_FENCE_MAX_SIZE_VAL << 20) - return false; - } else { - if (obj->base.size > I830_FENCE_MAX_SIZE_VAL << 19) - return false; - } + if (!is_power_of_2(stride)) + return false; } if (IS_GEN2(i915) || @@ -176,12 +171,7 @@ i915_tiling_ok(struct drm_i915_gem_object *obj, if (!stride || !IS_ALIGNED(stride, tile_width)) return false; - /* 965+ just needs multiples of tile width */ - if (INTEL_GEN(i915) >= 4) - return true; - - /* Pre-965 needs power of two tile widths */ - return is_power_of_2(stride); + return true; } static bool i915_vma_fence_prepare(struct i915_vma *vma, @@ -248,7 +238,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, if ((tiling | stride) == obj->tiling_and_stride) return 0; - if (obj->framebuffer_references) + if (i915_gem_object_is_framebuffer(obj)) return -EBUSY; /* We need to rebind the object if its current allocation @@ -268,6 +258,12 @@ 
i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, if (err) return err; + i915_gem_object_lock(obj); + if (i915_gem_object_is_framebuffer(obj)) { + i915_gem_object_unlock(obj); + return -EBUSY; + } + /* If the memory has unknown (i.e. varying) swizzling, we pin the * pages to prevent them being swapped out and causing corruption * due to the change in swizzling. @@ -304,6 +300,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, } obj->tiling_and_stride = tiling | stride; + i915_gem_object_unlock(obj); /* Force the fence to be reacquired for GTT access */ i915_gem_release_mmap(obj); diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h index f2e51f42cc2f..6c53e14cab2a 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h @@ -33,7 +33,13 @@ struct i915_gem_timeline; struct intel_timeline { u64 fence_context; - u32 last_submitted_seqno; + u32 seqno; + + /** + * Count of outstanding requests, from the time they are constructed + * to the moment they are retired. Loosely coupled to hardware. 
+ */ + u32 inflight_seqnos; spinlock_t lock; @@ -56,7 +62,6 @@ struct intel_timeline { struct i915_gem_timeline { struct list_head link; - atomic_t seqno; struct drm_i915_private *i915; const char *name; diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 22b46398831e..58ccf8b8ca1c 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -66,13 +66,18 @@ static void cancel_userptr(struct work_struct *work) { struct i915_mmu_object *mo = container_of(work, typeof(*mo), work); struct drm_i915_gem_object *obj = mo->obj; - struct drm_device *dev = obj->base.dev; + struct work_struct *active; + + /* Cancel any active worker and force us to re-evaluate gup */ + mutex_lock(&obj->mm.lock); + active = fetch_and_zero(&obj->userptr.work); + mutex_unlock(&obj->mm.lock); + if (active) + goto out; i915_gem_object_wait(obj, I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT, NULL); - mutex_lock(&dev->struct_mutex); - /* Cancel any active worker and force us to re-evaluate gup */ - obj->userptr.work = NULL; + mutex_lock(&obj->base.dev->struct_mutex); /* We are inside a kthread context and can't be interrupted */ if (i915_gem_object_unbind(obj) == 0) @@ -83,8 +88,10 @@ static void cancel_userptr(struct work_struct *work) atomic_read(&obj->mm.pages_pin_count), obj->pin_display); + mutex_unlock(&obj->base.dev->struct_mutex); + +out: i915_gem_object_put(obj); - mutex_unlock(&dev->struct_mutex); } static void add_object(struct i915_mmu_object *mo) @@ -145,7 +152,8 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, del_object(mo); spin_unlock(&mn->lock); - flush_workqueue(mn->wq); + if (!list_empty(&cancelled)) + flush_workqueue(mn->wq); } static const struct mmu_notifier_ops i915_gem_userptr_notifier = { @@ -541,6 +549,8 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) } obj->userptr.work = ERR_CAST(pages); + if (IS_ERR(pages)) + 
__i915_gem_userptr_set_active(obj, false); } mutex_unlock(&obj->mm.lock); @@ -553,8 +563,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) } static struct sg_table * -__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj, - bool *active) +__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj) { struct get_pages_work *work; @@ -591,7 +600,6 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj, INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker); schedule_work(&work->work); - *active = true; return ERR_PTR(-EAGAIN); } @@ -599,10 +607,11 @@ static struct sg_table * i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) { const int num_pages = obj->base.size >> PAGE_SHIFT; + struct mm_struct *mm = obj->userptr.mm->mm; struct page **pvec; struct sg_table *pages; - int pinned, ret; bool active; + int pinned; /* If userspace should engineer that these pages are replaced in * the vma between us binding this page into the GTT and completion @@ -629,37 +638,39 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) return ERR_PTR(-EAGAIN); } - /* Let the mmu-notifier know that we have begun and need cancellation */ - ret = __i915_gem_userptr_set_active(obj, true); - if (ret) - return ERR_PTR(ret); - pvec = NULL; pinned = 0; - if (obj->userptr.mm->mm == current->mm) { - pvec = drm_malloc_gfp(num_pages, sizeof(struct page *), - GFP_TEMPORARY); - if (pvec == NULL) { - __i915_gem_userptr_set_active(obj, false); - return ERR_PTR(-ENOMEM); - } - pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages, - !obj->userptr.read_only, pvec); + if (mm == current->mm) { + pvec = drm_malloc_gfp(num_pages, sizeof(struct page *), + GFP_TEMPORARY | + __GFP_NORETRY | + __GFP_NOWARN); + if (pvec) /* defer to worker if malloc fails */ + pinned = __get_user_pages_fast(obj->userptr.ptr, + num_pages, + !obj->userptr.read_only, + pvec); } active = false; - if (pinned < 0) - pages = ERR_PTR(pinned), pinned = 
0; - else if (pinned < num_pages) - pages = __i915_gem_userptr_get_pages_schedule(obj, &active); - else + if (pinned < 0) { + pages = ERR_PTR(pinned); + pinned = 0; + } else if (pinned < num_pages) { + pages = __i915_gem_userptr_get_pages_schedule(obj); + active = pages == ERR_PTR(-EAGAIN); + } else { pages = __i915_gem_userptr_set_pages(obj, pvec, num_pages); - if (IS_ERR(pages)) { - __i915_gem_userptr_set_active(obj, active); - release_pages(pvec, pinned, 0); + active = !IS_ERR(pages); } + if (active) + __i915_gem_userptr_set_active(obj, true); + + if (IS_ERR(pages)) + release_pages(pvec, pinned, 0); drm_free_large(pvec); + return pages; } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 9cd22cda17af..8effc59f5cb5 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -342,7 +342,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, } static void error_print_instdone(struct drm_i915_error_state_buf *m, - struct drm_i915_error_engine *ee) + const struct drm_i915_error_engine *ee) { int slice; int subslice; @@ -372,7 +372,7 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, static void error_print_request(struct drm_i915_error_state_buf *m, const char *prefix, - struct drm_i915_error_request *erq) + const struct drm_i915_error_request *erq) { if (!erq->seqno) return; @@ -384,8 +384,17 @@ static void error_print_request(struct drm_i915_error_state_buf *m, erq->head, erq->tail); } +static void error_print_context(struct drm_i915_error_state_buf *m, + const char *header, + const struct drm_i915_error_context *ctx) +{ + err_printf(m, "%s%s[%d] user_handle %d hw_id %d, ban score %d guilty %d active %d\n", + header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id, + ctx->ban_score, ctx->guilty, ctx->active); +} + static void error_print_engine(struct drm_i915_error_state_buf *m, - struct drm_i915_error_engine *ee) + const struct 
drm_i915_error_engine *ee) { err_printf(m, "%s command stream:\n", engine_str(ee->engine_id)); err_printf(m, " START: 0x%08x\n", ee->start); @@ -457,6 +466,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, error_print_request(m, " ELSP[0]: ", &ee->execlist[0]); error_print_request(m, " ELSP[1]: ", &ee->execlist[1]); + error_print_context(m, " Active context: ", &ee->context); } void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) @@ -536,21 +546,57 @@ static void err_print_capabilities(struct drm_i915_error_state_buf *m, #undef PRINT_FLAG } +static __always_inline void err_print_param(struct drm_i915_error_state_buf *m, + const char *name, + const char *type, + const void *x) +{ + if (!__builtin_strcmp(type, "bool")) + err_printf(m, "i915.%s=%s\n", name, yesno(*(const bool *)x)); + else if (!__builtin_strcmp(type, "int")) + err_printf(m, "i915.%s=%d\n", name, *(const int *)x); + else if (!__builtin_strcmp(type, "unsigned int")) + err_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x); + else if (!__builtin_strcmp(type, "char *")) + err_printf(m, "i915.%s=%s\n", name, *(const char **)x); + else + BUILD_BUG(); +} + +static void err_print_params(struct drm_i915_error_state_buf *m, + const struct i915_params *p) +{ +#define PRINT(T, x) err_print_param(m, #x, #T, &p->x); + I915_PARAMS_FOR_EACH(PRINT); +#undef PRINT +} + +static void err_print_pciid(struct drm_i915_error_state_buf *m, + struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + + err_printf(m, "PCI ID: 0x%04x\n", pdev->device); + err_printf(m, "PCI Revision: 0x%02x\n", pdev->revision); + err_printf(m, "PCI Subsystem: %04x:%04x\n", + pdev->subsystem_vendor, + pdev->subsystem_device); +} + int i915_error_state_to_str(struct drm_i915_error_state_buf *m, - const struct i915_error_state_file_priv *error_priv) + const struct i915_gpu_state *error) { - struct drm_i915_private *dev_priv = error_priv->i915; - struct pci_dev *pdev = 
dev_priv->drm.pdev; - struct drm_i915_error_state *error = error_priv->error; + struct drm_i915_private *dev_priv = m->i915; struct drm_i915_error_object *obj; int i, j; if (!error) { - err_printf(m, "no error state collected\n"); - goto out; + err_printf(m, "No error state collected\n"); + return 0; } - err_printf(m, "%s\n", error->error_msg); + if (*error->error_msg) + err_printf(m, "%s\n", error->error_msg); err_printf(m, "Kernel: " UTS_RELEASE "\n"); err_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec, error->time.tv_usec); @@ -558,26 +604,22 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, error->boottime.tv_sec, error->boottime.tv_usec); err_printf(m, "Uptime: %ld s %ld us\n", error->uptime.tv_sec, error->uptime.tv_usec); - err_print_capabilities(m, &error->device_info); for (i = 0; i < ARRAY_SIZE(error->engine); i++) { if (error->engine[i].hangcheck_stalled && - error->engine[i].pid != -1) { - err_printf(m, "Active process (on ring %s): %s [%d], context bans %d\n", + error->engine[i].context.pid) { + err_printf(m, "Active process (on ring %s): %s [%d], score %d\n", engine_str(i), - error->engine[i].comm, - error->engine[i].pid, - error->engine[i].context_bans); + error->engine[i].context.comm, + error->engine[i].context.pid, + error->engine[i].context.ban_score); } } err_printf(m, "Reset count: %u\n", error->reset_count); err_printf(m, "Suspend count: %u\n", error->suspend_count); err_printf(m, "Platform: %s\n", intel_platform_name(error->device_info.platform)); - err_printf(m, "PCI ID: 0x%04x\n", pdev->device); - err_printf(m, "PCI Revision: 0x%02x\n", pdev->revision); - err_printf(m, "PCI Subsystem: %04x:%04x\n", - pdev->subsystem_vendor, - pdev->subsystem_device); + err_print_pciid(m, error->i915); + err_printf(m, "IOMMU enabled?: %d\n", error->iommu); if (HAS_CSR(dev_priv)) { @@ -590,21 +632,20 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, CSR_VERSION_MINOR(csr->version)); } + err_printf(m, "GT awake: %s\n", 
yesno(error->awake)); + err_printf(m, "RPM wakelock: %s\n", yesno(error->wakelock)); + err_printf(m, "PM suspended: %s\n", yesno(error->suspended)); err_printf(m, "EIR: 0x%08x\n", error->eir); err_printf(m, "IER: 0x%08x\n", error->ier); - if (INTEL_GEN(dev_priv) >= 8) { - for (i = 0; i < 4; i++) - err_printf(m, "GTIER gt %d: 0x%08x\n", i, - error->gtier[i]); - } else if (HAS_PCH_SPLIT(dev_priv) || IS_VALLEYVIEW(dev_priv)) - err_printf(m, "GTIER: 0x%08x\n", error->gtier[0]); + for (i = 0; i < error->ngtier; i++) + err_printf(m, "GTIER[%d]: 0x%08x\n", i, error->gtier[i]); err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er); err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake); err_printf(m, "DERRMR: 0x%08x\n", error->derrmr); err_printf(m, "CCID: 0x%08x\n", error->ccid); err_printf(m, "Missed interrupts: 0x%08lx\n", dev_priv->gpu_error.missed_irq_rings); - for (i = 0; i < dev_priv->num_fence_regs; i++) + for (i = 0; i < error->nfence; i++) err_printf(m, " fence[%d] = %08llx\n", i, error->fence[i]); if (INTEL_GEN(dev_priv) >= 6) { @@ -653,16 +694,18 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, error->pinned_bo_count); for (i = 0; i < ARRAY_SIZE(error->engine); i++) { - struct drm_i915_error_engine *ee = &error->engine[i]; + const struct drm_i915_error_engine *ee = &error->engine[i]; obj = ee->batchbuffer; if (obj) { err_puts(m, dev_priv->engine[i]->name); - if (ee->pid != -1) - err_printf(m, " (submitted by %s [%d], bans %d)", - ee->comm, - ee->pid, - ee->context_bans); + if (ee->context.pid) + err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d)", + ee->context.comm, + ee->context.pid, + ee->context.handle, + ee->context.hw_id, + ee->context.ban_score); err_printf(m, " --- gtt_offset = 0x%08x %08x\n", upper_32_bits(obj->gtt_offset), lower_32_bits(obj->gtt_offset)); @@ -716,9 +759,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, intel_overlay_print_error_state(m, error->overlay); if (error->display) - 
intel_display_print_error_state(m, dev_priv, error->display); + intel_display_print_error_state(m, error->display); + + err_print_capabilities(m, &error->device_info); + err_print_params(m, &error->params); -out: if (m->bytes == 0 && m->err) return m->err; @@ -770,10 +815,16 @@ static void i915_error_object_free(struct drm_i915_error_object *obj) kfree(obj); } -static void i915_error_state_free(struct kref *error_ref) +static __always_inline void free_param(const char *type, void *x) +{ + if (!__builtin_strcmp(type, "char *")) + kfree(*(void **)x); +} + +void __i915_gpu_state_free(struct kref *error_ref) { - struct drm_i915_error_state *error = container_of(error_ref, - typeof(*error), ref); + struct i915_gpu_state *error = + container_of(error_ref, typeof(*error), ref); int i; for (i = 0; i < ARRAY_SIZE(error->engine); i++) { @@ -800,6 +851,11 @@ static void i915_error_state_free(struct kref *error_ref) kfree(error->overlay); kfree(error->display); + +#define FREE(T, x) free_param(#T, &error->params.x); + I915_PARAMS_FOR_EACH(FREE); +#undef FREE + kfree(error); } @@ -938,7 +994,7 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err, * It's only a small step better than a random number in its current form. 
*/ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error, + struct i915_gpu_state *error, int *engine_id) { uint32_t error_code = 0; @@ -963,20 +1019,21 @@ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, } static void i915_gem_record_fences(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { int i; - if (IS_GEN3(dev_priv) || IS_GEN2(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 6) { for (i = 0; i < dev_priv->num_fence_regs; i++) - error->fence[i] = I915_READ(FENCE_REG(i)); - } else if (IS_GEN5(dev_priv) || IS_GEN4(dev_priv)) { + error->fence[i] = I915_READ64(FENCE_REG_GEN6_LO(i)); + } else if (INTEL_GEN(dev_priv) >= 4) { for (i = 0; i < dev_priv->num_fence_regs; i++) error->fence[i] = I915_READ64(FENCE_REG_965_LO(i)); - } else if (INTEL_GEN(dev_priv) >= 6) { + } else { for (i = 0; i < dev_priv->num_fence_regs; i++) - error->fence[i] = I915_READ64(FENCE_REG_GEN6_LO(i)); + error->fence[i] = I915_READ(FENCE_REG(i)); } + error->nfence = i; } static inline u32 @@ -1000,7 +1057,7 @@ gen8_engine_sync_index(struct intel_engine_cs *engine, return idx; } -static void gen8_record_semaphore_state(struct drm_i915_error_state *error, +static void gen8_record_semaphore_state(struct i915_gpu_state *error, struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) { @@ -1054,7 +1111,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, if (RB_EMPTY_ROOT(&b->waiters)) return; - if (!spin_trylock_irq(&b->lock)) { + if (!spin_trylock_irq(&b->rb_lock)) { ee->waiters = ERR_PTR(-EDEADLK); return; } @@ -1062,7 +1119,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, count = 0; for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb)) count++; - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); waiter = NULL; if (count) @@ -1072,7 +1129,7 @@ static void 
error_record_engine_waiters(struct intel_engine_cs *engine, if (!waiter) return; - if (!spin_trylock_irq(&b->lock)) { + if (!spin_trylock_irq(&b->rb_lock)) { kfree(waiter); ee->waiters = ERR_PTR(-EDEADLK); return; @@ -1080,7 +1137,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, ee->waiters = waiter; for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = container_of(rb, typeof(*w), node); + struct intel_wait *w = rb_entry(rb, typeof(*w), node); strcpy(waiter->comm, w->tsk->comm); waiter->pid = w->tsk->pid; @@ -1090,10 +1147,10 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, if (++ee->num_waiters == count) break; } - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); } -static void error_record_engine_registers(struct drm_i915_error_state *error, +static void error_record_engine_registers(struct i915_gpu_state *error, struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) { @@ -1267,8 +1324,30 @@ static void error_record_engine_execlists(struct intel_engine_cs *engine, &ee->execlist[n]); } +static void record_context(struct drm_i915_error_context *e, + struct i915_gem_context *ctx) +{ + if (ctx->pid) { + struct task_struct *task; + + rcu_read_lock(); + task = pid_task(ctx->pid, PIDTYPE_PID); + if (task) { + strcpy(e->comm, task->comm); + e->pid = task->pid; + } + rcu_read_unlock(); + } + + e->handle = ctx->user_handle; + e->hw_id = ctx->hw_id; + e->ban_score = ctx->ban_score; + e->guilty = ctx->guilty_count; + e->active = ctx->active_count; +} + static void i915_gem_record_rings(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { struct i915_ggtt *ggtt = &dev_priv->ggtt; int i; @@ -1281,7 +1360,6 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, struct drm_i915_error_engine *ee = &error->engine[i]; struct drm_i915_gem_request *request; - ee->pid = -1; ee->engine_id = -1; if (!engine) @@ 
-1296,11 +1374,12 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, request = i915_gem_find_active_request(engine); if (request) { struct intel_ring *ring; - struct pid *pid; ee->vm = request->ctx->ppgtt ? &request->ctx->ppgtt->base : &ggtt->base; + record_context(&ee->context, request->ctx); + /* We need to copy these to an anonymous buffer * as the simplest method to avoid being overwritten * by userspace. @@ -1318,19 +1397,6 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, i915_error_object_create(dev_priv, request->ctx->engine[i].state); - pid = request->ctx->pid; - if (pid) { - struct task_struct *task; - - rcu_read_lock(); - task = pid_task(pid, PIDTYPE_PID); - if (task) { - strcpy(ee->comm, task->comm); - ee->pid = task->pid; - } - rcu_read_unlock(); - } - error->simulated |= i915_gem_context_no_error_capture(request->ctx); @@ -1357,7 +1423,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, } static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error, + struct i915_gpu_state *error, struct i915_address_space *vm, int idx) { @@ -1383,7 +1449,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, } static void i915_capture_active_buffers(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { int cnt = 0, i, j; @@ -1408,7 +1474,7 @@ static void i915_capture_active_buffers(struct drm_i915_private *dev_priv, } static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { struct i915_address_space *vm = &dev_priv->ggtt.base; struct drm_i915_error_buffer *bo; @@ -1439,7 +1505,7 @@ static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv, } static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) 
{ /* Capturing log buf contents won't be useful if logging was disabled */ if (!dev_priv->guc.log.vma || (i915.guc_log_level < 0)) @@ -1451,7 +1517,7 @@ static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv, /* Capture all registers which don't fit into another category. */ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { int i; @@ -1508,9 +1574,11 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, error->ier = I915_READ(GEN8_DE_MISC_IER); for (i = 0; i < 4; i++) error->gtier[i] = I915_READ(GEN8_GT_IER(i)); + error->ngtier = 4; } else if (HAS_PCH_SPLIT(dev_priv)) { error->ier = I915_READ(DEIER); error->gtier[0] = I915_READ(GTIER); + error->ngtier = 1; } else if (IS_GEN2(dev_priv)) { error->ier = I915_READ16(IER); } else if (!IS_VALLEYVIEW(dev_priv)) { @@ -1521,7 +1589,7 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, } static void i915_error_capture_msg(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error, + struct i915_gpu_state *error, u32 engine_mask, const char *error_msg) { @@ -1534,12 +1602,12 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv, "GPU HANG: ecode %d:%d:0x%08x", INTEL_GEN(dev_priv), engine_id, ecode); - if (engine_id != -1 && error->engine[engine_id].pid != -1) + if (engine_id != -1 && error->engine[engine_id].context.pid) len += scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, ", in %s [%d]", - error->engine[engine_id].comm, - error->engine[engine_id].pid); + error->engine[engine_id].context.comm, + error->engine[engine_id].context.pid); scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, ", reason: %s, action: %s", @@ -1548,8 +1616,12 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv, } static void i915_capture_gen_state(struct drm_i915_private *dev_priv, - struct drm_i915_error_state 
*error) + struct i915_gpu_state *error) { + error->awake = dev_priv->gt.awake; + error->wakelock = atomic_read(&dev_priv->pm.wakeref_count); + error->suspended = dev_priv->pm.suspended; + error->iommu = -1; #ifdef CONFIG_INTEL_IOMMU error->iommu = intel_iommu_gfx_mapped; @@ -1562,9 +1634,26 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv, sizeof(error->device_info)); } +static __always_inline void dup_param(const char *type, void *x) +{ + if (!__builtin_strcmp(type, "char *")) + *(void **)x = kstrdup(*(void **)x, GFP_ATOMIC); +} + static int capture(void *data) { - struct drm_i915_error_state *error = data; + struct i915_gpu_state *error = data; + + do_gettimeofday(&error->time); + error->boottime = ktime_to_timeval(ktime_get_boottime()); + error->uptime = + ktime_to_timeval(ktime_sub(ktime_get(), + error->i915->gt.last_init_time)); + + error->params = i915; +#define DUP(T, x) dup_param(#T, &error->params.x); + I915_PARAMS_FOR_EACH(DUP); +#undef DUP i915_capture_gen_state(error->i915, error); i915_capture_reg_state(error->i915, error); @@ -1574,12 +1663,6 @@ static int capture(void *data) i915_capture_pinned_buffers(error->i915, error); i915_gem_capture_guc_log_buffer(error->i915, error); - do_gettimeofday(&error->time); - error->boottime = ktime_to_timeval(ktime_get_boottime()); - error->uptime = - ktime_to_timeval(ktime_sub(ktime_get(), - error->i915->gt.last_init_time)); - error->overlay = intel_overlay_capture_error_state(error->i915); error->display = intel_display_capture_error_state(error->i915); @@ -1588,6 +1671,23 @@ static int capture(void *data) #define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x)) +struct i915_gpu_state * +i915_capture_gpu_state(struct drm_i915_private *i915) +{ + struct i915_gpu_state *error; + + error = kzalloc(sizeof(*error), GFP_ATOMIC); + if (!error) + return NULL; + + kref_init(&error->ref); + error->i915 = i915; + + stop_machine(capture, error, NULL); + + return error; +} + /** * i915_capture_error_state - 
capture an error record for later analysis * @dev: drm device @@ -1602,7 +1702,7 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, const char *error_msg) { static bool warned; - struct drm_i915_error_state *error; + struct i915_gpu_state *error; unsigned long flags; if (!i915.error_capture) @@ -1611,18 +1711,12 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, if (READ_ONCE(dev_priv->gpu_error.first_error)) return; - /* Account for pipe specific data like PIPE*STAT */ - error = kzalloc(sizeof(*error), GFP_ATOMIC); + error = i915_capture_gpu_state(dev_priv); if (!error) { DRM_DEBUG_DRIVER("out of memory, not capturing error state\n"); return; } - kref_init(&error->ref); - error->i915 = dev_priv; - - stop_machine(capture, error, NULL); - i915_error_capture_msg(dev_priv, error, engine_mask, error_msg); DRM_INFO("%s\n", error->error_msg); @@ -1636,7 +1730,7 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, } if (error) { - i915_error_state_free(&error->ref); + __i915_gpu_state_free(&error->ref); return; } @@ -1652,33 +1746,28 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, } } -void i915_error_state_get(struct drm_device *dev, - struct i915_error_state_file_priv *error_priv) +struct i915_gpu_state * +i915_first_error_state(struct drm_i915_private *i915) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct i915_gpu_state *error; - spin_lock_irq(&dev_priv->gpu_error.lock); - error_priv->error = dev_priv->gpu_error.first_error; - if (error_priv->error) - kref_get(&error_priv->error->ref); - spin_unlock_irq(&dev_priv->gpu_error.lock); -} + spin_lock_irq(&i915->gpu_error.lock); + error = i915->gpu_error.first_error; + if (error) + i915_gpu_state_get(error); + spin_unlock_irq(&i915->gpu_error.lock); -void i915_error_state_put(struct i915_error_state_file_priv *error_priv) -{ - if (error_priv->error) - kref_put(&error_priv->error->ref, i915_error_state_free); + return error; } -void 
i915_destroy_error_state(struct drm_i915_private *dev_priv) +void i915_reset_error_state(struct drm_i915_private *i915) { - struct drm_i915_error_state *error; + struct i915_gpu_state *error; - spin_lock_irq(&dev_priv->gpu_error.lock); - error = dev_priv->gpu_error.first_error; - dev_priv->gpu_error.first_error = NULL; - spin_unlock_irq(&dev_priv->gpu_error.lock); + spin_lock_irq(&i915->gpu_error.lock); + error = i915->gpu_error.first_error; + i915->gpu_error.first_error = NULL; + spin_unlock_irq(&i915->gpu_error.lock); - if (error) - kref_put(&error->ref, i915_error_state_free); + i915_gpu_state_put(error); } diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 8ced9e26f075..1642fff9cf13 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -25,19 +25,30 @@ #include "i915_drv.h" #include "intel_uc.h" +#include <trace/events/dma_fence.h> + /** * DOC: GuC-based command submission * - * i915_guc_client: - * We use the term client to avoid confusion with contexts. A i915_guc_client is - * equivalent to GuC object guc_context_desc. This context descriptor is - * allocated from a pool of 1024 entries. Kernel driver will allocate doorbell - * and workqueue for it. Also the process descriptor (guc_process_desc), which - * is mapped to client space. So the client can write Work Item then ring the - * doorbell. + * GuC client: + * A i915_guc_client refers to a submission path through GuC. Currently, there + * is only one of these (the execbuf_client) and this one is charged with all + * submissions to the GuC. This struct is the owner of a doorbell, a process + * descriptor and a workqueue (all of them inside a single gem object that + * contains all required pages for these elements). * - * To simplify the implementation, we allocate one gem object that contains all - * pages for doorbell, process descriptor and workqueue. 
+ * GuC stage descriptor: + * During initialization, the driver allocates a static pool of 1024 such + * descriptors, and shares them with the GuC. + * Currently, there exists a 1:1 mapping between a i915_guc_client and a + * guc_stage_desc (via the client's stage_id), so effectively only one + * gets used. This stage descriptor lets the GuC know about the doorbell, + * workqueue and process descriptor. Theoretically, it also lets the GuC + * know about our HW contexts (context ID, etc...), but we actually + * employ a kind of submission where the GuC uses the LRCA sent via the work + * item instead (the single guc_stage_desc associated to execbuf client + * contains information about the default kernel context only, but this is + * essentially unused). This is called a "proxy" submission. * * The Scratch registers: * There are 16 MMIO-based registers start from 0xC180. The kernel driver writes @@ -60,34 +71,91 @@ * ELSP context descriptor dword into Work Item. * See guc_wq_item_append() * + * ADS: + * The Additional Data Struct (ADS) has pointers for different buffers used by + * the GuC. One single gem object contains the ADS struct itself (guc_ads), the + * scheduling policies (guc_policies), a structure describing a collection of + * register sets (guc_mmio_reg_state) and some extra pages for the GuC to save + * its internal state for sleep. + * */ +static inline bool is_high_priority(struct i915_guc_client* client) +{ + return client->priority <= GUC_CLIENT_PRIORITY_HIGH; +} + +static int __reserve_doorbell(struct i915_guc_client *client) +{ + unsigned long offset; + unsigned long end; + u16 id; + + GEM_BUG_ON(client->doorbell_id != GUC_DOORBELL_INVALID); + + /* + * The bitmap tracks which doorbell registers are currently in use. + * It is split into two halves; the first half is used for normal + * priority contexts, the second half for high-priority ones. 
+ */ + offset = 0; + end = GUC_NUM_DOORBELLS/2; + if (is_high_priority(client)) { + offset = end; + end += offset; + } + + id = find_next_zero_bit(client->guc->doorbell_bitmap, offset, end); + if (id == end) + return -ENOSPC; + + __set_bit(id, client->guc->doorbell_bitmap); + client->doorbell_id = id; + DRM_DEBUG_DRIVER("client %u (high prio=%s) reserved doorbell: %d\n", + client->stage_id, yesno(is_high_priority(client)), + id); + return 0; +} + +static void __unreserve_doorbell(struct i915_guc_client *client) +{ + GEM_BUG_ON(client->doorbell_id == GUC_DOORBELL_INVALID); + + __clear_bit(client->doorbell_id, client->guc->doorbell_bitmap); + client->doorbell_id = GUC_DOORBELL_INVALID; +} + /* * Tell the GuC to allocate or deallocate a specific doorbell */ -static int guc_allocate_doorbell(struct intel_guc *guc, - struct i915_guc_client *client) +static int __guc_allocate_doorbell(struct intel_guc *guc, u32 stage_id) { u32 action[] = { INTEL_GUC_ACTION_ALLOCATE_DOORBELL, - client->ctx_index + stage_id }; return intel_guc_send(guc, action, ARRAY_SIZE(action)); } -static int guc_release_doorbell(struct intel_guc *guc, - struct i915_guc_client *client) +static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 stage_id) { u32 action[] = { INTEL_GUC_ACTION_DEALLOCATE_DOORBELL, - client->ctx_index + stage_id }; return intel_guc_send(guc, action, ARRAY_SIZE(action)); } +static struct guc_stage_desc *__get_stage_desc(struct i915_guc_client *client) +{ + struct guc_stage_desc *base = client->guc->stage_desc_pool_vaddr; + + return &base[client->stage_id]; +} + /* * Initialise, update, or clear doorbell data shared with the GuC * @@ -95,107 +163,129 @@ static int guc_release_doorbell(struct intel_guc *guc, * client object which contains the page being used for the doorbell */ -static int guc_update_doorbell_id(struct intel_guc *guc, - struct i915_guc_client *client, - u16 new_id) +static void __update_doorbell_desc(struct i915_guc_client *client, u16 new_id) { - struct 
sg_table *sg = guc->ctx_pool_vma->pages; - void *doorbell_bitmap = guc->doorbell_bitmap; - struct guc_doorbell_info *doorbell; - struct guc_context_desc desc; - size_t len; + struct guc_stage_desc *desc; - doorbell = client->vaddr + client->doorbell_offset; + /* Update the GuC's idea of the doorbell ID */ + desc = __get_stage_desc(client); + desc->db_id = new_id; +} - if (client->doorbell_id != GUC_INVALID_DOORBELL_ID && - test_bit(client->doorbell_id, doorbell_bitmap)) { - /* Deactivate the old doorbell */ - doorbell->db_status = GUC_DOORBELL_DISABLED; - (void)guc_release_doorbell(guc, client); - __clear_bit(client->doorbell_id, doorbell_bitmap); - } +static struct guc_doorbell_info *__get_doorbell(struct i915_guc_client *client) +{ + return client->vaddr + client->doorbell_offset; +} - /* Update the GuC's idea of the doorbell ID */ - len = sg_pcopy_to_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), - sizeof(desc) * client->ctx_index); - if (len != sizeof(desc)) - return -EFAULT; - desc.db_id = new_id; - len = sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), - sizeof(desc) * client->ctx_index); - if (len != sizeof(desc)) - return -EFAULT; - - client->doorbell_id = new_id; - if (new_id == GUC_INVALID_DOORBELL_ID) - return 0; +static bool has_doorbell(struct i915_guc_client *client) +{ + if (client->doorbell_id == GUC_DOORBELL_INVALID) + return false; + + return test_bit(client->doorbell_id, client->guc->doorbell_bitmap); +} - /* Activate the new doorbell */ - __set_bit(new_id, doorbell_bitmap); +static int __create_doorbell(struct i915_guc_client *client) +{ + struct guc_doorbell_info *doorbell; + int err; + + doorbell = __get_doorbell(client); doorbell->db_status = GUC_DOORBELL_ENABLED; doorbell->cookie = client->doorbell_cookie; - return guc_allocate_doorbell(guc, client); + + err = __guc_allocate_doorbell(client->guc, client->stage_id); + if (err) { + doorbell->db_status = GUC_DOORBELL_DISABLED; + doorbell->cookie = 0; + } + return err; } -static 
void guc_disable_doorbell(struct intel_guc *guc, - struct i915_guc_client *client) +static int __destroy_doorbell(struct i915_guc_client *client) { - (void)guc_update_doorbell_id(guc, client, GUC_INVALID_DOORBELL_ID); + struct drm_i915_private *dev_priv = guc_to_i915(client->guc); + struct guc_doorbell_info *doorbell; + u16 db_id = client->doorbell_id; - /* XXX: wait for any interrupts */ - /* XXX: wait for workqueue to drain */ + GEM_BUG_ON(db_id >= GUC_DOORBELL_INVALID); + + doorbell = __get_doorbell(client); + doorbell->db_status = GUC_DOORBELL_DISABLED; + doorbell->cookie = 0; + + /* Doorbell release flow requires that we wait for GEN8_DRB_VALID bit + * to go to zero after updating db_status before we call the GuC to + * release the doorbell */ + if (wait_for_us(!(I915_READ(GEN8_DRBREGL(db_id)) & GEN8_DRB_VALID), 10)) + WARN_ONCE(true, "Doorbell never became invalid after disable\n"); + + return __guc_deallocate_doorbell(client->guc, client->stage_id); } -static uint16_t -select_doorbell_register(struct intel_guc *guc, uint32_t priority) +static int create_doorbell(struct i915_guc_client *client) { - /* - * The bitmap tracks which doorbell registers are currently in use. - * It is split into two halves; the first half is used for normal - * priority contexts, the second half for high-priority ones. - * Note that logically higher priorities are numerically less than - * normal ones, so the test below means "is it high-priority?" - */ - const bool hi_pri = (priority <= GUC_CTX_PRIORITY_HIGH); - const uint16_t half = GUC_MAX_DOORBELLS / 2; - const uint16_t start = hi_pri ? half : 0; - const uint16_t end = start + half; - uint16_t id; + int ret; - id = find_next_zero_bit(guc->doorbell_bitmap, end, start); - if (id == end) - id = GUC_INVALID_DOORBELL_ID; + ret = __reserve_doorbell(client); + if (ret) + return ret; - DRM_DEBUG_DRIVER("assigned %s priority doorbell id 0x%x\n", - hi_pri ? 
"high" : "normal", id); + __update_doorbell_desc(client, client->doorbell_id); - return id; + ret = __create_doorbell(client); + if (ret) + goto err; + + return 0; + +err: + __update_doorbell_desc(client, GUC_DOORBELL_INVALID); + __unreserve_doorbell(client); + return ret; } -/* - * Select, assign and relase doorbell cachelines - * - * These functions track which doorbell cachelines are in use. - * The data they manipulate is protected by the intel_guc_send lock. - */ +static int destroy_doorbell(struct i915_guc_client *client) +{ + int err; + + GEM_BUG_ON(!has_doorbell(client)); + + /* XXX: wait for any interrupts */ + /* XXX: wait for workqueue to drain */ + + err = __destroy_doorbell(client); + if (err) + return err; + + __update_doorbell_desc(client, GUC_DOORBELL_INVALID); + + __unreserve_doorbell(client); + + return 0; +} -static uint32_t select_doorbell_cacheline(struct intel_guc *guc) +static unsigned long __select_cacheline(struct intel_guc* guc) { - const uint32_t cacheline_size = cache_line_size(); - uint32_t offset; + unsigned long offset; /* Doorbell uses a single cache line within a page */ offset = offset_in_page(guc->db_cacheline); /* Moving to next cache line to reduce contention */ - guc->db_cacheline += cacheline_size; - - DRM_DEBUG_DRIVER("selected doorbell cacheline 0x%x, next 0x%x, linesize %u\n", - offset, guc->db_cacheline, cacheline_size); + guc->db_cacheline += cache_line_size(); + DRM_DEBUG_DRIVER("reserved cacheline 0x%lx, next 0x%x, linesize %u\n", + offset, guc->db_cacheline, cache_line_size()); return offset; } +static inline struct guc_process_desc * +__get_process_desc(struct i915_guc_client *client) +{ + return client->vaddr + client->proc_desc_offset; +} + /* * Initialise the process descriptor shared with the GuC firmware. 
*/ @@ -204,9 +294,7 @@ static void guc_proc_desc_init(struct intel_guc *guc, { struct guc_process_desc *desc; - desc = client->vaddr + client->proc_desc_offset; - - memset(desc, 0, sizeof(*desc)); + desc = memset(__get_process_desc(client), 0, sizeof(*desc)); /* * XXX: pDoorbell and WQVBaseAddress are pointers in process address @@ -217,42 +305,41 @@ static void guc_proc_desc_init(struct intel_guc *guc, desc->wq_base_addr = 0; desc->db_base_addr = 0; - desc->context_id = client->ctx_index; + desc->stage_id = client->stage_id; desc->wq_size_bytes = client->wq_size; desc->wq_status = WQ_STATUS_ACTIVE; desc->priority = client->priority; } /* - * Initialise/clear the context descriptor shared with the GuC firmware. + * Initialise/clear the stage descriptor shared with the GuC firmware. * * This descriptor tells the GuC where (in GGTT space) to find the important * data structures relating to this client (doorbell, process descriptor, * write queue, etc). */ - -static void guc_ctx_desc_init(struct intel_guc *guc, - struct i915_guc_client *client) +static void guc_stage_desc_init(struct intel_guc *guc, + struct i915_guc_client *client) { struct drm_i915_private *dev_priv = guc_to_i915(guc); struct intel_engine_cs *engine; struct i915_gem_context *ctx = client->owner; - struct guc_context_desc desc; - struct sg_table *sg; + struct guc_stage_desc *desc; unsigned int tmp; u32 gfx_addr; - memset(&desc, 0, sizeof(desc)); + desc = __get_stage_desc(client); + memset(desc, 0, sizeof(*desc)); - desc.attribute = GUC_CTX_DESC_ATTR_ACTIVE | GUC_CTX_DESC_ATTR_KERNEL; - desc.context_id = client->ctx_index; - desc.priority = client->priority; - desc.db_id = client->doorbell_id; + desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE | GUC_STAGE_DESC_ATTR_KERNEL; + desc->stage_id = client->stage_id; + desc->priority = client->priority; + desc->db_id = client->doorbell_id; for_each_engine_masked(engine, dev_priv, client->engines, tmp) { struct intel_context *ce = &ctx->engine[engine->id]; 
uint32_t guc_engine_id = engine->guc_id; - struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id]; + struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id]; /* TODO: We have a design issue to be solved here. Only when we * receive the first batch, we know which engine is used by the @@ -264,12 +351,22 @@ static void guc_ctx_desc_init(struct intel_guc *guc, if (!ce->state) break; /* XXX: continue? */ + /* + * XXX: When this is a GUC_STAGE_DESC_ATTR_KERNEL client (proxy + * submission or, in other words, not using a direct submission + * model) the KMD's LRCA is not used for any work submission. + * Instead, the GuC uses the LRCA of the user mode context (see + * guc_wq_item_append below). + */ lrc->context_desc = lower_32_bits(ce->lrc_desc); /* The state page is after PPHWSP */ - lrc->ring_lcra = + lrc->ring_lrca = guc_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE; - lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) | + + /* XXX: In direct submission, the GuC wants the HW context id + * here. 
In proxy submission, it wants the stage id */ + lrc->context_id = (client->stage_id << GUC_ELC_CTXID_OFFSET) | (guc_engine_id << GUC_ELC_ENGINE_OFFSET); lrc->ring_begin = guc_ggtt_offset(ce->ring->vma); @@ -277,50 +374,36 @@ static void guc_ctx_desc_init(struct intel_guc *guc, lrc->ring_next_free_location = lrc->ring_begin; lrc->ring_current_tail_pointer_value = 0; - desc.engines_used |= (1 << guc_engine_id); + desc->engines_used |= (1 << guc_engine_id); } DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n", - client->engines, desc.engines_used); - WARN_ON(desc.engines_used == 0); + client->engines, desc->engines_used); + WARN_ON(desc->engines_used == 0); /* * The doorbell, process descriptor, and workqueue are all parts * of the client object, which the GuC will reference via the GGTT */ gfx_addr = guc_ggtt_offset(client->vma); - desc.db_trigger_phy = sg_dma_address(client->vma->pages->sgl) + + desc->db_trigger_phy = sg_dma_address(client->vma->pages->sgl) + client->doorbell_offset; - desc.db_trigger_cpu = - (uintptr_t)client->vaddr + client->doorbell_offset; - desc.db_trigger_uk = gfx_addr + client->doorbell_offset; - desc.process_desc = gfx_addr + client->proc_desc_offset; - desc.wq_addr = gfx_addr + client->wq_offset; - desc.wq_size = client->wq_size; - - /* - * XXX: Take LRCs from an existing context if this is not an - * IsKMDCreatedContext client - */ - desc.desc_private = (uintptr_t)client; + desc->db_trigger_cpu = (uintptr_t)__get_doorbell(client); + desc->db_trigger_uk = gfx_addr + client->doorbell_offset; + desc->process_desc = gfx_addr + client->proc_desc_offset; + desc->wq_addr = gfx_addr + client->wq_offset; + desc->wq_size = client->wq_size; - /* Pool context is pinned already */ - sg = guc->ctx_pool_vma->pages; - sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), - sizeof(desc) * client->ctx_index); + desc->desc_private = (uintptr_t)client; } -static void guc_ctx_desc_fini(struct intel_guc *guc, - struct i915_guc_client 
*client) +static void guc_stage_desc_fini(struct intel_guc *guc, + struct i915_guc_client *client) { - struct guc_context_desc desc; - struct sg_table *sg; + struct guc_stage_desc *desc; - memset(&desc, 0, sizeof(desc)); - - sg = guc->ctx_pool_vma->pages; - sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), - sizeof(desc) * client->ctx_index); + desc = __get_stage_desc(client); + memset(desc, 0, sizeof(*desc)); } /** @@ -343,12 +426,11 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request) { const size_t wqi_size = sizeof(struct guc_wq_item); struct i915_guc_client *client = request->i915->guc.execbuf_client; - struct guc_process_desc *desc = client->vaddr + - client->proc_desc_offset; + struct guc_process_desc *desc = __get_process_desc(client); u32 freespace; int ret; - spin_lock(&client->wq_lock); + spin_lock_irq(&client->wq_lock); freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size); freespace -= client->wq_rsvd; if (likely(freespace >= wqi_size)) { @@ -358,21 +440,27 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request) client->no_wq_space++; ret = -EAGAIN; } - spin_unlock(&client->wq_lock); + spin_unlock_irq(&client->wq_lock); return ret; } +static void guc_client_update_wq_rsvd(struct i915_guc_client *client, int size) +{ + unsigned long flags; + + spin_lock_irqsave(&client->wq_lock, flags); + client->wq_rsvd += size; + spin_unlock_irqrestore(&client->wq_lock, flags); +} + void i915_guc_wq_unreserve(struct drm_i915_gem_request *request) { - const size_t wqi_size = sizeof(struct guc_wq_item); + const int wqi_size = sizeof(struct guc_wq_item); struct i915_guc_client *client = request->i915->guc.execbuf_client; GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size); - - spin_lock(&client->wq_lock); - client->wq_rsvd -= wqi_size; - spin_unlock(&client->wq_lock); + guc_client_update_wq_rsvd(client, -wqi_size); } /* Construct a Work Item and append it to the GuC's Work Queue */ @@ -383,19 +471,17 @@ static void 
guc_wq_item_append(struct i915_guc_client *client, const size_t wqi_size = sizeof(struct guc_wq_item); const u32 wqi_len = wqi_size/sizeof(u32) - 1; struct intel_engine_cs *engine = rq->engine; - struct guc_process_desc *desc; + struct guc_process_desc *desc = __get_process_desc(client); struct guc_wq_item *wqi; u32 freespace, tail, wq_off; - desc = client->vaddr + client->proc_desc_offset; - /* Free space is guaranteed, see i915_guc_wq_reserve() above */ freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size); GEM_BUG_ON(freespace < wqi_size); /* The GuC firmware wants the tail index in QWords, not bytes */ tail = rq->tail; - GEM_BUG_ON(tail & 7); + assert_ring_tail_valid(rq->ring, rq->tail); tail >>= 3; GEM_BUG_ON(tail > WQ_RING_TAIL_MAX); @@ -428,19 +514,27 @@ static void guc_wq_item_append(struct i915_guc_client *client, /* The GuC wants only the low-order word of the context descriptor */ wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine); - wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT; + wqi->submit_element_info = tail << WQ_RING_TAIL_SHIFT; wqi->fence_id = rq->global_seqno; } +static void guc_reset_wq(struct i915_guc_client *client) +{ + struct guc_process_desc *desc = __get_process_desc(client); + + desc->head = 0; + desc->tail = 0; + + client->wq_tail = 0; +} + static int guc_ring_doorbell(struct i915_guc_client *client) { - struct guc_process_desc *desc; + struct guc_process_desc *desc = __get_process_desc(client); union guc_doorbell_qw db_cmp, db_exc, db_ret; union guc_doorbell_qw *db; int attempt = 2, ret = -EAGAIN; - desc = client->vaddr + client->proc_desc_offset; - /* Update the tail so it is visible to GuC */ desc->tail = client->wq_tail; @@ -455,7 +549,7 @@ static int guc_ring_doorbell(struct i915_guc_client *client) db_exc.cookie = 1; /* pointer of current doorbell cacheline */ - db = client->vaddr + client->doorbell_offset; + db = (union guc_doorbell_qw *)__get_doorbell(client); while (attempt--) { /* lets ring the 
doorbell */ @@ -509,15 +603,16 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) unsigned int engine_id = engine->id; struct intel_guc *guc = &rq->i915->guc; struct i915_guc_client *client = guc->execbuf_client; + unsigned long flags; int b_ret; - spin_lock(&client->wq_lock); - guc_wq_item_append(client, rq); - /* WA to flush out the pending GMADR writes to ring buffer. */ if (i915_vma_is_map_and_fenceable(rq->ring->vma)) POSTING_READ_FW(GUC_STATUS); + spin_lock_irqsave(&client->wq_lock, flags); + + guc_wq_item_append(client, rq); b_ret = guc_ring_doorbell(client); client->submissions[engine_id] += 1; @@ -527,15 +622,104 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) guc->submissions[engine_id] += 1; guc->last_seqno[engine_id] = rq->global_seqno; - spin_unlock(&client->wq_lock); + + spin_unlock_irqrestore(&client->wq_lock, flags); } static void i915_guc_submit(struct drm_i915_gem_request *rq) { - i915_gem_request_submit(rq); + __i915_gem_request_submit(rq); __i915_guc_submit(rq); } +static void nested_enable_signaling(struct drm_i915_gem_request *rq) +{ + /* If we use dma_fence_enable_sw_signaling() directly, lockdep + * detects an ordering issue between the fence lockclass and the + * global_timeline. This circular dependency can only occur via 2 + * different fences (but same fence lockclass), so we use the nesting + * annotation here to prevent the warn, equivalent to the nesting + * inside i915_gem_request_submit() for when we also enable the + * signaler. 
+ */ + + if (test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &rq->fence.flags)) + return; + + GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)); + trace_dma_fence_enable_signal(&rq->fence); + + spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING); + intel_engine_enable_signaling(rq); + spin_unlock(&rq->lock); +} + +static bool i915_guc_dequeue(struct intel_engine_cs *engine) +{ + struct execlist_port *port = engine->execlist_port; + struct drm_i915_gem_request *last = port[0].request; + struct rb_node *rb; + bool submit = false; + + spin_lock_irq(&engine->timeline->lock); + rb = engine->execlist_first; + while (rb) { + struct drm_i915_gem_request *rq = + rb_entry(rb, typeof(*rq), priotree.node); + + if (last && rq->ctx != last->ctx) { + if (port != engine->execlist_port) + break; + + i915_gem_request_assign(&port->request, last); + nested_enable_signaling(last); + port++; + } + + rb = rb_next(rb); + rb_erase(&rq->priotree.node, &engine->execlist_queue); + RB_CLEAR_NODE(&rq->priotree.node); + rq->priotree.priority = INT_MAX; + + i915_guc_submit(rq); + trace_i915_gem_request_in(rq, port - engine->execlist_port); + last = rq; + submit = true; + } + if (submit) { + i915_gem_request_assign(&port->request, last); + nested_enable_signaling(last); + engine->execlist_first = rb; + } + spin_unlock_irq(&engine->timeline->lock); + + return submit; +} + +static void i915_guc_irq_handler(unsigned long data) +{ + struct intel_engine_cs *engine = (struct intel_engine_cs *)data; + struct execlist_port *port = engine->execlist_port; + struct drm_i915_gem_request *rq; + bool submit; + + do { + rq = port[0].request; + while (rq && i915_gem_request_completed(rq)) { + trace_i915_gem_request_out(rq); + i915_gem_request_put(rq); + port[0].request = port[1].request; + port[1].request = NULL; + rq = port[0].request; + } + + submit = false; + if (!port[1].request) + submit = i915_guc_dequeue(engine); + } while (submit); +} + /* * Everything below here is concerned with 
setup & teardown, and is * therefore not part of the somewhat time-critical batch-submission @@ -584,93 +768,100 @@ err: return vma; } -static void -guc_client_free(struct drm_i915_private *dev_priv, - struct i915_guc_client *client) +/* Check that a doorbell register is in the expected state */ +static bool doorbell_ok(struct intel_guc *guc, u16 db_id) { - struct intel_guc *guc = &dev_priv->guc; - - if (!client) - return; + struct drm_i915_private *dev_priv = guc_to_i915(guc); + u32 drbregl; + bool valid; - /* - * XXX: wait for any outstanding submissions before freeing memory. - * Be sure to drop any locks - */ + GEM_BUG_ON(db_id >= GUC_DOORBELL_INVALID); - if (client->vaddr) { - /* - * If we got as far as setting up a doorbell, make sure we - * shut it down before unmapping & deallocating the memory. - */ - guc_disable_doorbell(guc, client); + drbregl = I915_READ(GEN8_DRBREGL(db_id)); + valid = drbregl & GEN8_DRB_VALID; - i915_gem_object_unpin_map(client->vma->obj); - } + if (test_bit(db_id, guc->doorbell_bitmap) == valid) + return true; - i915_vma_unpin_and_release(&client->vma); + DRM_DEBUG_DRIVER("Doorbell %d has unexpected state (0x%x): valid=%s\n", + db_id, drbregl, yesno(valid)); - if (client->ctx_index != GUC_INVALID_CTX_ID) { - guc_ctx_desc_fini(guc, client); - ida_simple_remove(&guc->ctx_ids, client->ctx_index); - } - - kfree(client); + return false; } -/* Check that a doorbell register is in the expected state */ -static bool guc_doorbell_check(struct intel_guc *guc, uint16_t db_id) +/* + * If the GuC thinks that the doorbell is unassigned (e.g. because we reset and + * reloaded the GuC FW) we can use this function to tell the GuC to reassign the + * doorbell to the rightful owner. 
+ */ +static int __reset_doorbell(struct i915_guc_client* client, u16 db_id) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); - i915_reg_t drbreg = GEN8_DRBREGL(db_id); - uint32_t value = I915_READ(drbreg); - bool enabled = (value & GUC_DOORBELL_ENABLED) != 0; - bool expected = test_bit(db_id, guc->doorbell_bitmap); + int err; - if (enabled == expected) - return true; - - DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) 0x%x, should be %s\n", - db_id, drbreg.reg, value, - expected ? "active" : "inactive"); + __update_doorbell_desc(client, db_id); + err = __create_doorbell(client); + if (!err) + err = __destroy_doorbell(client); - return false; + return err; } /* - * Borrow the first client to set up & tear down each unused doorbell - * in turn, to ensure that all doorbell h/w is (re)initialised. + * Set up & tear down each unused doorbell in turn, to ensure that all doorbell + * HW is (re)initialised. For that end, we might have to borrow the first + * client. Also, tell GuC about all the doorbells in use by all clients. + * We do this because the KMD, the GuC and the doorbell HW can easily go out of + * sync (e.g. we can reset the GuC, but not the doorbell HW). 
*/ -static void guc_init_doorbell_hw(struct intel_guc *guc) +static int guc_init_doorbell_hw(struct intel_guc *guc) { struct i915_guc_client *client = guc->execbuf_client; - uint16_t db_id; - int i, err; - - guc_disable_doorbell(guc, client); + bool recreate_first_client = false; + u16 db_id; + int ret; - for (i = 0; i < GUC_MAX_DOORBELLS; ++i) { - /* Skip if doorbell is OK */ - if (guc_doorbell_check(guc, i)) + /* For unused doorbells, make sure they are disabled */ + for_each_clear_bit(db_id, guc->doorbell_bitmap, GUC_NUM_DOORBELLS) { + if (doorbell_ok(guc, db_id)) continue; - err = guc_update_doorbell_id(guc, client, i); - if (err) - DRM_DEBUG_DRIVER("Doorbell %d update failed, err %d\n", - i, err); + if (has_doorbell(client)) { + /* Borrow execbuf_client (we will recreate it later) */ + destroy_doorbell(client); + recreate_first_client = true; + } + + ret = __reset_doorbell(client, db_id); + WARN(ret, "Doorbell %u reset failed, err %d\n", db_id, ret); } - db_id = select_doorbell_register(guc, client->priority); - WARN_ON(db_id == GUC_INVALID_DOORBELL_ID); + if (recreate_first_client) { + ret = __reserve_doorbell(client); + if (unlikely(ret)) { + DRM_ERROR("Couldn't re-reserve first client db: %d\n", ret); + return ret; + } - err = guc_update_doorbell_id(guc, client, db_id); - if (err) - DRM_WARN("Failed to restore doorbell to %d, err %d\n", - db_id, err); + __update_doorbell_desc(client, client->doorbell_id); + } - /* Read back & verify all doorbell registers */ - for (i = 0; i < GUC_MAX_DOORBELLS; ++i) - (void)guc_doorbell_check(guc, i); + /* Now for every client (and not only execbuf_client) make sure their + * doorbells are known by the GuC */ + //for (client = client_list; client != NULL; client = client->next) + { + ret = __create_doorbell(client); + if (ret) { + DRM_ERROR("Couldn't recreate client %u doorbell: %d\n", + client->stage_id, ret); + return ret; + } + } + + /* Read back & verify all (used & unused) doorbell registers */ + for (db_id = 0; db_id 
< GUC_NUM_DOORBELLS; ++db_id) + WARN_ON(!doorbell_ok(guc, db_id)); + + return 0; } /** @@ -696,49 +887,46 @@ guc_client_alloc(struct drm_i915_private *dev_priv, struct intel_guc *guc = &dev_priv->guc; struct i915_vma *vma; void *vaddr; - uint16_t db_id; + int ret; client = kzalloc(sizeof(*client), GFP_KERNEL); if (!client) - return NULL; + return ERR_PTR(-ENOMEM); - client->owner = ctx; client->guc = guc; + client->owner = ctx; client->engines = engines; client->priority = priority; - client->doorbell_id = GUC_INVALID_DOORBELL_ID; + client->doorbell_id = GUC_DOORBELL_INVALID; + client->wq_offset = GUC_DB_SIZE; + client->wq_size = GUC_WQ_SIZE; + spin_lock_init(&client->wq_lock); - client->ctx_index = (uint32_t)ida_simple_get(&guc->ctx_ids, 0, - GUC_MAX_GPU_CONTEXTS, GFP_KERNEL); - if (client->ctx_index >= GUC_MAX_GPU_CONTEXTS) { - client->ctx_index = GUC_INVALID_CTX_ID; - goto err; - } + ret = ida_simple_get(&guc->stage_ids, 0, GUC_MAX_STAGE_DESCRIPTORS, + GFP_KERNEL); + if (ret < 0) + goto err_client; + + client->stage_id = ret; /* The first page is doorbell/proc_desc. Two followed pages are wq. */ vma = intel_guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE); - if (IS_ERR(vma)) - goto err; + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err_id; + } /* We'll keep just the first (doorbell/proc) page permanently kmap'd. */ client->vma = vma; vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) - goto err; - + if (IS_ERR(vaddr)) { + ret = PTR_ERR(vaddr); + goto err_vma; + } client->vaddr = vaddr; - spin_lock_init(&client->wq_lock); - client->wq_offset = GUC_DB_SIZE; - client->wq_size = GUC_WQ_SIZE; - - db_id = select_doorbell_register(guc, client->priority); - if (db_id == GUC_INVALID_DOORBELL_ID) - /* XXX: evict a doorbell instead? 
*/ - goto err; - - client->doorbell_offset = select_doorbell_cacheline(guc); + client->doorbell_offset = __select_cacheline(guc); /* * Since the doorbell only requires a single cacheline, we can save @@ -751,28 +939,47 @@ guc_client_alloc(struct drm_i915_private *dev_priv, client->proc_desc_offset = (GUC_DB_SIZE / 2); guc_proc_desc_init(guc, client); - guc_ctx_desc_init(guc, client); + guc_stage_desc_init(guc, client); - /* For runtime client allocation we need to enable the doorbell. Not - * required yet for the static execbuf_client as this special kernel - * client is enabled from i915_guc_submission_enable(). - * - * guc_update_doorbell_id(guc, client, db_id); - */ + ret = create_doorbell(client); + if (ret) + goto err_vaddr; - DRM_DEBUG_DRIVER("new priority %u client %p for engine(s) 0x%x: ctx_index %u\n", - priority, client, client->engines, client->ctx_index); - DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%x\n", - client->doorbell_id, client->doorbell_offset); + DRM_DEBUG_DRIVER("new priority %u client %p for engine(s) 0x%x: stage_id %u\n", + priority, client, client->engines, client->stage_id); + DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%lx\n", + client->doorbell_id, client->doorbell_offset); return client; -err: - guc_client_free(dev_priv, client); - return NULL; +err_vaddr: + i915_gem_object_unpin_map(client->vma->obj); +err_vma: + i915_vma_unpin_and_release(&client->vma); +err_id: + ida_simple_remove(&guc->stage_ids, client->stage_id); +err_client: + kfree(client); + return ERR_PTR(ret); } +static void guc_client_free(struct i915_guc_client *client) +{ + /* + * XXX: wait for any outstanding submissions before freeing memory. + * Be sure to drop any locks + */ + /* FIXME: in many cases, by the time we get here the GuC has been + * reset, so we cannot destroy the doorbell properly. 
Ignore the + * error message for now */ + destroy_doorbell(client); + guc_stage_desc_fini(client->guc, client); + i915_gem_object_unpin_map(client->vma->obj); + i915_vma_unpin_and_release(&client->vma); + ida_simple_remove(&client->guc->stage_ids, client->stage_id); + kfree(client); +} static void guc_policies_init(struct guc_policies *policies) { @@ -782,7 +989,7 @@ static void guc_policies_init(struct guc_policies *policies) policies->dpc_promote_time = 500000; policies->max_num_work_items = POLICY_MAX_NUM_WI; - for (p = 0; p < GUC_CTX_PRIORITY_NUM; p++) { + for (p = 0; p < GUC_CLIENT_PRIORITY_NUM; p++) { for (i = GUC_RENDER_ENGINE; i < GUC_MAX_ENGINES_NUM; i++) { policy = &policies->policy[p][i]; @@ -796,34 +1003,44 @@ static void guc_policies_init(struct guc_policies *policies) policies->is_valid = 1; } -static void guc_addon_create(struct intel_guc *guc) +static int guc_ads_create(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); struct i915_vma *vma; - struct guc_ads *ads; - struct guc_policies *policies; - struct guc_mmio_reg_state *reg_state; + struct page *page; + /* The ads obj includes the struct itself and buffers passed to GuC */ + struct { + struct guc_ads ads; + struct guc_policies policies; + struct guc_mmio_reg_state reg_state; + u8 reg_state_buffer[GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE]; + } __packed *blob; struct intel_engine_cs *engine; enum intel_engine_id id; - struct page *page; - u32 size; + u32 base; - /* The ads obj includes the struct itself and buffers passed to GuC */ - size = sizeof(struct guc_ads) + sizeof(struct guc_policies) + - sizeof(struct guc_mmio_reg_state) + - GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE; + GEM_BUG_ON(guc->ads_vma); - vma = guc->ads_vma; - if (!vma) { - vma = intel_guc_allocate_vma(guc, PAGE_ALIGN(size)); - if (IS_ERR(vma)) - return; + vma = intel_guc_allocate_vma(guc, PAGE_ALIGN(sizeof(*blob))); + if (IS_ERR(vma)) + return PTR_ERR(vma); - guc->ads_vma = vma; - } + guc->ads_vma = vma; page = 
i915_vma_first_page(vma); - ads = kmap(page); + blob = kmap(page); + + /* GuC scheduling policies */ + guc_policies_init(&blob->policies); + + /* MMIO reg state */ + for_each_engine(engine, dev_priv, id) { + blob->reg_state.white_list[engine->guc_id].mmio_start = + engine->mmio_base + GUC_MMIO_WHITE_LIST_START; + + /* Nothing to be saved or restored for now. */ + blob->reg_state.white_list[engine->guc_id].count = 0; + } /* * The GuC requires a "Golden Context" when it reinitialises @@ -832,98 +1049,154 @@ static void guc_addon_create(struct intel_guc *guc) * so its address won't change after we've told the GuC where * to find it. */ - engine = dev_priv->engine[RCS]; - ads->golden_context_lrca = engine->status_page.ggtt_offset; + blob->ads.golden_context_lrca = + dev_priv->engine[RCS]->status_page.ggtt_offset; for_each_engine(engine, dev_priv, id) - ads->eng_state_size[engine->guc_id] = intel_lr_context_size(engine); + blob->ads.eng_state_size[engine->guc_id] = + intel_lr_context_size(engine); - /* GuC scheduling policies */ - policies = (void *)ads + sizeof(struct guc_ads); - guc_policies_init(policies); - - ads->scheduler_policies = - guc_ggtt_offset(vma) + sizeof(struct guc_ads); + base = guc_ggtt_offset(vma); + blob->ads.scheduler_policies = base + ptr_offset(blob, policies); + blob->ads.reg_state_buffer = base + ptr_offset(blob, reg_state_buffer); + blob->ads.reg_state_addr = base + ptr_offset(blob, reg_state); - /* MMIO reg state */ - reg_state = (void *)policies + sizeof(struct guc_policies); - - for_each_engine(engine, dev_priv, id) { - reg_state->mmio_white_list[engine->guc_id].mmio_start = - engine->mmio_base + GUC_MMIO_WHITE_LIST_START; - - /* Nothing to be saved or restored for now. 
*/ - reg_state->mmio_white_list[engine->guc_id].count = 0; - } - - ads->reg_state_addr = ads->scheduler_policies + - sizeof(struct guc_policies); + kunmap(page); - ads->reg_state_buffer = ads->reg_state_addr + - sizeof(struct guc_mmio_reg_state); + return 0; +} - kunmap(page); +static void guc_ads_destroy(struct intel_guc *guc) +{ + i915_vma_unpin_and_release(&guc->ads_vma); } /* - * Set up the memory resources to be shared with the GuC. At this point, - * we require just one object that can be mapped through the GGTT. + * Set up the memory resources to be shared with the GuC (via the GGTT) + * at firmware loading time. */ int i915_guc_submission_init(struct drm_i915_private *dev_priv) { - const size_t ctxsize = sizeof(struct guc_context_desc); - const size_t poolsize = GUC_MAX_GPU_CONTEXTS * ctxsize; - const size_t gemsize = round_up(poolsize, PAGE_SIZE); struct intel_guc *guc = &dev_priv->guc; struct i915_vma *vma; + void *vaddr; + int ret; - if (!HAS_GUC_SCHED(dev_priv)) + if (guc->stage_desc_pool) return 0; - /* Wipe bitmap & delete client in case of reinitialisation */ - bitmap_clear(guc->doorbell_bitmap, 0, GUC_MAX_DOORBELLS); - i915_guc_submission_disable(dev_priv); - - if (!i915.enable_guc_submission) - return 0; /* not enabled */ - - if (guc->ctx_pool_vma) - return 0; /* already allocated */ - - vma = intel_guc_allocate_vma(guc, gemsize); + vma = intel_guc_allocate_vma(guc, + PAGE_ALIGN(sizeof(struct guc_stage_desc) * + GUC_MAX_STAGE_DESCRIPTORS)); if (IS_ERR(vma)) return PTR_ERR(vma); - guc->ctx_pool_vma = vma; - ida_init(&guc->ctx_ids); - intel_guc_log_create(guc); - guc_addon_create(guc); - - guc->execbuf_client = guc_client_alloc(dev_priv, - INTEL_INFO(dev_priv)->ring_mask, - GUC_CTX_PRIORITY_KMD_NORMAL, - dev_priv->kernel_context); - if (!guc->execbuf_client) { - DRM_ERROR("Failed to create GuC client for execbuf!\n"); - goto err; + guc->stage_desc_pool = vma; + + vaddr = i915_gem_object_pin_map(guc->stage_desc_pool->obj, I915_MAP_WB); + if 
(IS_ERR(vaddr)) { + ret = PTR_ERR(vaddr); + goto err_vma; } + guc->stage_desc_pool_vaddr = vaddr; + + ret = intel_guc_log_create(guc); + if (ret < 0) + goto err_vaddr; + + ret = guc_ads_create(guc); + if (ret < 0) + goto err_log; + + ida_init(&guc->stage_ids); + return 0; -err: - i915_guc_submission_fini(dev_priv); - return -ENOMEM; +err_log: + intel_guc_log_destroy(guc); +err_vaddr: + i915_gem_object_unpin_map(guc->stage_desc_pool->obj); +err_vma: + i915_vma_unpin_and_release(&guc->stage_desc_pool); + return ret; } -static void guc_reset_wq(struct i915_guc_client *client) +void i915_guc_submission_fini(struct drm_i915_private *dev_priv) { - struct guc_process_desc *desc = client->vaddr + - client->proc_desc_offset; + struct intel_guc *guc = &dev_priv->guc; - desc->head = 0; - desc->tail = 0; + ida_destroy(&guc->stage_ids); + guc_ads_destroy(guc); + intel_guc_log_destroy(guc); + i915_gem_object_unpin_map(guc->stage_desc_pool->obj); + i915_vma_unpin_and_release(&guc->stage_desc_pool); +} - client->wq_tail = 0; +static void guc_interrupts_capture(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int irqs; + + /* tell all command streamers to forward interrupts (but not vblank) to GuC */ + irqs = _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING); + for_each_engine(engine, dev_priv, id) + I915_WRITE(RING_MODE_GEN7(engine), irqs); + + /* route USER_INTERRUPT to Host, all others are sent to GuC. */ + irqs = GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | + GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; + /* These three registers have the same bit definitions */ + I915_WRITE(GUC_BCS_RCS_IER, ~irqs); + I915_WRITE(GUC_VCS2_VCS1_IER, ~irqs); + I915_WRITE(GUC_WD_VECS_IER, ~irqs); + + /* + * The REDIRECT_TO_GUC bit of the PMINTRMSK register directs all + * (unmasked) PM interrupts to the GuC. All other bits of this + * register *disable* generation of a specific interrupt. 
+ * + * 'pm_intrmsk_mbz' indicates bits that are NOT to be set when + * writing to the PM interrupt mask register, i.e. interrupts + * that must not be disabled. + * + * If the GuC is handling these interrupts, then we must not let + * the PM code disable ANY interrupt that the GuC is expecting. + * So for each ENABLED (0) bit in this register, we must SET the + * bit in pm_intrmsk_mbz so that it's left enabled for the GuC. + * GuC needs ARAT expired interrupt unmasked hence it is set in + * pm_intrmsk_mbz. + * + * Here we CLEAR REDIRECT_TO_GUC bit in pm_intrmsk_mbz, which will + * result in the register bit being left SET! + */ + dev_priv->rps.pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; + dev_priv->rps.pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; +} + +static void guc_interrupts_release(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int irqs; + + /* + * tell all command streamers NOT to forward interrupts or vblank + * to GuC. 
+ */ + irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER); + irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING); + for_each_engine(engine, dev_priv, id) + I915_WRITE(RING_MODE_GEN7(engine), irqs); + + /* route all GT interrupts to the host */ + I915_WRITE(GUC_BCS_RCS_IER, 0); + I915_WRITE(GUC_VCS2_VCS1_IER, 0); + I915_WRITE(GUC_WD_VECS_IER, 0); + + dev_priv->rps.pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; + dev_priv->rps.pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK; } int i915_guc_submission_enable(struct drm_i915_private *dev_priv) @@ -932,60 +1205,74 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) struct i915_guc_client *client = guc->execbuf_client; struct intel_engine_cs *engine; enum intel_engine_id id; + int err; + + if (!client) { + client = guc_client_alloc(dev_priv, + INTEL_INFO(dev_priv)->ring_mask, + GUC_CLIENT_PRIORITY_KMD_NORMAL, + dev_priv->kernel_context); + if (IS_ERR(client)) { + DRM_ERROR("Failed to create GuC client for execbuf!\n"); + return PTR_ERR(client); + } - if (!client) - return -ENODEV; + guc->execbuf_client = client; + } - intel_guc_sample_forcewake(guc); + err = intel_guc_sample_forcewake(guc); + if (err) + goto err_execbuf_client; guc_reset_wq(client); - guc_init_doorbell_hw(guc); + + err = guc_init_doorbell_hw(guc); + if (err) + goto err_execbuf_client; /* Take over from manual control of ELSP (execlists) */ + guc_interrupts_capture(dev_priv); + for_each_engine(engine, dev_priv, id) { + const int wqi_size = sizeof(struct guc_wq_item); struct drm_i915_gem_request *rq; - engine->submit_request = i915_guc_submit; - engine->schedule = NULL; + /* The tasklet was initialised by execlists, and may be in + * a state of flux (across a reset) and so we just want to + * take over the callback without changing any other state + * in the tasklet. 
+ */ + engine->irq_tasklet.func = i915_guc_irq_handler; + clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); /* Replay the current set of previously submitted requests */ + spin_lock_irq(&engine->timeline->lock); list_for_each_entry(rq, &engine->timeline->requests, link) { - client->wq_rsvd += sizeof(struct guc_wq_item); + guc_client_update_wq_rsvd(client, wqi_size); __i915_guc_submit(rq); } + spin_unlock_irq(&engine->timeline->lock); } return 0; + +err_execbuf_client: + guc_client_free(guc->execbuf_client); + guc->execbuf_client = NULL; + return err; } void i915_guc_submission_disable(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; - if (!guc->execbuf_client) - return; + guc_interrupts_release(dev_priv); /* Revert back to manual ELSP submission */ - intel_execlists_enable_submission(dev_priv); -} - -void i915_guc_submission_fini(struct drm_i915_private *dev_priv) -{ - struct intel_guc *guc = &dev_priv->guc; - struct i915_guc_client *client; + intel_engines_reset_default_submission(dev_priv); - client = fetch_and_zero(&guc->execbuf_client); - if (!client) - return; - - guc_client_free(dev_priv, client); - - i915_vma_unpin_and_release(&guc->ads_vma); - i915_vma_unpin_and_release(&guc->log.vma); - - if (guc->ctx_pool_vma) - ida_destroy(&guc->ctx_ids); - i915_vma_unpin_and_release(&guc->ctx_pool_vma); + guc_client_free(guc->execbuf_client); + guc->execbuf_client = NULL; } /** @@ -1014,7 +1301,6 @@ int intel_guc_suspend(struct drm_i915_private *dev_priv) return intel_guc_send(guc, data, ARRAY_SIZE(data)); } - /** * intel_guc_resume() - notify GuC resuming from suspend state * @dev_priv: i915 device private @@ -1040,5 +1326,3 @@ int intel_guc_resume(struct drm_i915_private *dev_priv) return intel_guc_send(guc, data, ARRAY_SIZE(data)); } - - diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index b6c886ac901b..fd97fe00cd0d 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ 
-180,7 +180,7 @@ i915_hotplug_interrupt_update_locked(struct drm_i915_private *dev_priv, { uint32_t val; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(bits & ~mask); val = I915_READ(PORT_HOTPLUG_EN); @@ -222,7 +222,7 @@ void ilk_update_display_irq(struct drm_i915_private *dev_priv, { uint32_t new_val; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(enabled_irq_mask & ~interrupt_mask); @@ -250,7 +250,7 @@ static void ilk_update_gt_irq(struct drm_i915_private *dev_priv, uint32_t interrupt_mask, uint32_t enabled_irq_mask) { - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(enabled_irq_mask & ~interrupt_mask); @@ -302,7 +302,7 @@ static void snb_update_pm_irq(struct drm_i915_private *dev_priv, WARN_ON(enabled_irq_mask & ~interrupt_mask); - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); new_val = dev_priv->pm_imr; new_val &= ~interrupt_mask; @@ -340,7 +340,7 @@ void gen6_reset_pm_iir(struct drm_i915_private *dev_priv, u32 reset_mask) { i915_reg_t reg = gen6_pm_iir(dev_priv); - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); I915_WRITE(reg, reset_mask); I915_WRITE(reg, reset_mask); @@ -349,7 +349,7 @@ void gen6_reset_pm_iir(struct drm_i915_private *dev_priv, u32 reset_mask) void gen6_enable_pm_irq(struct drm_i915_private *dev_priv, u32 enable_mask) { - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); dev_priv->pm_ier |= enable_mask; I915_WRITE(gen6_pm_ier(dev_priv), dev_priv->pm_ier); @@ -359,7 +359,7 @@ void gen6_enable_pm_irq(struct drm_i915_private *dev_priv, u32 enable_mask) void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, u32 disable_mask) { - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); dev_priv->pm_ier &= ~disable_mask; __gen6_mask_pm_irq(dev_priv, disable_mask); @@ 
-389,11 +389,6 @@ void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv) spin_unlock_irq(&dev_priv->irq_lock); } -u32 gen6_sanitize_rps_pm_mask(struct drm_i915_private *dev_priv, u32 mask) -{ - return (mask & ~dev_priv->rps.pm_intr_keep); -} - void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) { if (!READ_ONCE(dev_priv->rps.interrupts_enabled)) @@ -463,7 +458,7 @@ static void bdw_update_port_irq(struct drm_i915_private *dev_priv, uint32_t new_val; uint32_t old_val; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(enabled_irq_mask & ~interrupt_mask); @@ -496,7 +491,7 @@ void bdw_update_pipe_irq(struct drm_i915_private *dev_priv, { uint32_t new_val; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(enabled_irq_mask & ~interrupt_mask); @@ -530,7 +525,7 @@ void ibx_display_interrupt_update(struct drm_i915_private *dev_priv, WARN_ON(enabled_irq_mask & ~interrupt_mask); - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if (WARN_ON(!intel_irqs_enabled(dev_priv))) return; @@ -546,7 +541,7 @@ __i915_enable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe, i915_reg_t reg = PIPESTAT(pipe); u32 pipestat = I915_READ(reg) & PIPESTAT_INT_ENABLE_MASK; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(!intel_irqs_enabled(dev_priv)); if (WARN_ONCE(enable_mask & ~PIPESTAT_INT_ENABLE_MASK || @@ -573,7 +568,7 @@ __i915_disable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe, i915_reg_t reg = PIPESTAT(pipe); u32 pipestat = I915_READ(reg) & PIPESTAT_INT_ENABLE_MASK; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(!intel_irqs_enabled(dev_priv)); if (WARN_ONCE(enable_mask & ~PIPESTAT_INT_ENABLE_MASK || @@ -728,6 +723,7 @@ static u32 i915_get_vblank_counter(struct drm_device *dev, unsigned int pipe) struct intel_crtc 
*intel_crtc = intel_get_crtc_for_pipe(dev_priv, pipe); const struct drm_display_mode *mode = &intel_crtc->base.hwmode; + unsigned long irqflags; htotal = mode->crtc_htotal; hsync_start = mode->crtc_hsync_start; @@ -744,17 +740,21 @@ static u32 i915_get_vblank_counter(struct drm_device *dev, unsigned int pipe) high_frame = PIPEFRAME(pipe); low_frame = PIPEFRAMEPIXEL(pipe); + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); + /* * High & low register fields aren't synchronized, so make sure * we get a low value that's stable across two reads of the high * register. */ do { - high1 = I915_READ(high_frame) & PIPE_FRAME_HIGH_MASK; - low = I915_READ(low_frame); - high2 = I915_READ(high_frame) & PIPE_FRAME_HIGH_MASK; + high1 = I915_READ_FW(high_frame) & PIPE_FRAME_HIGH_MASK; + low = I915_READ_FW(low_frame); + high2 = I915_READ_FW(high_frame) & PIPE_FRAME_HIGH_MASK; } while (high1 != high2); + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); + high1 >>= PIPE_FRAME_HIGH_SHIFT; pixel = low & PIPE_PIXEL_MASK; low >>= PIPE_FRAME_LOW_SHIFT; @@ -783,6 +783,9 @@ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) enum pipe pipe = crtc->pipe; int position, vtotal; + if (!crtc->active) + return -1; + vtotal = mode->crtc_vtotal; if (mode->flags & DRM_MODE_FLAG_INTERLACE) vtotal /= 2; @@ -809,8 +812,7 @@ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) for (i = 0; i < 100; i++) { udelay(1); - temp = __raw_i915_read32(dev_priv, PIPEDSL(pipe)) & - DSL_LINEMASK_GEN3; + temp = I915_READ_FW(PIPEDSL(pipe)) & DSL_LINEMASK_GEN3; if (temp != position) { position = temp; break; @@ -1033,15 +1035,50 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) static void notify_ring(struct intel_engine_cs *engine) { - smp_store_mb(engine->breadcrumbs.irq_posted, true); - if (intel_engine_wakeup(engine)) - trace_i915_gem_request_notify(engine); + struct drm_i915_gem_request *rq = NULL; + struct intel_wait *wait; + + 
atomic_inc(&engine->irq_count); + set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); + + spin_lock(&engine->breadcrumbs.irq_lock); + wait = engine->breadcrumbs.irq_wait; + if (wait) { + /* We use a callback from the dma-fence to submit + * requests after waiting on our own requests. To + * ensure minimum delay in queuing the next request to + * hardware, signal the fence now rather than wait for + * the signaler to be woken up. We still wake up the + * waiter in order to handle the irq-seqno coherency + * issues (we may receive the interrupt before the + * seqno is written, see __i915_request_irq_complete()) + * and to handle coalescing of multiple seqno updates + * and many waiters. + */ + if (i915_seqno_passed(intel_engine_get_seqno(engine), + wait->seqno) && + !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &wait->request->fence.flags)) + rq = i915_gem_request_get(wait->request); + + wake_up_process(wait->tsk); + } else { + __intel_engine_disarm_breadcrumbs(engine); + } + spin_unlock(&engine->breadcrumbs.irq_lock); + + if (rq) { + dma_fence_signal(&rq->fence); + i915_gem_request_put(rq); + } + + trace_intel_engine_notify(engine, wait); } static void vlv_c0_read(struct drm_i915_private *dev_priv, struct intel_rps_ei *ei) { - ei->cz_clock = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP); + ei->ktime = ktime_get_raw(); ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT); ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT); } @@ -1061,18 +1098,13 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) return 0; vlv_c0_read(dev_priv, &now); - if (now.cz_clock == 0) - return 0; - if (prev->cz_clock) { + if (prev->ktime) { u64 time, c0; - unsigned int mul; + u32 render, media; - mul = VLV_CZ_CLOCK_TO_MILLI_SEC * 100; /* scale to threshold% */ - if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH) - mul <<= 8; + time = ktime_us_delta(now.ktime, prev->ktime); - time = now.cz_clock - prev->cz_clock; time *= dev_priv->czclk_freq; /* Workload can be split between 
render + media, @@ -1080,9 +1112,10 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) * mesa. To account for this we need to combine both engines * into our activity counter. */ - c0 = now.render_c0 - prev->render_c0; - c0 += now.media_c0 - prev->media_c0; - c0 *= mul; + render = now.render_c0 - prev->render_c0; + media = now.media_c0 - prev->media_c0; + c0 = max(render, media); + c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */ if (c0 > time * dev_priv->rps.up_threshold) events = GEN6_PM_RP_UP_THRESHOLD; @@ -1110,30 +1143,21 @@ static void gen6_pm_rps_work(struct work_struct *work) { struct drm_i915_private *dev_priv = container_of(work, struct drm_i915_private, rps.work); - bool client_boost; + bool client_boost = false; int new_delay, adj, min, max; - u32 pm_iir; + u32 pm_iir = 0; spin_lock_irq(&dev_priv->irq_lock); - /* Speed up work cancelation during disabling rps interrupts. */ - if (!dev_priv->rps.interrupts_enabled) { - spin_unlock_irq(&dev_priv->irq_lock); - return; + if (dev_priv->rps.interrupts_enabled) { + pm_iir = fetch_and_zero(&dev_priv->rps.pm_iir); + client_boost = fetch_and_zero(&dev_priv->rps.client_boost); } - - pm_iir = dev_priv->rps.pm_iir; - dev_priv->rps.pm_iir = 0; - /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */ - gen6_unmask_pm_irq(dev_priv, dev_priv->pm_rps_events); - client_boost = dev_priv->rps.client_boost; - dev_priv->rps.client_boost = false; spin_unlock_irq(&dev_priv->irq_lock); /* Make sure we didn't queue anything we're not going to process. */ WARN_ON(pm_iir & ~dev_priv->pm_rps_events); - if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost) - return; + goto out; mutex_lock(&dev_priv->rps.hw_lock); @@ -1156,20 +1180,12 @@ static void gen6_pm_rps_work(struct work_struct *work) if (new_delay >= dev_priv->rps.max_freq_softlimit) adj = 0; - /* - * For better performance, jump directly - * to RPe if we're below it. 
- */ - if (new_delay < dev_priv->rps.efficient_freq - adj) { - new_delay = dev_priv->rps.efficient_freq; - adj = 0; - } } else if (client_boost || any_waiters(dev_priv)) { adj = 0; } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq) new_delay = dev_priv->rps.efficient_freq; - else + else if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit) new_delay = dev_priv->rps.min_freq_softlimit; adj = 0; } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { @@ -1192,9 +1208,19 @@ static void gen6_pm_rps_work(struct work_struct *work) new_delay += adj; new_delay = clamp_t(int, new_delay, min, max); - intel_set_rps(dev_priv, new_delay); + if (intel_set_rps(dev_priv, new_delay)) { + DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); + dev_priv->rps.last_adj = 0; + } mutex_unlock(&dev_priv->rps.hw_lock); + +out: + /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */ + spin_lock_irq(&dev_priv->irq_lock); + if (dev_priv->rps.interrupts_enabled) + gen6_unmask_pm_irq(dev_priv, dev_priv->pm_rps_events); + spin_unlock_irq(&dev_priv->irq_lock); } @@ -1330,10 +1356,20 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv, static __always_inline void gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) { - if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) + bool tasklet = false; + + if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) { + set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + tasklet = true; + } + + if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) { notify_ring(engine); - if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) - tasklet_schedule(&engine->irq_tasklet); + tasklet |= i915.enable_guc_submission; + } + + if (tasklet) + tasklet_hi_schedule(&engine->irq_tasklet); } static irqreturn_t gen8_gt_irq_ack(struct drm_i915_private *dev_priv, @@ -1706,8 +1742,8 @@ static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) 
I915_WRITE(SOFT_SCRATCH(15), msg & ~flush); /* Handle flush interrupt in bottom half */ - queue_work(dev_priv->guc.log.flush_wq, - &dev_priv->guc.log.flush_work); + queue_work(dev_priv->guc.log.runtime.flush_wq, + &dev_priv->guc.log.runtime.flush_work); dev_priv->guc.log.flush_interrupt_count++; } else { @@ -2596,22 +2632,6 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg) return ret; } -static void i915_error_wake_up(struct drm_i915_private *dev_priv) -{ - /* - * Notify all waiters for GPU completion events that reset state has - * been changed, and that they need to restart their wait after - * checking for potential errors (and bail out to drop locks if there is - * a gpu reset pending so that i915_error_work_func can acquire them). - */ - - /* Wake up __wait_seqno, potentially holding dev->struct_mutex. */ - wake_up_all(&dev_priv->gpu_error.wait_queue); - - /* Wake up intel_crtc_wait_for_pending_flips, holding crtc->mutex. */ - wake_up_all(&dev_priv->pending_flip_queue); -} - /** * i915_reset_and_wakeup - do process context error handling work * @dev_priv: i915 device private @@ -2631,16 +2651,11 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("resetting chip\n"); kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event); - /* - * In most cases it's guaranteed that we get here with an RPM - * reference held, for example because there is a pending GPU - * request that won't finish until the reset is done. This - * isn't the case at least when we get here by doing a - * simulated reset via debugs, so get an RPM reference. 
- */ - intel_runtime_pm_get(dev_priv); intel_prepare_reset(dev_priv); + set_bit(I915_RESET_HANDOFF, &dev_priv->gpu_error.flags); + wake_up_all(&dev_priv->gpu_error.wait_queue); + do { /* * All state reset _must_ be completed before we update the @@ -2655,12 +2670,11 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv) /* We need to wait for anyone holding the lock to wakeup */ } while (wait_on_bit_timeout(&dev_priv->gpu_error.flags, - I915_RESET_IN_PROGRESS, + I915_RESET_HANDOFF, TASK_UNINTERRUPTIBLE, HZ)); intel_finish_reset(dev_priv); - intel_runtime_pm_put(dev_priv); if (!test_bit(I915_WEDGED, &dev_priv->gpu_error.flags)) kobject_uevent_env(kobj, @@ -2670,6 +2684,7 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv) * Note: The wake_up also serves as a memory barrier so that * waiters see the updated value of the dev_priv->gpu_error. */ + clear_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags); wake_up_all(&dev_priv->gpu_error.reset_queue); } @@ -2747,31 +2762,29 @@ void i915_handle_error(struct drm_i915_private *dev_priv, vscnprintf(error_msg, sizeof(error_msg), fmt, args); va_end(args); + /* + * In most cases it's guaranteed that we get here with an RPM + * reference held, for example because there is a pending GPU + * request that won't finish until the reset is done. This + * isn't the case at least when we get here by doing a + * simulated reset via debugfs, so get an RPM reference. + */ + intel_runtime_pm_get(dev_priv); + i915_capture_error_state(dev_priv, engine_mask, error_msg); i915_clear_error_registers(dev_priv); if (!engine_mask) - return; + goto out; - if (test_and_set_bit(I915_RESET_IN_PROGRESS, + if (test_and_set_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags)) - return; - - /* - * Wakeup waiting processes so that the reset function - * i915_reset_and_wakeup doesn't deadlock trying to grab - * various locks. 
By bumping the reset counter first, the woken - * processes will see a reset in progress and back off, - * releasing their locks and then wait for the reset completion. - * We must do this for _all_ gpu waiters that might hold locks - * that the reset work needs to acquire. - * - * Note: The wake_up also provides a memory barrier to ensure that the - * waiters see the updated value of the reset flags. - */ - i915_error_wake_up(dev_priv); + goto out; i915_reset_and_wakeup(dev_priv); + +out: + intel_runtime_pm_put(dev_priv); } /* Called from drm generic code, passed 'crtc' which @@ -3089,19 +3102,9 @@ static u32 intel_hpd_enabled_irqs(struct drm_i915_private *dev_priv, return enabled_irqs; } -static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) +static void ibx_hpd_detection_setup(struct drm_i915_private *dev_priv) { - u32 hotplug_irqs, hotplug, enabled_irqs; - - if (HAS_PCH_IBX(dev_priv)) { - hotplug_irqs = SDE_HOTPLUG_MASK; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_ibx); - } else { - hotplug_irqs = SDE_HOTPLUG_MASK_CPT; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_cpt); - } - - ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); + u32 hotplug; /* * Enable digital hotplug on the PCH, and configure the DP short pulse @@ -3109,10 +3112,12 @@ static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) * The pulse duration bits are reserved on LPT+. 
*/ hotplug = I915_READ(PCH_PORT_HOTPLUG); - hotplug &= ~(PORTD_PULSE_DURATION_MASK|PORTC_PULSE_DURATION_MASK|PORTB_PULSE_DURATION_MASK); - hotplug |= PORTD_HOTPLUG_ENABLE | PORTD_PULSE_DURATION_2ms; - hotplug |= PORTC_HOTPLUG_ENABLE | PORTC_PULSE_DURATION_2ms; + hotplug &= ~(PORTB_PULSE_DURATION_MASK | + PORTC_PULSE_DURATION_MASK | + PORTD_PULSE_DURATION_MASK); hotplug |= PORTB_HOTPLUG_ENABLE | PORTB_PULSE_DURATION_2ms; + hotplug |= PORTC_HOTPLUG_ENABLE | PORTC_PULSE_DURATION_2ms; + hotplug |= PORTD_HOTPLUG_ENABLE | PORTD_PULSE_DURATION_2ms; /* * When CPU and PCH are on the same package, port A * HPD must be enabled in both north and south. @@ -3122,6 +3127,23 @@ static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) I915_WRITE(PCH_PORT_HOTPLUG, hotplug); } +static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) +{ + u32 hotplug_irqs, enabled_irqs; + + if (HAS_PCH_IBX(dev_priv)) { + hotplug_irqs = SDE_HOTPLUG_MASK; + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_ibx); + } else { + hotplug_irqs = SDE_HOTPLUG_MASK_CPT; + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_cpt); + } + + ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); + + ibx_hpd_detection_setup(dev_priv); +} + static void spt_hpd_detection_setup(struct drm_i915_private *dev_priv) { u32 hotplug; @@ -3151,9 +3173,25 @@ static void spt_hpd_irq_setup(struct drm_i915_private *dev_priv) spt_hpd_detection_setup(dev_priv); } +static void ilk_hpd_detection_setup(struct drm_i915_private *dev_priv) +{ + u32 hotplug; + + /* + * Enable digital hotplug on the CPU, and configure the DP short pulse + * duration to 2ms (which is the minimum in the Display Port spec) + * The pulse duration bits are reserved on HSW+. 
+ */ + hotplug = I915_READ(DIGITAL_PORT_HOTPLUG_CNTRL); + hotplug &= ~DIGITAL_PORTA_PULSE_DURATION_MASK; + hotplug |= DIGITAL_PORTA_HOTPLUG_ENABLE | + DIGITAL_PORTA_PULSE_DURATION_2ms; + I915_WRITE(DIGITAL_PORT_HOTPLUG_CNTRL, hotplug); +} + static void ilk_hpd_irq_setup(struct drm_i915_private *dev_priv) { - u32 hotplug_irqs, hotplug, enabled_irqs; + u32 hotplug_irqs, enabled_irqs; if (INTEL_GEN(dev_priv) >= 8) { hotplug_irqs = GEN8_PORT_DP_A_HOTPLUG; @@ -3172,15 +3210,7 @@ static void ilk_hpd_irq_setup(struct drm_i915_private *dev_priv) ilk_update_display_irq(dev_priv, hotplug_irqs, enabled_irqs); } - /* - * Enable digital hotplug on the CPU, and configure the DP short pulse - * duration to 2ms (which is the minimum in the Display Port spec) - * The pulse duration bits are reserved on HSW+. - */ - hotplug = I915_READ(DIGITAL_PORT_HOTPLUG_CNTRL); - hotplug &= ~DIGITAL_PORTA_PULSE_DURATION_MASK; - hotplug |= DIGITAL_PORTA_HOTPLUG_ENABLE | DIGITAL_PORTA_PULSE_DURATION_2ms; - I915_WRITE(DIGITAL_PORT_HOTPLUG_CNTRL, hotplug); + ilk_hpd_detection_setup(dev_priv); ibx_hpd_irq_setup(dev_priv); } @@ -3251,7 +3281,7 @@ static void ibx_irq_postinstall(struct drm_device *dev) if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) || HAS_PCH_LPT(dev_priv)) - ; /* TODO: Enable HPD detection on older PCH platforms too */ + ibx_hpd_detection_setup(dev_priv); else spt_hpd_detection_setup(dev_priv); } @@ -3328,6 +3358,8 @@ static int ironlake_irq_postinstall(struct drm_device *dev) gen5_gt_irq_postinstall(dev); + ilk_hpd_detection_setup(dev_priv); + ibx_irq_postinstall(dev); if (IS_IRONLAKE_M(dev_priv)) { @@ -3346,7 +3378,7 @@ static int ironlake_irq_postinstall(struct drm_device *dev) void valleyview_enable_display_irqs(struct drm_i915_private *dev_priv) { - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if (dev_priv->display_irqs_enabled) return; @@ -3361,7 +3393,7 @@ void valleyview_enable_display_irqs(struct drm_i915_private *dev_priv) void 
valleyview_disable_display_irqs(struct drm_i915_private *dev_priv) { - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if (!dev_priv->display_irqs_enabled) return; @@ -3468,6 +3500,8 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) if (IS_GEN9_LP(dev_priv)) bxt_hpd_detection_setup(dev_priv); + else if (IS_BROADWELL(dev_priv)) + ilk_hpd_detection_setup(dev_priv); } static int gen8_irq_postinstall(struct drm_device *dev) @@ -4035,7 +4069,7 @@ static void i915_hpd_irq_setup(struct drm_i915_private *dev_priv) { u32 hotplug_en; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); /* Note HDMI and DP share hotplug bits */ /* enable bits are the same for all generations */ @@ -4215,24 +4249,23 @@ void intel_irq_init(struct drm_i915_private *dev_priv) else dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS; - dev_priv->rps.pm_intr_keep = 0; + dev_priv->rps.pm_intrmsk_mbz = 0; /* - * SNB,IVB can while VLV,CHV may hard hang on looping batchbuffer + * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer * if GEN6_PM_UP_EI_EXPIRED is masked. * * TODO: verify if this can be reproduced on VLV,CHV. 
*/ - if (INTEL_INFO(dev_priv)->gen <= 7 && !IS_HASWELL(dev_priv)) - dev_priv->rps.pm_intr_keep |= GEN6_PM_RP_UP_EI_EXPIRED; + if (INTEL_INFO(dev_priv)->gen <= 7) + dev_priv->rps.pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; if (INTEL_INFO(dev_priv)->gen >= 8) - dev_priv->rps.pm_intr_keep |= GEN8_PMINTR_REDIRECT_TO_GUC; + dev_priv->rps.pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; if (IS_GEN2(dev_priv)) { /* Gen2 doesn't have a hardware frame counter */ dev->max_vblank_count = 0; - dev->driver->get_vblank_counter = drm_vblank_no_hw_counter; } else if (IS_G4X(dev_priv) || INTEL_INFO(dev_priv)->gen >= 5) { dev->max_vblank_count = 0xffffffff; /* full 32 bit counter */ dev->driver->get_vblank_counter = g4x_get_vblank_counter; @@ -4259,6 +4292,8 @@ void intel_irq_init(struct drm_i915_private *dev_priv) if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) dev_priv->display_irqs_enabled = false; + dev_priv->hotplug.hpd_storm_threshold = HPD_STORM_DEFAULT_THRESHOLD; + dev->driver->get_vblank_timestamp = i915_get_vblank_timestamp; dev->driver->get_scanout_position = i915_get_crtc_scanoutpos; diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 0e280fbd52f1..b6a7e363d076 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -59,6 +59,8 @@ struct i915_params i915 __read_mostly = { .enable_guc_loading = 0, .enable_guc_submission = 0, .guc_log_level = -1, + .guc_firmware_path = NULL, + .huc_firmware_path = NULL, .enable_dp_mst = true, .inject_load_failure = 0, .enable_dpcd_backlight = false, @@ -145,7 +147,7 @@ MODULE_PARM_DESC(enable_psr, "Enable PSR " "(0=disabled, 1=enabled - link mode chosen per-platform, 2=force link-standby mode, 3=force link-off mode) " "Default: -1 (use per-chip default)"); -module_param_named_unsafe(alpha_support, i915.alpha_support, int, 0400); +module_param_named_unsafe(alpha_support, i915.alpha_support, bool, 0400); MODULE_PARM_DESC(alpha_support, "Enable alpha 
quality driver support for latest hardware. " "See also CONFIG_DRM_I915_ALPHA_SUPPORT."); @@ -205,9 +207,9 @@ module_param_named(verbose_state_checks, i915.verbose_state_checks, bool, 0600); MODULE_PARM_DESC(verbose_state_checks, "Enable verbose logs (ie. WARN_ON()) in case of unexpected hw state conditions."); -module_param_named_unsafe(nuclear_pageflip, i915.nuclear_pageflip, bool, 0600); +module_param_named_unsafe(nuclear_pageflip, i915.nuclear_pageflip, bool, 0400); MODULE_PARM_DESC(nuclear_pageflip, - "Force atomic modeset functionality; asynchronous mode is not yet supported. (default: false)."); + "Force enable atomic functionality on platforms that don't have full support yet."); /* WA to get away with the default setting in VBT for early platforms.Will be removed */ module_param_named_unsafe(edp_vswing, i915.edp_vswing, int, 0400); @@ -230,6 +232,14 @@ module_param_named(guc_log_level, i915.guc_log_level, int, 0400); MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (-1:disabled (default), 0-3:enabled)"); +module_param_named_unsafe(guc_firmware_path, i915.guc_firmware_path, charp, 0400); +MODULE_PARM_DESC(guc_firmware_path, + "GuC firmware path to use instead of the default one"); + +module_param_named_unsafe(huc_firmware_path, i915.huc_firmware_path, charp, 0400); +MODULE_PARM_DESC(huc_firmware_path, + "HuC firmware path to use instead of the default one"); + module_param_named_unsafe(enable_dp_mst, i915.enable_dp_mst, bool, 0600); MODULE_PARM_DESC(enable_dp_mst, "Enable multi-stream transport (MST) for new DisplayPort sinks. 
(default: true)"); diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 8e433de04679..34148cc8637c 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -27,46 +27,53 @@ #include <linux/cache.h> /* for __read_mostly */ +#define I915_PARAMS_FOR_EACH(func) \ + func(int, modeset); \ + func(int, panel_ignore_lid); \ + func(int, semaphores); \ + func(int, lvds_channel_mode); \ + func(int, panel_use_ssc); \ + func(int, vbt_sdvo_panel_type); \ + func(int, enable_rc6); \ + func(int, enable_dc); \ + func(int, enable_fbc); \ + func(int, enable_ppgtt); \ + func(int, enable_execlists); \ + func(int, enable_psr); \ + func(int, disable_power_well); \ + func(int, enable_ips); \ + func(int, invert_brightness); \ + func(int, enable_guc_loading); \ + func(int, enable_guc_submission); \ + func(int, guc_log_level); \ + func(char *, guc_firmware_path); \ + func(char *, huc_firmware_path); \ + func(int, use_mmio_flip); \ + func(int, mmio_debug); \ + func(int, edp_vswing); \ + func(unsigned int, inject_load_failure); \ + /* leave bools at the end to not create holes */ \ + func(bool, alpha_support); \ + func(bool, enable_cmd_parser); \ + func(bool, enable_hangcheck); \ + func(bool, fastboot); \ + func(bool, prefault_disable); \ + func(bool, load_detect_test); \ + func(bool, force_reset_modeset_test); \ + func(bool, reset); \ + func(bool, error_capture); \ + func(bool, disable_display); \ + func(bool, verbose_state_checks); \ + func(bool, nuclear_pageflip); \ + func(bool, enable_dp_mst); \ + func(bool, enable_dpcd_backlight); \ + func(bool, enable_gvt) + +#define MEMBER(T, member) T member struct i915_params { - int modeset; - int panel_ignore_lid; - int semaphores; - int lvds_channel_mode; - int panel_use_ssc; - int vbt_sdvo_panel_type; - int enable_rc6; - int enable_dc; - int enable_fbc; - int enable_ppgtt; - int enable_execlists; - int enable_psr; - unsigned int alpha_support; - int disable_power_well; - int 
enable_ips; - int invert_brightness; - int enable_guc_loading; - int enable_guc_submission; - int guc_log_level; - int use_mmio_flip; - int mmio_debug; - int edp_vswing; - unsigned int inject_load_failure; - /* leave bools at the end to not create holes */ - bool enable_cmd_parser; - bool enable_hangcheck; - bool fastboot; - bool prefault_disable; - bool load_detect_test; - bool force_reset_modeset_test; - bool reset; - bool error_capture; - bool disable_display; - bool verbose_state_checks; - bool nuclear_pageflip; - bool enable_dp_mst; - bool enable_dpcd_backlight; - bool enable_gvt; + I915_PARAMS_FOR_EACH(MEMBER); }; +#undef MEMBER extern struct i915_params i915 __read_mostly; diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 9bbbd4e83e3c..f87b0c4e564d 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -27,6 +27,7 @@ #include <linux/vga_switcheroo.h> #include "i915_drv.h" +#include "i915_selftest.h" #define GEN_DEFAULT_PIPEOFFSETS \ .pipe_offsets = { PIPE_A_OFFSET, PIPE_B_OFFSET, \ @@ -408,6 +409,7 @@ static const struct intel_device_info intel_geminilake_info = { .platform = INTEL_GEMINILAKE, .is_alpha_support = 1, .ddb_size = 1024, + .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } }; static const struct intel_device_info intel_kabylake_info = { @@ -477,10 +479,19 @@ static const struct pci_device_id pciidlist[] = { }; MODULE_DEVICE_TABLE(pci, pciidlist); +static void i915_pci_remove(struct pci_dev *pdev) +{ + struct drm_device *dev = pci_get_drvdata(pdev); + + i915_driver_unload(dev); + drm_dev_unref(dev); +} + static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct intel_device_info *intel_info = (struct intel_device_info *) ent->driver_data; + int err; if (IS_ALPHA_SUPPORT(intel_info) && !i915.alpha_support) { DRM_INFO("The driver support for your hardware in this kernel version is alpha quality\n" @@ -504,15 +515,17 @@ static int 
i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (vga_switcheroo_client_probe_defer(pdev)) return -EPROBE_DEFER; - return i915_driver_load(pdev, ent); -} + err = i915_driver_load(pdev, ent); + if (err) + return err; -static void i915_pci_remove(struct pci_dev *pdev) -{ - struct drm_device *dev = pci_get_drvdata(pdev); + err = i915_live_selftests(pdev); + if (err) { + i915_pci_remove(pdev); + return err > 0 ? -ENOTTY : err; + } - i915_driver_unload(dev); - drm_dev_unref(dev); + return 0; } static struct pci_driver i915_pci_driver = { @@ -526,6 +539,11 @@ static struct pci_driver i915_pci_driver = { static int __init i915_init(void) { bool use_kms = true; + int err; + + err = i915_mock_selftests(); + if (err) + return err > 0 ? 0 : err; /* * Enable KMS by default, unless explicitly overriden by diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 70964ca9251e..060b171480d5 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1008,7 +1008,7 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv) static void gen7_update_oacontrol_locked(struct drm_i915_private *dev_priv) { - assert_spin_locked(&dev_priv->perf.hook_lock); + lockdep_assert_held(&dev_priv->perf.hook_lock); if (dev_priv->perf.oa.exclusive_stream->enabled) { struct i915_gem_context *ctx = diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 1c8f5b9a7fcd..11b12f412492 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -48,6 +48,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) return !i915_mmio_reg_equal(reg, INVALID_MMIO_REG); } +#define _PICK(__index, ...) 
(((const u32 []){ __VA_ARGS__ })[__index]) + #define _PIPE(pipe, a, b) ((a) + (pipe)*((b)-(a))) #define _MMIO_PIPE(pipe, a, b) _MMIO(_PIPE(pipe, a, b)) #define _PLANE(plane, a, b) _PIPE(plane, a, b) @@ -56,14 +58,11 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define _MMIO_TRANS(tran, a, b) _MMIO(_TRANS(tran, a, b)) #define _PORT(port, a, b) ((a) + (port)*((b)-(a))) #define _MMIO_PORT(port, a, b) _MMIO(_PORT(port, a, b)) -#define _PIPE3(pipe, a, b, c) ((pipe) == PIPE_A ? (a) : \ - (pipe) == PIPE_B ? (b) : (c)) +#define _PIPE3(pipe, ...) _PICK(pipe, __VA_ARGS__) #define _MMIO_PIPE3(pipe, a, b, c) _MMIO(_PIPE3(pipe, a, b, c)) -#define _PORT3(port, a, b, c) ((port) == PORT_A ? (a) : \ - (port) == PORT_B ? (b) : (c)) +#define _PORT3(port, ...) _PICK(port, __VA_ARGS__) #define _MMIO_PORT3(pipe, a, b, c) _MMIO(_PORT3(pipe, a, b, c)) -#define _PHY3(phy, a, b, c) ((phy) == DPIO_PHY0 ? (a) : \ - (phy) == DPIO_PHY1 ? (b) : (c)) +#define _PHY3(phy, ...) _PICK(phy, __VA_ARGS__) #define _MMIO_PHY3(phy, a, b, c) _MMIO(_PHY3(phy, a, b, c)) #define _MASKED_FIELD(mask, value) ({ \ @@ -78,7 +77,13 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define _MASKED_BIT_ENABLE(a) ({ typeof(a) _a = (a); _MASKED_FIELD(_a, _a); }) #define _MASKED_BIT_DISABLE(a) (_MASKED_FIELD((a), 0)) +/* Engine ID */ +#define RCS_HW 0 +#define VCS_HW 1 +#define BCS_HW 2 +#define VECS_HW 3 +#define VCS2_HW 4 /* PCI config space */ @@ -120,7 +125,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GCFGC 0xf0 /* 915+ only */ #define GC_LOW_FREQUENCY_ENABLE (1 << 7) #define GC_DISPLAY_CLOCK_190_200_MHZ (0 << 4) -#define GC_DISPLAY_CLOCK_333_MHZ (4 << 4) +#define GC_DISPLAY_CLOCK_333_320_MHZ (4 << 4) #define GC_DISPLAY_CLOCK_267_MHZ_PNV (0 << 4) #define GC_DISPLAY_CLOCK_333_MHZ_PNV (1 << 4) #define GC_DISPLAY_CLOCK_444_MHZ_PNV (2 << 4) @@ -1135,8 +1140,6 @@ enum skl_disp_power_wells { #define VLV_BIAS_CPU_125_SOC_875 (6 << 2) #define CHV_BIAS_CPU_50_SOC_50 (3 << 2) 
-#define VLV_CZ_CLOCK_TO_MILLI_SEC 100000 - /* vlv2 north clock has */ #define CCK_FUSE_REG 0x8 #define CCK_FUSE_HPLL_FREQ_MASK 0x3 @@ -1553,6 +1556,7 @@ enum skl_disp_power_wells { _MMIO(_BXT_PHY_CH(phy, ch, reg_ch0, reg_ch1)) #define BXT_P_CR_GT_DISP_PWRON _MMIO(0x138090) +#define MIPIO_RST_CTRL (1 << 2) #define _BXT_PHY_CTL_DDI_A 0x64C00 #define _BXT_PHY_CTL_DDI_B 0x64C10 @@ -3376,10 +3380,22 @@ enum { INTEL_LEGACY_64B_CONTEXT }; +enum { + FAULT_AND_HANG = 0, + FAULT_AND_HALT, /* Debug only */ + FAULT_AND_STREAM, + FAULT_AND_CONTINUE /* Unsupported */ +}; + +#define GEN8_CTX_VALID (1<<0) +#define GEN8_CTX_FORCE_PD_RESTORE (1<<1) +#define GEN8_CTX_FORCE_RESTORE (1<<2) +#define GEN8_CTX_L3LLC_COHERENT (1<<5) +#define GEN8_CTX_PRIVILEGE (1<<8) #define GEN8_CTX_ADDRESSING_MODE_SHIFT 3 -#define GEN8_CTX_ADDRESSING_MODE(dev_priv) (USES_FULL_48BIT_PPGTT(dev_priv) ?\ - INTEL_LEGACY_64B_CONTEXT : \ - INTEL_LEGACY_32B_CONTEXT) + +#define GEN8_CTX_ID_SHIFT 32 +#define GEN8_CTX_ID_WIDTH 21 #define CHV_CLK_CTL1 _MMIO(0x101100) #define VLV_CLK_CTL2 _MMIO(0x101104) @@ -5887,11 +5903,18 @@ enum { #define _PLANE_KEYMSK_2_A 0x70298 #define _PLANE_KEYMAX_1_A 0x701a0 #define _PLANE_KEYMAX_2_A 0x702a0 +#define _PLANE_COLOR_CTL_1_A 0x701CC /* GLK+ */ +#define _PLANE_COLOR_CTL_2_A 0x702CC /* GLK+ */ +#define _PLANE_COLOR_CTL_3_A 0x703CC /* GLK+ */ +#define PLANE_COLOR_PIPE_GAMMA_ENABLE (1 << 30) +#define PLANE_COLOR_PIPE_CSC_ENABLE (1 << 23) +#define PLANE_COLOR_PLANE_GAMMA_DISABLE (1 << 13) #define _PLANE_BUF_CFG_1_A 0x7027c #define _PLANE_BUF_CFG_2_A 0x7037c #define _PLANE_NV12_BUF_CFG_1_A 0x70278 #define _PLANE_NV12_BUF_CFG_2_A 0x70378 + #define _PLANE_CTL_1_B 0x71180 #define _PLANE_CTL_2_B 0x71280 #define _PLANE_CTL_3_B 0x71380 @@ -5986,7 +6009,17 @@ enum { #define PLANE_NV12_BUF_CFG(pipe, plane) \ _MMIO_PLANE(plane, _PLANE_NV12_BUF_CFG_1(pipe), _PLANE_NV12_BUF_CFG_2(pipe)) -/* SKL new cursor registers */ +#define _PLANE_COLOR_CTL_1_B 0x711CC +#define _PLANE_COLOR_CTL_2_B 0x712CC 
+#define _PLANE_COLOR_CTL_3_B 0x713CC +#define _PLANE_COLOR_CTL_1(pipe) \ + _PIPE(pipe, _PLANE_COLOR_CTL_1_A, _PLANE_COLOR_CTL_1_B) +#define _PLANE_COLOR_CTL_2(pipe) \ + _PIPE(pipe, _PLANE_COLOR_CTL_2_A, _PLANE_COLOR_CTL_2_B) +#define PLANE_COLOR_CTL(pipe, plane) \ + _MMIO_PLANE(plane, _PLANE_COLOR_CTL_1(pipe), _PLANE_COLOR_CTL_2(pipe)) + +#/* SKL new cursor registers */ #define _CUR_BUF_CFG_A 0x7017c #define _CUR_BUF_CFG_B 0x7117c #define CUR_BUF_CFG(pipe) _MMIO_PIPE(pipe, _CUR_BUF_CFG_A, _CUR_BUF_CFG_B) @@ -6466,6 +6499,11 @@ enum { #define CHICKEN_PAR2_1 _MMIO(0x42090) #define KVM_CONFIG_CHANGE_NOTIFICATION_SELECT (1 << 14) +#define CHICKEN_MISC_2 _MMIO(0x42084) +#define GLK_CL0_PWR_DOWN (1 << 10) +#define GLK_CL1_PWR_DOWN (1 << 11) +#define GLK_CL2_PWR_DOWN (1 << 12) + #define _CHICKEN_PIPESL_1_A 0x420b0 #define _CHICKEN_PIPESL_1_B 0x420b4 #define HSW_FBCQ_DIS (1 << 22) @@ -7413,7 +7451,8 @@ enum { #define VLV_RCEDATA _MMIO(0xA0BC) #define GEN6_RC6pp_THRESHOLD _MMIO(0xA0C0) #define GEN6_PMINTRMSK _MMIO(0xA168) -#define GEN8_PMINTR_REDIRECT_TO_GUC (1<<31) +#define GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC (1<<31) +#define ARAT_EXPIRED_INTRMSK (1<<9) #define GEN8_MISC_CTRL0 _MMIO(0xA180) #define VLV_PWRDWNUPCTL _MMIO(0xA294) #define GEN9_MEDIA_PG_IDLE_HYSTERESIS _MMIO(0xA0C4) @@ -7790,7 +7829,14 @@ enum { #define TRANS_DDI_EDP_INPUT_B_ONOFF (5<<12) #define TRANS_DDI_EDP_INPUT_C_ONOFF (6<<12) #define TRANS_DDI_DP_VC_PAYLOAD_ALLOC (1<<8) +#define TRANS_DDI_HDMI_SCRAMBLER_CTS_ENABLE (1<<7) +#define TRANS_DDI_HDMI_SCRAMBLER_RESET_FREQ (1<<6) #define TRANS_DDI_BFI_ENABLE (1<<4) +#define TRANS_DDI_HIGH_TMDS_CHAR_RATE (1<<4) +#define TRANS_DDI_HDMI_SCRAMBLING (1<<0) +#define TRANS_DDI_HDMI_SCRAMBLING_MASK (TRANS_DDI_HDMI_SCRAMBLER_CTS_ENABLE \ + | TRANS_DDI_HDMI_SCRAMBLER_RESET_FREQ \ + | TRANS_DDI_HDMI_SCRAMBLING) /* DisplayPort Transport Control */ #define _DP_TP_CTL_A 0x64040 @@ -8167,6 +8213,7 @@ enum { #define PAL_PREC_10_12_BIT (0 << 31) #define PAL_PREC_SPLIT_MODE (1 
<< 31) #define PAL_PREC_AUTO_INCREMENT (1 << 15) +#define PAL_PREC_INDEX_VALUE_MASK (0x3ff << 0) #define _PAL_PREC_DATA_A 0x4A404 #define _PAL_PREC_DATA_B 0x4AC04 #define _PAL_PREC_DATA_C 0x4B404 @@ -8176,12 +8223,26 @@ enum { #define _PAL_PREC_EXT_GC_MAX_A 0x4A420 #define _PAL_PREC_EXT_GC_MAX_B 0x4AC20 #define _PAL_PREC_EXT_GC_MAX_C 0x4B420 +#define _PAL_PREC_EXT2_GC_MAX_A 0x4A430 +#define _PAL_PREC_EXT2_GC_MAX_B 0x4AC30 +#define _PAL_PREC_EXT2_GC_MAX_C 0x4B430 #define PREC_PAL_INDEX(pipe) _MMIO_PIPE(pipe, _PAL_PREC_INDEX_A, _PAL_PREC_INDEX_B) #define PREC_PAL_DATA(pipe) _MMIO_PIPE(pipe, _PAL_PREC_DATA_A, _PAL_PREC_DATA_B) #define PREC_PAL_GC_MAX(pipe, i) _MMIO(_PIPE(pipe, _PAL_PREC_GC_MAX_A, _PAL_PREC_GC_MAX_B) + (i) * 4) #define PREC_PAL_EXT_GC_MAX(pipe, i) _MMIO(_PIPE(pipe, _PAL_PREC_EXT_GC_MAX_A, _PAL_PREC_EXT_GC_MAX_B) + (i) * 4) +#define _PRE_CSC_GAMC_INDEX_A 0x4A484 +#define _PRE_CSC_GAMC_INDEX_B 0x4AC84 +#define _PRE_CSC_GAMC_INDEX_C 0x4B484 +#define PRE_CSC_GAMC_AUTO_INCREMENT (1 << 10) +#define _PRE_CSC_GAMC_DATA_A 0x4A488 +#define _PRE_CSC_GAMC_DATA_B 0x4AC88 +#define _PRE_CSC_GAMC_DATA_C 0x4B488 + +#define PRE_CSC_GAMC_INDEX(pipe) _MMIO_PIPE(pipe, _PRE_CSC_GAMC_INDEX_A, _PRE_CSC_GAMC_INDEX_B) +#define PRE_CSC_GAMC_DATA(pipe) _MMIO_PIPE(pipe, _PRE_CSC_GAMC_DATA_A, _PRE_CSC_GAMC_DATA_B) + /* pipe CSC & degamma/gamma LUTs on CHV */ #define _CGM_PIPE_A_CSC_COEFF01 (VLV_DISPLAY_BASE + 0x67900) #define _CGM_PIPE_A_CSC_COEFF23 (VLV_DISPLAY_BASE + 0x67904) @@ -8215,9 +8276,14 @@ enum { /* MIPI DSI registers */ -#define _MIPI_PORT(port, a, c) _PORT3(port, a, 0, c) /* ports A and C only */ +#define _MIPI_PORT(port, a, c) ((port) ? 
c : a) /* ports A and C only */ #define _MMIO_MIPI(port, a, c) _MMIO(_MIPI_PORT(port, a, c)) +#define MIPIO_TXESC_CLK_DIV1 _MMIO(0x160004) +#define GLK_TX_ESC_CLK_DIV1_MASK 0x3FF +#define MIPIO_TXESC_CLK_DIV2 _MMIO(0x160008) +#define GLK_TX_ESC_CLK_DIV2_MASK 0x3FF + /* BXT MIPI clock controls */ #define BXT_MAX_VAR_OUTPUT_KHZ 39500 @@ -8304,10 +8370,12 @@ enum { #define BXT_DSI_PLL_PVD_RATIO_SHIFT 16 #define BXT_DSI_PLL_PVD_RATIO_MASK (3 << BXT_DSI_PLL_PVD_RATIO_SHIFT) #define BXT_DSI_PLL_PVD_RATIO_1 (1 << BXT_DSI_PLL_PVD_RATIO_SHIFT) +#define BXT_DSIC_16X_BY1 (0 << 10) #define BXT_DSIC_16X_BY2 (1 << 10) #define BXT_DSIC_16X_BY3 (2 << 10) #define BXT_DSIC_16X_BY4 (3 << 10) #define BXT_DSIC_16X_MASK (3 << 10) +#define BXT_DSIA_16X_BY1 (0 << 8) #define BXT_DSIA_16X_BY2 (1 << 8) #define BXT_DSIA_16X_BY3 (2 << 8) #define BXT_DSIA_16X_BY4 (3 << 8) @@ -8317,6 +8385,8 @@ enum { #define BXT_DSI_PLL_RATIO_MAX 0x7D #define BXT_DSI_PLL_RATIO_MIN 0x22 +#define GLK_DSI_PLL_RATIO_MAX 0x6F +#define GLK_DSI_PLL_RATIO_MIN 0x22 #define BXT_DSI_PLL_RATIO_MASK 0xFF #define BXT_REF_CLOCK_KHZ 19200 @@ -8333,6 +8403,12 @@ enum { #define _BXT_MIPIC_PORT_CTRL 0x6B8C0 #define BXT_MIPI_PORT_CTRL(tc) _MMIO_MIPI(tc, _BXT_MIPIA_PORT_CTRL, _BXT_MIPIC_PORT_CTRL) +#define BXT_P_DSI_REGULATOR_CFG _MMIO(0x160020) +#define STAP_SELECT (1 << 0) + +#define BXT_P_DSI_REGULATOR_TX_CTRL _MMIO(0x160054) +#define HS_IO_CTRL_SELECT (1 << 0) + #define DPI_ENABLE (1 << 31) /* A + C */ #define MIPIA_MIPI4DPHY_DELAY_COUNT_SHIFT 27 #define MIPIA_MIPI4DPHY_DELAY_COUNT_MASK (0xf << 27) @@ -8586,6 +8662,14 @@ enum { #define LP_BYTECLK_SHIFT 0 #define LP_BYTECLK_MASK (0xffff << 0) +#define _MIPIA_TLPX_TIME_COUNT (dev_priv->mipi_mmio_base + 0xb0a4) +#define _MIPIC_TLPX_TIME_COUNT (dev_priv->mipi_mmio_base + 0xb8a4) +#define MIPI_TLPX_TIME_COUNT(port) _MMIO_MIPI(port, _MIPIA_TLPX_TIME_COUNT, _MIPIC_TLPX_TIME_COUNT) + +#define _MIPIA_CLK_LANE_TIMING (dev_priv->mipi_mmio_base + 0xb098) +#define _MIPIC_CLK_LANE_TIMING 
(dev_priv->mipi_mmio_base + 0xb898) +#define MIPI_CLK_LANE_TIMING(port) _MMIO_MIPI(port, _MIPIA_CLK_LANE_TIMING, _MIPIC_CLK_LANE_TIMING) + /* bits 31:0 */ #define _MIPIA_LP_GEN_DATA (dev_priv->mipi_mmio_base + 0xb064) #define _MIPIC_LP_GEN_DATA (dev_priv->mipi_mmio_base + 0xb864) diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h new file mode 100644 index 000000000000..9d7d86f1733d --- /dev/null +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -0,0 +1,106 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef __I915_SELFTEST_H__ +#define __I915_SELFTEST_H__ + +struct pci_dev; +struct drm_i915_private; + +struct i915_selftest { + unsigned long timeout_jiffies; + unsigned int timeout_ms; + unsigned int random_seed; + int mock; + int live; +}; + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include <linux/fault-inject.h> + +extern struct i915_selftest i915_selftest; + +int i915_mock_selftests(void); +int i915_live_selftests(struct pci_dev *pdev); + +/* We extract the function declarations from i915_mock_selftests.h and + * i915_live_selftests.h Add your unit test declarations there! + * + * Mock unit tests are run very early upon module load, before the driver + * is probed. All hardware interactions, as well as other subsystems, must + * be "mocked". + * + * Live unit tests are run after the driver is loaded - all hardware + * interactions are real. + */ +#define selftest(name, func) int func(void); +#include "selftests/i915_mock_selftests.h" +#undef selftest +#define selftest(name, func) int func(struct drm_i915_private *i915); +#include "selftests/i915_live_selftests.h" +#undef selftest + +struct i915_subtest { + int (*func)(void *data); + const char *name; +}; + +int __i915_subtests(const char *caller, + const struct i915_subtest *st, + unsigned int count, + void *data); +#define i915_subtests(T, data) \ + __i915_subtests(__func__, T, ARRAY_SIZE(T), data) + +#define SUBTEST(x) { x, #x } + +#define I915_SELFTEST_DECLARE(x) x +#define I915_SELFTEST_ONLY(x) unlikely(x) + +#else /* !IS_ENABLED(CONFIG_DRM_I915_SELFTEST) */ + +static inline int i915_mock_selftests(void) { return 0; } +static inline int i915_live_selftests(struct pci_dev *pdev) { return 0; } + +#define I915_SELFTEST_DECLARE(x) +#define I915_SELFTEST_ONLY(x) 0 + +#endif + +/* Using the i915_selftest_ prefix becomes a little unwieldy with the helpers. 
+ * Instead we use the igt_ shorthand, in reference to the intel-gpu-tools + * suite of uabi test cases (which includes a test runner for our selftests). + */ + +#define IGT_TIMEOUT(name__) \ + unsigned long name__ = jiffies + i915_selftest.timeout_jiffies + +__printf(2, 3) +bool __igt_timeout(unsigned long timeout, const char *fmt, ...); + +#define igt_timeout(t, fmt, ...) \ + __igt_timeout((t), KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) + +#define igt_can_mi_store_dword_imm(D) (INTEL_GEN(D) > 2) + +#endif /* !__I915_SELFTEST_H__ */ diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 40f4e5efaf83..a277f8eb7beb 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -395,10 +395,10 @@ static void timer_i915_sw_fence_wake(unsigned long data) { struct i915_sw_dma_fence_cb *cb = (struct i915_sw_dma_fence_cb *)data; - printk(KERN_WARNING "asynchronous wait on fence %s:%s:%x timed out\n", - cb->dma->ops->get_driver_name(cb->dma), - cb->dma->ops->get_timeline_name(cb->dma), - cb->dma->seqno); + pr_warn("asynchronous wait on fence %s:%s:%x timed out\n", + cb->dma->ops->get_driver_name(cb->dma), + cb->dma->ops->get_timeline_name(cb->dma), + cb->dma->seqno); dma_fence_put(cb->dma); cb->dma = NULL; diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 376ac957cd1c..f3fdfda5e558 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -42,32 +42,8 @@ static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev) static u32 calc_residency(struct drm_i915_private *dev_priv, i915_reg_t reg) { - u64 raw_time; /* 32b value may overflow during fixed point math */ - u64 units = 128ULL, div = 100000ULL; - u32 ret; - - if (!intel_enable_rc6()) - return 0; - - intel_runtime_pm_get(dev_priv); - - /* On VLV and CHV, residency time is in CZ units rather than 1.28us */ - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { 
- units = 1; - div = dev_priv->czclk_freq; - - if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH) - units <<= 8; - } else if (IS_GEN9_LP(dev_priv)) { - units = 1; - div = 1200; /* 833.33ns */ - } - - raw_time = I915_READ(reg) * units; - ret = DIV_ROUND_UP_ULL(raw_time, div); - - intel_runtime_pm_put(dev_priv); - return ret; + return DIV_ROUND_CLOSEST_ULL(intel_rc6_residency_us(dev_priv, reg), + 1000); } static ssize_t @@ -395,13 +371,13 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, /* We still need *_set_rps to process the new max_delay and * update the interrupt limits and PMINTRMSK even though * frequency request may be unchanged. */ - intel_set_rps(dev_priv, val); + ret = intel_set_rps(dev_priv, val); mutex_unlock(&dev_priv->rps.hw_lock); intel_runtime_pm_put(dev_priv); - return count; + return ret ?: count; } static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) @@ -448,14 +424,13 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, /* We still need *_set_rps to process the new min_delay and * update the interrupt limits and PMINTRMSK even though * frequency request may be unchanged. 
*/ - intel_set_rps(dev_priv, val); + ret = intel_set_rps(dev_priv, val); mutex_unlock(&dev_priv->rps.hw_lock); intel_runtime_pm_put(dev_priv); - return count; - + return ret ?: count; } static DEVICE_ATTR(gt_act_freq_mhz, S_IRUGO, gt_act_freq_mhz_show, NULL); @@ -523,33 +498,27 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj, struct device *kdev = kobj_to_dev(kobj); struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct drm_device *dev = &dev_priv->drm; - struct i915_error_state_file_priv error_priv; struct drm_i915_error_state_buf error_str; - ssize_t ret_count = 0; - int ret; - - memset(&error_priv, 0, sizeof(error_priv)); + struct i915_gpu_state *gpu; + ssize_t ret; - ret = i915_error_state_buf_init(&error_str, to_i915(dev), count, off); + ret = i915_error_state_buf_init(&error_str, dev_priv, count, off); if (ret) return ret; - error_priv.i915 = dev_priv; - i915_error_state_get(dev, &error_priv); - - ret = i915_error_state_to_str(&error_str, &error_priv); + gpu = i915_first_error_state(dev_priv); + ret = i915_error_state_to_str(&error_str, gpu); if (ret) goto out; - ret_count = count < error_str.bytes ? count : error_str.bytes; + ret = count < error_str.bytes ? 
count : error_str.bytes; + memcpy(buf, error_str.buf, ret); - memcpy(buf, error_str.buf, ret_count); out: - i915_error_state_put(&error_priv); + i915_gpu_state_put(gpu); i915_error_state_buf_release(&error_str); - return ret ?: ret_count; + return ret; } static ssize_t error_state_write(struct file *file, struct kobject *kobj, @@ -560,7 +529,7 @@ static ssize_t error_state_write(struct file *file, struct kobject *kobj, struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); DRM_DEBUG_DRIVER("Resetting error state\n"); - i915_destroy_error_state(dev_priv); + i915_reset_error_state(dev_priv); return count; } diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 4461df5a94fe..66404c5aee82 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -14,6 +14,206 @@ #define TRACE_SYSTEM i915 #define TRACE_INCLUDE_FILE i915_trace +/* watermark/fifo updates */ + +TRACE_EVENT(intel_cpu_fifo_underrun, + TP_PROTO(struct drm_i915_private *dev_priv, enum pipe pipe), + TP_ARGS(dev_priv, pipe), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + ), + + TP_fast_assign( + __entry->pipe = pipe; + __entry->frame = dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, pipe); + __entry->scanline = intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, pipe)); + ), + + TP_printk("pipe %c, frame=%u, scanline=%u", + pipe_name(__entry->pipe), + __entry->frame, __entry->scanline) +); + +TRACE_EVENT(intel_pch_fifo_underrun, + TP_PROTO(struct drm_i915_private *dev_priv, enum transcoder pch_transcoder), + TP_ARGS(dev_priv, pch_transcoder), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + ), + + TP_fast_assign( + enum pipe pipe = (enum pipe)pch_transcoder; + __entry->pipe = pipe; + __entry->frame = dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, pipe); + __entry->scanline = 
intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, pipe)); + ), + + TP_printk("pch transcoder %c, frame=%u, scanline=%u", + pipe_name(__entry->pipe), + __entry->frame, __entry->scanline) +); + +TRACE_EVENT(intel_memory_cxsr, + TP_PROTO(struct drm_i915_private *dev_priv, bool old, bool new), + TP_ARGS(dev_priv, old, new), + + TP_STRUCT__entry( + __array(u32, frame, 3) + __array(u32, scanline, 3) + __field(bool, old) + __field(bool, new) + ), + + TP_fast_assign( + enum pipe pipe; + for_each_pipe(dev_priv, pipe) { + __entry->frame[pipe] = + dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, pipe); + __entry->scanline[pipe] = + intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, pipe)); + } + __entry->old = old; + __entry->new = new; + ), + + TP_printk("%s->%s, pipe A: frame=%u, scanline=%u, pipe B: frame=%u, scanline=%u, pipe C: frame=%u, scanline=%u", + onoff(__entry->old), onoff(__entry->new), + __entry->frame[PIPE_A], __entry->scanline[PIPE_A], + __entry->frame[PIPE_B], __entry->scanline[PIPE_B], + __entry->frame[PIPE_C], __entry->scanline[PIPE_C]) +); + +TRACE_EVENT(vlv_wm, + TP_PROTO(struct intel_crtc *crtc, const struct vlv_wm_values *wm), + TP_ARGS(crtc, wm), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + __field(u32, level) + __field(u32, cxsr) + __field(u32, primary) + __field(u32, sprite0) + __field(u32, sprite1) + __field(u32, cursor) + __field(u32, sr_plane) + __field(u32, sr_cursor) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, + crtc->pipe); + __entry->scanline = intel_get_crtc_scanline(crtc); + __entry->level = wm->level; + __entry->cxsr = wm->cxsr; + __entry->primary = wm->pipe[crtc->pipe].plane[PLANE_PRIMARY]; + __entry->sprite0 = wm->pipe[crtc->pipe].plane[PLANE_SPRITE0]; + __entry->sprite1 = wm->pipe[crtc->pipe].plane[PLANE_SPRITE1]; + __entry->cursor = wm->pipe[crtc->pipe].plane[PLANE_CURSOR]; 
+ __entry->sr_plane = wm->sr.plane; + __entry->sr_cursor = wm->sr.cursor; + ), + + TP_printk("pipe %c, frame=%u, scanline=%u, level=%d, cxsr=%d, wm %d/%d/%d/%d, sr %d/%d", + pipe_name(__entry->pipe), __entry->frame, + __entry->scanline, __entry->level, __entry->cxsr, + __entry->primary, __entry->sprite0, __entry->sprite1, __entry->cursor, + __entry->sr_plane, __entry->sr_cursor) +); + +TRACE_EVENT(vlv_fifo_size, + TP_PROTO(struct intel_crtc *crtc, u32 sprite0_start, u32 sprite1_start, u32 fifo_size), + TP_ARGS(crtc, sprite0_start, sprite1_start, fifo_size), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + __field(u32, sprite0_start) + __field(u32, sprite1_start) + __field(u32, fifo_size) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, + crtc->pipe); + __entry->scanline = intel_get_crtc_scanline(crtc); + __entry->sprite0_start = sprite0_start; + __entry->sprite1_start = sprite1_start; + __entry->fifo_size = fifo_size; + ), + + TP_printk("pipe %c, frame=%u, scanline=%u, %d/%d/%d", + pipe_name(__entry->pipe), __entry->frame, + __entry->scanline, __entry->sprite0_start, + __entry->sprite1_start, __entry->fifo_size) +); + +/* plane updates */ + +TRACE_EVENT(intel_update_plane, + TP_PROTO(struct drm_plane *plane, struct intel_crtc *crtc), + TP_ARGS(plane, crtc), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(const char *, name) + __field(u32, frame) + __field(u32, scanline) + __array(int, src, 4) + __array(int, dst, 4) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->name = plane->name; + __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, + crtc->pipe); + __entry->scanline = intel_get_crtc_scanline(crtc); + memcpy(__entry->src, &plane->state->src, sizeof(__entry->src)); + memcpy(__entry->dst, &plane->state->dst, sizeof(__entry->dst)); + ), + + TP_printk("pipe %c, plane %s, frame=%u, 
scanline=%u, " DRM_RECT_FP_FMT " -> " DRM_RECT_FMT, + pipe_name(__entry->pipe), __entry->name, + __entry->frame, __entry->scanline, + DRM_RECT_FP_ARG((const struct drm_rect *)__entry->src), + DRM_RECT_ARG((const struct drm_rect *)__entry->dst)) +); + +TRACE_EVENT(intel_disable_plane, + TP_PROTO(struct drm_plane *plane, struct intel_crtc *crtc), + TP_ARGS(plane, crtc), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(const char *, name) + __field(u32, frame) + __field(u32, scanline) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->name = plane->name; + __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, + crtc->pipe); + __entry->scanline = intel_get_crtc_scanline(crtc); + ), + + TP_printk("pipe %c, plane %s, frame=%u, scanline=%u", + pipe_name(__entry->pipe), __entry->name, + __entry->frame, __entry->scanline) +); + /* pipe updates */ TRACE_EVENT(i915_pipe_update_start, @@ -175,134 +375,6 @@ TRACE_EVENT(i915_vma_unbind, __entry->obj, __entry->offset, __entry->size, __entry->vm) ); -TRACE_EVENT(i915_va_alloc, - TP_PROTO(struct i915_vma *vma), - TP_ARGS(vma), - - TP_STRUCT__entry( - __field(struct i915_address_space *, vm) - __field(u64, start) - __field(u64, end) - ), - - TP_fast_assign( - __entry->vm = vma->vm; - __entry->start = vma->node.start; - __entry->end = vma->node.start + vma->node.size - 1; - ), - - TP_printk("vm=%p (%c), 0x%llx-0x%llx", - __entry->vm, i915_is_ggtt(__entry->vm) ? 
'G' : 'P', __entry->start, __entry->end) -); - -DECLARE_EVENT_CLASS(i915_px_entry, - TP_PROTO(struct i915_address_space *vm, u32 px, u64 start, u64 px_shift), - TP_ARGS(vm, px, start, px_shift), - - TP_STRUCT__entry( - __field(struct i915_address_space *, vm) - __field(u32, px) - __field(u64, start) - __field(u64, end) - ), - - TP_fast_assign( - __entry->vm = vm; - __entry->px = px; - __entry->start = start; - __entry->end = ((start + (1ULL << px_shift)) & ~((1ULL << px_shift)-1)) - 1; - ), - - TP_printk("vm=%p, pde=%d (0x%llx-0x%llx)", - __entry->vm, __entry->px, __entry->start, __entry->end) -); - -DEFINE_EVENT(i915_px_entry, i915_page_table_entry_alloc, - TP_PROTO(struct i915_address_space *vm, u32 pde, u64 start, u64 pde_shift), - TP_ARGS(vm, pde, start, pde_shift) -); - -DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_entry_alloc, - TP_PROTO(struct i915_address_space *vm, u32 pdpe, u64 start, u64 pdpe_shift), - TP_ARGS(vm, pdpe, start, pdpe_shift), - - TP_printk("vm=%p, pdpe=%d (0x%llx-0x%llx)", - __entry->vm, __entry->px, __entry->start, __entry->end) -); - -DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_pointer_entry_alloc, - TP_PROTO(struct i915_address_space *vm, u32 pml4e, u64 start, u64 pml4e_shift), - TP_ARGS(vm, pml4e, start, pml4e_shift), - - TP_printk("vm=%p, pml4e=%d (0x%llx-0x%llx)", - __entry->vm, __entry->px, __entry->start, __entry->end) -); - -/* Avoid extra math because we only support two sizes. The format is defined by - * bitmap_scnprintf. Each 32 bits is 8 HEX digits followed by comma */ -#define TRACE_PT_SIZE(bits) \ - ((((bits) == 1024) ? 
288 : 144) + 1) - -DECLARE_EVENT_CLASS(i915_page_table_entry_update, - TP_PROTO(struct i915_address_space *vm, u32 pde, - struct i915_page_table *pt, u32 first, u32 count, u32 bits), - TP_ARGS(vm, pde, pt, first, count, bits), - - TP_STRUCT__entry( - __field(struct i915_address_space *, vm) - __field(u32, pde) - __field(u32, first) - __field(u32, last) - __dynamic_array(char, cur_ptes, TRACE_PT_SIZE(bits)) - ), - - TP_fast_assign( - __entry->vm = vm; - __entry->pde = pde; - __entry->first = first; - __entry->last = first + count - 1; - scnprintf(__get_str(cur_ptes), - TRACE_PT_SIZE(bits), - "%*pb", - bits, - pt->used_ptes); - ), - - TP_printk("vm=%p, pde=%d, updating %u:%u\t%s", - __entry->vm, __entry->pde, __entry->last, __entry->first, - __get_str(cur_ptes)) -); - -DEFINE_EVENT(i915_page_table_entry_update, i915_page_table_entry_map, - TP_PROTO(struct i915_address_space *vm, u32 pde, - struct i915_page_table *pt, u32 first, u32 count, u32 bits), - TP_ARGS(vm, pde, pt, first, count, bits) -); - -TRACE_EVENT(i915_gem_object_change_domain, - TP_PROTO(struct drm_i915_gem_object *obj, u32 old_read, u32 old_write), - TP_ARGS(obj, old_read, old_write), - - TP_STRUCT__entry( - __field(struct drm_i915_gem_object *, obj) - __field(u32, read_domains) - __field(u32, write_domain) - ), - - TP_fast_assign( - __entry->obj = obj; - __entry->read_domains = obj->base.read_domains | (old_read << 16); - __entry->write_domain = obj->base.write_domain | (old_write << 16); - ), - - TP_printk("obj=%p, read=%02x=>%02x, write=%02x=>%02x", - __entry->obj, - __entry->read_domains >> 16, - __entry->read_domains & 0xffff, - __entry->write_domain >> 16, - __entry->write_domain & 0xffff) -); - TRACE_EVENT(i915_gem_object_pwrite, TP_PROTO(struct drm_i915_gem_object *obj, u32 offset, u32 len), TP_ARGS(obj, offset, len), @@ -503,13 +575,14 @@ TRACE_EVENT(i915_gem_ring_sync_to, __entry->seqno) ); -TRACE_EVENT(i915_gem_ring_dispatch, +TRACE_EVENT(i915_gem_request_queue, TP_PROTO(struct 
drm_i915_gem_request *req, u32 flags), TP_ARGS(req, flags), TP_STRUCT__entry( __field(u32, dev) __field(u32, ring) + __field(u32, ctx) __field(u32, seqno) __field(u32, flags) ), @@ -517,13 +590,14 @@ TRACE_EVENT(i915_gem_ring_dispatch, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->global_seqno; + __entry->ctx = req->fence.context; + __entry->seqno = req->fence.seqno; __entry->flags = flags; - dma_fence_enable_sw_signaling(&req->fence); ), - TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x", - __entry->dev, __entry->ring, __entry->seqno, __entry->flags) + TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, flags=0x%x", + __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, + __entry->flags) ); TRACE_EVENT(i915_gem_ring_flush, @@ -555,18 +629,23 @@ DECLARE_EVENT_CLASS(i915_gem_request, TP_STRUCT__entry( __field(u32, dev) + __field(u32, ctx) __field(u32, ring) __field(u32, seqno) + __field(u32, global) ), TP_fast_assign( __entry->dev = req->i915->drm.primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->global_seqno; + __entry->ctx = req->fence.context; + __entry->seqno = req->fence.seqno; + __entry->global = req->global_seqno; ), - TP_printk("dev=%u, ring=%u, seqno=%u", - __entry->dev, __entry->ring, __entry->seqno) + TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u", + __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, + __entry->global) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_add, @@ -574,24 +653,100 @@ DEFINE_EVENT(i915_gem_request, i915_gem_request_add, TP_ARGS(req) ); -TRACE_EVENT(i915_gem_request_notify, - TP_PROTO(struct intel_engine_cs *engine), - TP_ARGS(engine), +#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) +DEFINE_EVENT(i915_gem_request, i915_gem_request_submit, + TP_PROTO(struct drm_i915_gem_request *req), + TP_ARGS(req) +); + +DEFINE_EVENT(i915_gem_request, i915_gem_request_execute, + TP_PROTO(struct drm_i915_gem_request *req), + 
TP_ARGS(req) +); + +DECLARE_EVENT_CLASS(i915_gem_request_hw, + TP_PROTO(struct drm_i915_gem_request *req, + unsigned int port), + TP_ARGS(req, port), + + TP_STRUCT__entry( + __field(u32, dev) + __field(u32, ring) + __field(u32, seqno) + __field(u32, global_seqno) + __field(u32, ctx) + __field(u32, port) + ), + + TP_fast_assign( + __entry->dev = req->i915->drm.primary->index; + __entry->ring = req->engine->id; + __entry->ctx = req->fence.context; + __entry->seqno = req->fence.seqno; + __entry->global_seqno = req->global_seqno; + __entry->port = port; + ), + + TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u", + __entry->dev, __entry->ring, __entry->ctx, + __entry->seqno, __entry->global_seqno, + __entry->port) +); + +DEFINE_EVENT(i915_gem_request_hw, i915_gem_request_in, + TP_PROTO(struct drm_i915_gem_request *req, unsigned int port), + TP_ARGS(req, port) +); + +DEFINE_EVENT(i915_gem_request, i915_gem_request_out, + TP_PROTO(struct drm_i915_gem_request *req), + TP_ARGS(req) +); +#else +#if !defined(TRACE_HEADER_MULTI_READ) +static inline void +trace_i915_gem_request_submit(struct drm_i915_gem_request *req) +{ +} + +static inline void +trace_i915_gem_request_execute(struct drm_i915_gem_request *req) +{ +} + +static inline void +trace_i915_gem_request_in(struct drm_i915_gem_request *req, unsigned int port) +{ +} + +static inline void +trace_i915_gem_request_out(struct drm_i915_gem_request *req) +{ +} +#endif +#endif + +TRACE_EVENT(intel_engine_notify, + TP_PROTO(struct intel_engine_cs *engine, bool waiters), + TP_ARGS(engine, waiters), TP_STRUCT__entry( __field(u32, dev) __field(u32, ring) __field(u32, seqno) + __field(bool, waiters) ), TP_fast_assign( __entry->dev = engine->i915->drm.primary->index; __entry->ring = engine->id; __entry->seqno = intel_engine_get_seqno(engine); + __entry->waiters = waiters; ), - TP_printk("dev=%u, ring=%u, seqno=%u", - __entry->dev, __entry->ring, __entry->seqno) + TP_printk("dev=%u, ring=%u, seqno=%u, waiters=%u", + 
__entry->dev, __entry->ring, __entry->seqno, + __entry->waiters) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_retire, @@ -599,20 +754,17 @@ DEFINE_EVENT(i915_gem_request, i915_gem_request_retire, TP_ARGS(req) ); -DEFINE_EVENT(i915_gem_request, i915_gem_request_complete, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req) -); - TRACE_EVENT(i915_gem_request_wait_begin, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req), + TP_PROTO(struct drm_i915_gem_request *req, unsigned int flags), + TP_ARGS(req, flags), TP_STRUCT__entry( __field(u32, dev) __field(u32, ring) + __field(u32, ctx) __field(u32, seqno) - __field(bool, blocking) + __field(u32, global) + __field(unsigned int, flags) ), /* NB: the blocking information is racy since mutex_is_locked @@ -624,14 +776,16 @@ TRACE_EVENT(i915_gem_request_wait_begin, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->global_seqno; - __entry->blocking = - mutex_is_locked(&req->i915->drm.struct_mutex); + __entry->ctx = req->fence.context; + __entry->seqno = req->fence.seqno; + __entry->global = req->global_seqno; + __entry->flags = flags; ), - TP_printk("dev=%u, ring=%u, seqno=%u, blocking=%s", - __entry->dev, __entry->ring, - __entry->seqno, __entry->blocking ? "yes (NB)" : "no") + TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u, blocking=%u, flags=0x%x", + __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, + __entry->global, !!(__entry->flags & I915_WAIT_LOCKED), + __entry->flags) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_wait_end, @@ -769,17 +923,19 @@ DECLARE_EVENT_CLASS(i915_context, TP_STRUCT__entry( __field(u32, dev) __field(struct i915_gem_context *, ctx) + __field(u32, hw_id) __field(struct i915_address_space *, vm) ), TP_fast_assign( + __entry->dev = ctx->i915->drm.primary->index; __entry->ctx = ctx; + __entry->hw_id = ctx->hw_id; __entry->vm = ctx->ppgtt ? 
&ctx->ppgtt->base : NULL; - __entry->dev = ctx->i915->drm.primary->index; ), - TP_printk("dev=%u, ctx=%p, ctx_vm=%p", - __entry->dev, __entry->ctx, __entry->vm) + TP_printk("dev=%u, ctx=%p, ctx_vm=%p, hw_id=%u", + __entry->dev, __entry->ctx, __entry->vm, __entry->hw_id) ) DEFINE_EVENT(i915_context, i915_context_create, diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 34020873e1f6..c5455d36b617 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -25,6 +25,35 @@ #ifndef __I915_UTILS_H #define __I915_UTILS_H +#undef WARN_ON +/* Many gcc seem to no see through this and fall over :( */ +#if 0 +#define WARN_ON(x) ({ \ + bool __i915_warn_cond = (x); \ + if (__builtin_constant_p(__i915_warn_cond)) \ + BUILD_BUG_ON(__i915_warn_cond); \ + WARN(__i915_warn_cond, "WARN_ON(" #x ")"); }) +#else +#define WARN_ON(x) WARN((x), "%s", "WARN_ON(" __stringify(x) ")") +#endif + +#undef WARN_ON_ONCE +#define WARN_ON_ONCE(x) WARN_ONCE((x), "%s", "WARN_ON_ONCE(" __stringify(x) ")") + +#define MISSING_CASE(x) WARN(1, "Missing switch case (%lu) in %s\n", \ + (long)(x), __func__) + +#if GCC_VERSION >= 70000 +#define add_overflows(A, B) \ + __builtin_add_overflow_p((A), (B), (typeof((A) + (B)))0) +#else +#define add_overflows(A, B) ({ \ + typeof(A) a = (A); \ + typeof(B) b = (B); \ + a + b < a; \ +}) +#endif + #define range_overflows(start, size, max) ({ \ typeof(start) start__ = (start); \ typeof(size) size__ = (size); \ @@ -55,6 +84,8 @@ #define ptr_pack_bits(ptr, bits) \ ((typeof(ptr))((unsigned long)(ptr) | (bits))) +#define ptr_offset(ptr, member) offsetof(typeof(*(ptr)), member) + #define fetch_and_zero(ptr) ({ \ typeof(*ptr) __T = *(ptr); \ *(ptr) = (typeof(*ptr))0; \ diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index d0abfd08a01c..4ab8a973b61f 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -179,7 +179,7 @@ static int 
vgt_balloon_space(struct i915_ggtt *ggtt, int intel_vgt_balloon(struct drm_i915_private *dev_priv) { struct i915_ggtt *ggtt = &dev_priv->ggtt; - unsigned long ggtt_end = ggtt->base.start + ggtt->base.total; + unsigned long ggtt_end = ggtt->base.total; unsigned long mappable_base, mappable_size, mappable_end; unsigned long unmappable_base, unmappable_size, unmappable_end; @@ -202,8 +202,7 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) DRM_INFO("Unmappable graphic memory: base 0x%lx size %ldKiB\n", unmappable_base, unmappable_size / 1024); - if (mappable_base < ggtt->base.start || - mappable_end > ggtt->mappable_end || + if (mappable_end > ggtt->mappable_end || unmappable_base < ggtt->mappable_end || unmappable_end > ggtt_end) { DRM_ERROR("Invalid ballooning configuration!\n"); @@ -219,21 +218,17 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) goto err; } - /* - * No need to partition out the last physical page, - * because it is reserved to the guard page. - */ - if (unmappable_end < ggtt_end - PAGE_SIZE) { + if (unmappable_end < ggtt_end) { ret = vgt_balloon_space(ggtt, &bl_info.space[3], - unmappable_end, ggtt_end - PAGE_SIZE); + unmappable_end, ggtt_end); if (ret) goto err; } /* Mappable graphic memory ballooning */ - if (mappable_base > ggtt->base.start) { + if (mappable_base) { ret = vgt_balloon_space(ggtt, &bl_info.space[0], - ggtt->base.start, mappable_base); + 0, mappable_base); if (ret) goto err; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index df20e9bc1c0f..1aba47024656 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -78,6 +78,9 @@ vma_create(struct drm_i915_gem_object *obj, struct rb_node *rb, **p; int i; + /* The aliasing_ppgtt should never be used directly! 
*/ + GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base); + vma = kmem_cache_zalloc(vm->i915->vmas, GFP_KERNEL); if (vma == NULL) return ERR_PTR(-ENOMEM); @@ -238,7 +241,15 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, u32 vma_flags; int ret; - if (WARN_ON(flags == 0)) + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + GEM_BUG_ON(vma->size > vma->node.size); + + if (GEM_WARN_ON(range_overflows(vma->node.start, + vma->node.size, + vma->vm->total))) + return -ENODEV; + + if (GEM_WARN_ON(!flags)) return -EINVAL; bind_flags = 0; @@ -255,20 +266,6 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, if (bind_flags == 0) return 0; - if (GEM_WARN_ON(range_overflows(vma->node.start, - vma->node.size, - vma->vm->total))) - return -ENODEV; - - if (vma_flags == 0 && vma->vm->allocate_va_range) { - trace_i915_va_alloc(vma); - ret = vma->vm->allocate_va_range(vma->vm, - vma->node.start, - vma->node.size); - if (ret) - return ret; - } - trace_i915_vma_bind(vma, bind_flags); ret = vma->vm->bind_vma(vma, cache_level, bind_flags); if (ret) @@ -324,8 +321,8 @@ void i915_vma_unpin_and_release(struct i915_vma **p_vma) __i915_gem_object_release_unless_active(obj); } -bool -i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +bool i915_vma_misplaced(const struct i915_vma *vma, + u64 size, u64 alignment, u64 flags) { if (!drm_mm_node_allocated(&vma->node)) return false; @@ -704,3 +701,6 @@ destroy: return 0; } +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_vma.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index e39d922cfb6f..2e03f81dddbe 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -228,8 +228,8 @@ i915_vma_compare(struct i915_vma *vma, int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level); 
-bool -i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); +bool i915_vma_misplaced(const struct i915_vma *vma, + u64 size, u64 alignment, u64 flags); void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c index aa9160e7f1d8..50fb1f76cc5f 100644 --- a/drivers/gpu/drm/i915/intel_atomic.c +++ b/drivers/gpu/drm/i915/intel_atomic.c @@ -99,6 +99,7 @@ intel_crtc_duplicate_state(struct drm_crtc *crtc) crtc_state->update_wm_pre = false; crtc_state->update_wm_post = false; crtc_state->fb_changed = false; + crtc_state->fifo_changed = false; crtc_state->wm.need_postvbl_update = false; crtc_state->fb_bits = 0; @@ -121,7 +122,7 @@ intel_crtc_destroy_state(struct drm_crtc *crtc, /** * intel_atomic_setup_scalers() - setup scalers for crtc per staged requests - * @dev: DRM device + * @dev_priv: i915 device * @crtc: intel crtc * @crtc_state: incoming crtc_state to validate and setup scalers * @@ -136,9 +137,9 @@ intel_crtc_destroy_state(struct drm_crtc *crtc, * 0 - scalers were setup succesfully * error code - otherwise */ -int intel_atomic_setup_scalers(struct drm_device *dev, - struct intel_crtc *intel_crtc, - struct intel_crtc_state *crtc_state) +int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, + struct intel_crtc *intel_crtc, + struct intel_crtc_state *crtc_state) { struct drm_plane *plane = NULL; struct intel_plane *intel_plane; @@ -199,7 +200,7 @@ int intel_atomic_setup_scalers(struct drm_device *dev, */ if (!plane) { struct drm_plane_state *state; - plane = drm_plane_from_index(dev, i); + plane = drm_plane_from_index(&dev_priv->drm, i); state = drm_atomic_get_plane_state(drm_state, plane); if (IS_ERR(state)) { DRM_DEBUG_KMS("Failed to add [PLANE:%d] to drm_state\n", @@ -247,7 +248,9 @@ int intel_atomic_setup_scalers(struct drm_device *dev, } /* 
set scaler mode */ - if (num_scalers_need == 1 && intel_crtc->pipe != PIPE_C) { + if (IS_GEMINILAKE(dev_priv)) { + scaler_state->scalers[*scaler_id].mode = 0; + } else if (num_scalers_need == 1 && intel_crtc->pipe != PIPE_C) { /* * when only 1 scaler is in use on either pipe A or B, * scaler 0 operates in high quality (HQ) mode. diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index 41fd94e62d3c..cfb47293fd53 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -189,6 +189,12 @@ int intel_plane_atomic_check_with_state(struct intel_crtc_state *crtc_state, if (ret) return ret; + /* FIXME pre-g4x don't work like this */ + if (intel_state->base.visible) + crtc_state->active_planes |= BIT(intel_plane->id); + else + crtc_state->active_planes &= ~BIT(intel_plane->id); + return intel_plane_atomic_calc_changes(&crtc_state->base, state); } @@ -225,12 +231,19 @@ static void intel_plane_atomic_update(struct drm_plane *plane, to_intel_plane_state(plane->state); struct drm_crtc *crtc = plane->state->crtc ?: old_state->crtc; - if (intel_state->base.visible) + if (intel_state->base.visible) { + trace_intel_update_plane(plane, + to_intel_crtc(crtc)); + intel_plane->update_plane(plane, to_intel_crtc_state(crtc->state), intel_state); - else + } else { + trace_intel_disable_plane(plane, + to_intel_crtc(crtc)); + intel_plane->disable_plane(plane, crtc); + } } const struct drm_plane_helper_funcs intel_plane_helper_funcs = { diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index d76f3033e890..52c207e81f41 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -720,7 +720,7 @@ static void i915_audio_component_codec_wake_override(struct device *kdev, struct drm_i915_private *dev_priv = kdev_to_i915(kdev); u32 tmp; - if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)) + if (!IS_GEN9_BC(dev_priv)) return; 
i915_audio_component_get_power(kdev); @@ -752,7 +752,7 @@ static int i915_audio_component_get_cdclk_freq(struct device *kdev) if (WARN_ON_ONCE(!HAS_DDI(dev_priv))) return -ENODEV; - return dev_priv->cdclk_freq; + return dev_priv->cdclk.hw.cdclk; } /* diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index e144f033f4b5..639d45c1dd2e 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1341,6 +1341,7 @@ parse_device_mapping(struct drm_i915_private *dev_priv, return; } +/* Common defaults which may be overridden by VBT. */ static void init_vbt_defaults(struct drm_i915_private *dev_priv) { @@ -1377,6 +1378,18 @@ init_vbt_defaults(struct drm_i915_private *dev_priv) &dev_priv->vbt.ddi_port_info[port]; info->hdmi_level_shift = HDMI_LEVEL_SHIFT_UNKNOWN; + } +} + +/* Defaults to initialize only if there is no VBT. */ +static void +init_vbt_missing_defaults(struct drm_i915_private *dev_priv) +{ + enum port port; + + for (port = PORT_A; port < I915_MAX_PORTS; port++) { + struct ddi_vbt_port_info *info = + &dev_priv->vbt.ddi_port_info[port]; info->supports_dvi = (port != PORT_A && port != PORT_E); info->supports_hdmi = info->supports_dvi; @@ -1462,36 +1475,35 @@ static const struct vbt_header *find_vbt(void __iomem *bios, size_t size) * intel_bios_init - find VBT and initialize settings from the BIOS * @dev_priv: i915 device instance * - * Loads the Video BIOS and checks that the VBT exists. Sets scratch registers - * to appropriate values. - * - * Returns 0 on success, nonzero on failure. + * Parse and initialize settings from the Video BIOS Tables (VBT). If the VBT + * was not found in ACPI OpRegion, try to find it in PCI ROM first. Also + * initialize some defaults if the VBT is not present at all. 
*/ -int -intel_bios_init(struct drm_i915_private *dev_priv) +void intel_bios_init(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; const struct vbt_header *vbt = dev_priv->opregion.vbt; const struct bdb_header *bdb; u8 __iomem *bios = NULL; - if (HAS_PCH_NOP(dev_priv)) - return -ENODEV; + if (HAS_PCH_NOP(dev_priv)) { + DRM_DEBUG_KMS("Skipping VBT init due to disabled display.\n"); + return; + } init_vbt_defaults(dev_priv); + /* If the OpRegion does not have VBT, look in PCI ROM. */ if (!vbt) { size_t size; bios = pci_map_rom(pdev, &size); if (!bios) - return -1; + goto out; vbt = find_vbt(bios, size); - if (!vbt) { - pci_unmap_rom(pdev, bios); - return -1; - } + if (!vbt) + goto out; DRM_DEBUG_KMS("Found valid VBT in PCI ROM\n"); } @@ -1516,10 +1528,14 @@ intel_bios_init(struct drm_i915_private *dev_priv) parse_mipi_sequence(dev_priv, bdb); parse_ddi_ports(dev_priv, bdb); +out: + if (!vbt) { + DRM_INFO("Failed to find VBIOS tables (VBT)\n"); + init_vbt_missing_defaults(dev_priv); + } + if (bios) pci_unmap_rom(pdev, bios); - - return 0; } /** diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 7044e9a6abf7..9ccbf26124c6 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -27,22 +27,105 @@ #include "i915_drv.h" +static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b) +{ + struct intel_wait *wait; + unsigned int result = 0; + + lockdep_assert_held(&b->irq_lock); + + wait = b->irq_wait; + if (wait) { + result = ENGINE_WAKEUP_WAITER; + if (wake_up_process(wait->tsk)) + result |= ENGINE_WAKEUP_ASLEEP; + } + + return result; +} + +unsigned int intel_engine_wakeup(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + unsigned long flags; + unsigned int result; + + spin_lock_irqsave(&b->irq_lock, flags); + result = __intel_breadcrumbs_wakeup(b); + spin_unlock_irqrestore(&b->irq_lock, 
flags); + + return result; +} + +static unsigned long wait_timeout(void) +{ + return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES); +} + +static noinline void missed_breadcrumb(struct intel_engine_cs *engine) +{ + DRM_DEBUG_DRIVER("%s missed breadcrumb at %pF, irq posted? %s\n", + engine->name, __builtin_return_address(0), + yesno(test_bit(ENGINE_IRQ_BREADCRUMB, + &engine->irq_posted))); + + set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); +} + static void intel_breadcrumbs_hangcheck(unsigned long data) { struct intel_engine_cs *engine = (struct intel_engine_cs *)data; struct intel_breadcrumbs *b = &engine->breadcrumbs; - if (!b->irq_enabled) + if (!b->irq_armed) return; - if (time_before(jiffies, b->timeout)) { - mod_timer(&b->hangcheck, b->timeout); + if (b->hangcheck_interrupts != atomic_read(&engine->irq_count)) { + b->hangcheck_interrupts = atomic_read(&engine->irq_count); + mod_timer(&b->hangcheck, wait_timeout()); return; } - DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name); - set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); - mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); + /* We keep the hangcheck timer alive until we disarm the irq, even + * if there are no waiters at present. + * + * If the waiter was currently running, assume it hasn't had a chance + * to process the pending interrupt (e.g, low priority task on a loaded + * system) and wait until it sleeps before declaring a missed interrupt. + * + * If the waiter was asleep (and not even pending a wakeup), then we + * must have missed an interrupt as the GPU has stopped advancing + * but we still have a waiter. Assuming all batches complete within + * DRM_I915_HANGCHECK_JIFFIES [1.5s]! 
+ */ + if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) { + missed_breadcrumb(engine); + mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); + } else { + mod_timer(&b->hangcheck, wait_timeout()); + } +} + +static void intel_breadcrumbs_fake_irq(unsigned long data) +{ + struct intel_engine_cs *engine = (struct intel_engine_cs *)data; + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + /* The timer persists in case we cannot enable interrupts, + * or if we have previously seen seqno/interrupt incoherency + * ("missed interrupt" syndrome, better known as a "missed breadcrumb"). + * Here the worker will wake up every jiffie in order to kick the + * oldest waiter to do the coherent seqno check. + */ + + spin_lock_irq(&b->irq_lock); + if (!__intel_breadcrumbs_wakeup(b)) + __intel_engine_disarm_breadcrumbs(engine); + spin_unlock_irq(&b->irq_lock); + if (!b->irq_armed) + return; + + mod_timer(&b->fake_irq, jiffies + 1); /* Ensure that even if the GPU hangs, we get woken up. * @@ -56,33 +139,13 @@ static void intel_breadcrumbs_hangcheck(unsigned long data) i915_queue_hangcheck(engine->i915); } -static unsigned long wait_timeout(void) -{ - return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES); -} - -static void intel_breadcrumbs_fake_irq(unsigned long data) -{ - struct intel_engine_cs *engine = (struct intel_engine_cs *)data; - - /* - * The timer persists in case we cannot enable interrupts, - * or if we have previously seen seqno/interrupt incoherency - * ("missed interrupt" syndrome). Here the worker will wake up - * every jiffie in order to kick the oldest waiter to do the - * coherent seqno check. - */ - if (intel_engine_wakeup(engine)) - mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); -} - static void irq_enable(struct intel_engine_cs *engine) { /* Enabling the IRQ may miss the generation of the interrupt, but * we still need to force the barrier before reading the seqno, * just in case. 
*/ - engine->breadcrumbs.irq_posted = true; + set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); /* Caller disables interrupts */ spin_lock(&engine->i915->irq_lock); @@ -96,61 +159,123 @@ static void irq_disable(struct intel_engine_cs *engine) spin_lock(&engine->i915->irq_lock); engine->irq_disable(engine); spin_unlock(&engine->i915->irq_lock); +} - engine->breadcrumbs.irq_posted = false; +void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + lockdep_assert_held(&b->irq_lock); + GEM_BUG_ON(b->irq_wait); + + if (b->irq_enabled) { + irq_disable(engine); + b->irq_enabled = false; + } + + b->irq_armed = false; } -static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) { - struct intel_engine_cs *engine = - container_of(b, struct intel_engine_cs, breadcrumbs); - struct drm_i915_private *i915 = engine->i915; + struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct intel_wait *wait, *n, *first; - assert_spin_locked(&b->lock); - if (b->rpm_wakelock) + if (!b->irq_armed) return; - /* Since we are waiting on a request, the GPU should be busy - * and should have its own rpm reference. For completeness, - * record an rpm reference for ourselves to cover the - * interrupt we unmask. + /* We only disarm the irq when we are idle (all requests completed), + * so if the bottom-half remains asleep, it missed the request + * completion. */ - intel_runtime_pm_get_noresume(i915); - b->rpm_wakelock = true; - /* No interrupts? Kick the waiter every jiffie! 
*/ - if (intel_irqs_enabled(i915)) { - if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings)) - irq_enable(engine); - b->irq_enabled = true; + spin_lock_irq(&b->rb_lock); + + spin_lock(&b->irq_lock); + first = fetch_and_zero(&b->irq_wait); + __intel_engine_disarm_breadcrumbs(engine); + spin_unlock(&b->irq_lock); + + rbtree_postorder_for_each_entry_safe(wait, n, &b->waiters, node) { + RB_CLEAR_NODE(&wait->node); + if (wake_up_process(wait->tsk) && wait == first) + missed_breadcrumb(engine); } + b->waiters = RB_ROOT; + + spin_unlock_irq(&b->rb_lock); +} - if (!b->irq_enabled || - test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) { +static bool use_fake_irq(const struct intel_breadcrumbs *b) +{ + const struct intel_engine_cs *engine = + container_of(b, struct intel_engine_cs, breadcrumbs); + + if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) + return false; + + /* Only start with the heavy weight fake irq timer if we have not + * seen any interrupts since enabling it the first time. If the + * interrupts are still arriving, it means we made a mistake in our + * engine->seqno_barrier(), a timing error that should be transient + * and unlikely to reoccur. 
+ */ + return atomic_read(&engine->irq_count) == b->hangcheck_interrupts; +} + +static void enable_fake_irq(struct intel_breadcrumbs *b) +{ + /* Ensure we never sleep indefinitely */ + if (!b->irq_enabled || use_fake_irq(b)) mod_timer(&b->fake_irq, jiffies + 1); - } else { - /* Ensure we never sleep indefinitely */ - GEM_BUG_ON(!time_after(b->timeout, jiffies)); - mod_timer(&b->hangcheck, b->timeout); - } + else + mod_timer(&b->hangcheck, wait_timeout()); } -static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b) +static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) { struct intel_engine_cs *engine = container_of(b, struct intel_engine_cs, breadcrumbs); + struct drm_i915_private *i915 = engine->i915; - assert_spin_locked(&b->lock); - if (!b->rpm_wakelock) + lockdep_assert_held(&b->irq_lock); + if (b->irq_armed) return; - if (b->irq_enabled) { - irq_disable(engine); - b->irq_enabled = false; + /* The breadcrumb irq will be disarmed on the interrupt after the + * waiters are signaled. This gives us a single interrupt window in + * which we can add a new waiter and avoid the cost of re-enabling + * the irq. + */ + b->irq_armed = true; + GEM_BUG_ON(b->irq_enabled); + + if (I915_SELFTEST_ONLY(b->mock)) { + /* For our mock objects we want to avoid interaction + * with the real hardware (which is not set up). So + * we simply pretend we have enabled the powerwell + * and the irq, and leave it up to the mock + * implementation to call intel_engine_wakeup() + * itself when it wants to simulate a user interrupt, + */ + return; + } + + /* Since we are waiting on a request, the GPU should be busy + * and should have its own rpm reference. This is tracked + * by i915->gt.awake, we can forgo holding our own wakref + * for the interrupt as before i915->gt.awake is released (when + * the driver is idle) we disarm the breadcrumbs. + */ + + /* No interrupts? Kick the waiter every jiffie! 
*/ + if (intel_irqs_enabled(i915)) { + if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings)) + irq_enable(engine); + b->irq_enabled = true; } - intel_runtime_pm_put(engine->i915); - b->rpm_wakelock = false; + enable_fake_irq(b); } static inline struct intel_wait *to_wait(struct rb_node *node) @@ -161,10 +286,16 @@ static inline struct intel_wait *to_wait(struct rb_node *node) static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b, struct intel_wait *wait) { - assert_spin_locked(&b->lock); + lockdep_assert_held(&b->rb_lock); + GEM_BUG_ON(b->irq_wait == wait); /* This request is completed, so remove it from the tree, mark it as - * complete, and *then* wake up the associated task. + * complete, and *then* wake up the associated task. N.B. when the + * task wakes up, it will find the empty rb_node, discern that it + * has already been removed from the tree and skip the serialisation + * of the b->rb_lock and b->irq_lock. This means that the destruction + * of the intel_wait is not serialised with the interrupt handler + * by the waiter - it must instead be serialised by the caller. */ rb_erase(&wait->node, &b->waiters); RB_CLEAR_NODE(&wait->node); @@ -172,6 +303,25 @@ static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b, wake_up_process(wait->tsk); /* implicit smp_wmb() */ } +static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine, + struct rb_node *next) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + spin_lock(&b->irq_lock); + GEM_BUG_ON(!b->irq_armed); + GEM_BUG_ON(!b->irq_wait); + b->irq_wait = to_wait(next); + spin_unlock(&b->irq_lock); + + /* We always wake up the next waiter that takes over as the bottom-half + * as we may delegate not only the irq-seqno barrier to the next waiter + * but also the task of waking up concurrent waiters. 
+ */ + if (next) + wake_up_process(to_wait(next)->tsk); +} + static bool __intel_engine_add_wait(struct intel_engine_cs *engine, struct intel_wait *wait) { @@ -235,44 +385,10 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, } rb_link_node(&wait->node, parent, p); rb_insert_color(&wait->node, &b->waiters); - GEM_BUG_ON(!first && !rcu_access_pointer(b->irq_seqno_bh)); - - if (completed) { - struct rb_node *next = rb_next(completed); - - GEM_BUG_ON(!next && !first); - if (next && next != &wait->node) { - GEM_BUG_ON(first); - b->timeout = wait_timeout(); - b->first_wait = to_wait(next); - rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk); - /* As there is a delay between reading the current - * seqno, processing the completed tasks and selecting - * the next waiter, we may have missed the interrupt - * and so need for the next bottom-half to wakeup. - * - * Also as we enable the IRQ, we may miss the - * interrupt for that seqno, so we have to wake up - * the next bottom-half in order to do a coherent check - * in case the seqno passed. - */ - __intel_breadcrumbs_enable_irq(b); - if (READ_ONCE(b->irq_posted)) - wake_up_process(to_wait(next)->tsk); - } - - do { - struct intel_wait *crumb = to_wait(completed); - completed = rb_prev(completed); - __intel_breadcrumbs_finish(b, crumb); - } while (completed); - } if (first) { - GEM_BUG_ON(rb_first(&b->waiters) != &wait->node); - b->timeout = wait_timeout(); - b->first_wait = wait; - rcu_assign_pointer(b->irq_seqno_bh, wait->tsk); + spin_lock(&b->irq_lock); + b->irq_wait = wait; /* After assigning ourselves as the new bottom-half, we must * perform a cursory check to prevent a missed interrupt. * Either we miss the interrupt whilst programming the hardware, @@ -282,10 +398,31 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, * and so we miss the wake up. 
*/ __intel_breadcrumbs_enable_irq(b); + spin_unlock(&b->irq_lock); + } + + if (completed) { + /* Advance the bottom-half (b->irq_wait) before we wake up + * the waiters who may scribble over their intel_wait + * just as the interrupt handler is dereferencing it via + * b->irq_wait. + */ + if (!first) { + struct rb_node *next = rb_next(completed); + GEM_BUG_ON(next == &wait->node); + __intel_breadcrumbs_next(engine, next); + } + + do { + struct intel_wait *crumb = to_wait(completed); + completed = rb_prev(completed); + __intel_breadcrumbs_finish(b, crumb); + } while (completed); } - GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh)); - GEM_BUG_ON(!b->first_wait); - GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node); + + GEM_BUG_ON(!b->irq_wait); + GEM_BUG_ON(!b->irq_armed); + GEM_BUG_ON(rb_first(&b->waiters) != &b->irq_wait->node); return first; } @@ -296,9 +433,9 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine, struct intel_breadcrumbs *b = &engine->breadcrumbs; bool first; - spin_lock_irq(&b->lock); + spin_lock_irq(&b->rb_lock); first = __intel_engine_add_wait(engine, wait); - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); return first; } @@ -317,29 +454,20 @@ static inline int wakeup_priority(struct intel_breadcrumbs *b, return tsk->prio; } -void intel_engine_remove_wait(struct intel_engine_cs *engine, - struct intel_wait *wait) +static void __intel_engine_remove_wait(struct intel_engine_cs *engine, + struct intel_wait *wait) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - /* Quick check to see if this waiter was already decoupled from - * the tree by the bottom-half to avoid contention on the spinlock - * by the herd. 
- */ - if (RB_EMPTY_NODE(&wait->node)) - return; - - spin_lock_irq(&b->lock); + lockdep_assert_held(&b->rb_lock); if (RB_EMPTY_NODE(&wait->node)) - goto out_unlock; + goto out; - if (b->first_wait == wait) { + if (b->irq_wait == wait) { const int priority = wakeup_priority(b, wait->tsk); struct rb_node *next; - GEM_BUG_ON(rcu_access_pointer(b->irq_seqno_bh) != wait->tsk); - /* We are the current bottom-half. Find the next candidate, * the first waiter in the queue on the remaining oldest * request. As multiple seqnos may complete in the time it @@ -372,25 +500,7 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, } } - if (next) { - /* In our haste, we may have completed the first waiter - * before we enabled the interrupt. Do so now as we - * have a second waiter for a future seqno. Afterwards, - * we have to wake up that waiter in case we missed - * the interrupt, or if we have to handle an - * exception rather than a seqno completion. - */ - b->timeout = wait_timeout(); - b->first_wait = to_wait(next); - rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk); - if (b->first_wait->seqno != wait->seqno) - __intel_breadcrumbs_enable_irq(b); - wake_up_process(b->first_wait->tsk); - } else { - b->first_wait = NULL; - rcu_assign_pointer(b->irq_seqno_bh, NULL); - __intel_breadcrumbs_disable_irq(b); - } + __intel_breadcrumbs_next(engine, next); } else { GEM_BUG_ON(rb_first(&b->waiters) == &wait->node); } @@ -398,15 +508,37 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, GEM_BUG_ON(RB_EMPTY_NODE(&wait->node)); rb_erase(&wait->node, &b->waiters); -out_unlock: - GEM_BUG_ON(b->first_wait == wait); +out: + GEM_BUG_ON(b->irq_wait == wait); GEM_BUG_ON(rb_first(&b->waiters) != - (b->first_wait ? &b->first_wait->node : NULL)); - GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters)); - spin_unlock_irq(&b->lock); + (b->irq_wait ? 
&b->irq_wait->node : NULL)); +} + +void intel_engine_remove_wait(struct intel_engine_cs *engine, + struct intel_wait *wait) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + /* Quick check to see if this waiter was already decoupled from + * the tree by the bottom-half to avoid contention on the spinlock + * by the herd. + */ + if (RB_EMPTY_NODE(&wait->node)) { + GEM_BUG_ON(READ_ONCE(b->irq_wait) == wait); + return; + } + + spin_lock_irq(&b->rb_lock); + __intel_engine_remove_wait(engine, wait); + spin_unlock_irq(&b->rb_lock); } -static bool signal_complete(struct drm_i915_gem_request *request) +static bool signal_valid(const struct drm_i915_gem_request *request) +{ + return intel_wait_check_request(&request->signaling.wait, request); +} + +static bool signal_complete(const struct drm_i915_gem_request *request) { if (!request) return false; @@ -415,7 +547,7 @@ static bool signal_complete(struct drm_i915_gem_request *request) * signalled that this wait is already completed. */ if (intel_wait_complete(&request->signaling.wait)) - return true; + return signal_valid(request); /* Carefully check if the request is complete, giving time for the * seqno to be visible or if the GPU hung. @@ -448,6 +580,8 @@ static int intel_breadcrumbs_signaler(void *arg) signaler_set_rtpriority(); do { + bool do_schedule = true; + set_current_state(TASK_INTERRUPTIBLE); /* We are either woken up by the interrupt bottom-half, @@ -458,40 +592,73 @@ static int intel_breadcrumbs_signaler(void *arg) * need to wait for a new interrupt from the GPU or for * a new client. */ - request = READ_ONCE(b->first_signal); + rcu_read_lock(); + request = rcu_dereference(b->first_signal); + if (request) + request = i915_gem_request_get_rcu(request); + rcu_read_unlock(); if (signal_complete(request)) { - /* Wake up all other completed waiters and select the - * next bottom-half for the next user interrupt. 
- */ - intel_engine_remove_wait(engine, - &request->signaling.wait); - local_bh_disable(); dma_fence_signal(&request->fence); local_bh_enable(); /* kick start the tasklets */ + spin_lock_irq(&b->rb_lock); + + /* Wake up all other completed waiters and select the + * next bottom-half for the next user interrupt. + */ + __intel_engine_remove_wait(engine, + &request->signaling.wait); + /* Find the next oldest signal. Note that as we have * not been holding the lock, another client may * have installed an even older signal than the one * we just completed - so double check we are still * the oldest before picking the next one. */ - spin_lock_irq(&b->lock); - if (request == b->first_signal) { + if (request == rcu_access_pointer(b->first_signal)) { struct rb_node *rb = rb_next(&request->signaling.node); - b->first_signal = rb ? to_signaler(rb) : NULL; + rcu_assign_pointer(b->first_signal, + rb ? to_signaler(rb) : NULL); } rb_erase(&request->signaling.node, &b->signals); - spin_unlock_irq(&b->lock); + RB_CLEAR_NODE(&request->signaling.node); + + spin_unlock_irq(&b->rb_lock); i915_gem_request_put(request); - } else { - if (kthread_should_stop()) + + /* If the engine is saturated we may be continually + * processing completed requests. This angers the + * NMI watchdog if we never let anything else + * have access to the CPU. Let's pretend to be nice + * and relinquish the CPU if we burn through the + * entire RT timeslice! 
+ */ + do_schedule = need_resched(); + } + + if (unlikely(do_schedule)) { + DEFINE_WAIT(exec); + + if (kthread_should_park()) + kthread_parkme(); + + if (kthread_should_stop()) { + GEM_BUG_ON(request); break; + } + + if (request) + add_wait_queue(&request->execute, &exec); schedule(); + + if (request) + remove_wait_queue(&request->execute, &exec); } + i915_gem_request_put(request); } while (1); __set_current_state(TASK_RUNNING); @@ -504,24 +671,29 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) struct intel_breadcrumbs *b = &engine->breadcrumbs; struct rb_node *parent, **p; bool first, wakeup; + u32 seqno; /* Note that we may be called from an interrupt handler on another * device (e.g. nouveau signaling a fence completion causing us * to submit a request, and so enable signaling). As such, - * we need to make sure that all other users of b->lock protect + * we need to make sure that all other users of b->rb_lock protect * against interrupts, i.e. use spin_lock_irqsave. */ /* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */ - assert_spin_locked(&request->lock); - if (!request->global_seqno) + GEM_BUG_ON(!irqs_disabled()); + lockdep_assert_held(&request->lock); + + seqno = i915_gem_request_global_seqno(request); + if (!seqno) return; request->signaling.wait.tsk = b->signaler; - request->signaling.wait.seqno = request->global_seqno; + request->signaling.wait.request = request; + request->signaling.wait.seqno = seqno; i915_gem_request_get(request); - spin_lock(&b->lock); + spin_lock(&b->rb_lock); /* First add ourselves into the list of waiters, but register our * bottom-half as the signaller thread. 
As per usual, only the oldest @@ -542,8 +714,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) p = &b->signals.rb_node; while (*p) { parent = *p; - if (i915_seqno_passed(request->global_seqno, - to_signaler(parent)->global_seqno)) { + if (i915_seqno_passed(seqno, + to_signaler(parent)->signaling.wait.seqno)) { p = &parent->rb_right; first = false; } else { @@ -553,20 +725,52 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) rb_link_node(&request->signaling.node, parent, p); rb_insert_color(&request->signaling.node, &b->signals); if (first) - smp_store_mb(b->first_signal, request); + rcu_assign_pointer(b->first_signal, request); - spin_unlock(&b->lock); + spin_unlock(&b->rb_lock); if (wakeup) wake_up_process(b->signaler); } +void intel_engine_cancel_signaling(struct drm_i915_gem_request *request) +{ + struct intel_engine_cs *engine = request->engine; + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + GEM_BUG_ON(!irqs_disabled()); + lockdep_assert_held(&request->lock); + GEM_BUG_ON(!request->signaling.wait.seqno); + + spin_lock(&b->rb_lock); + + if (!RB_EMPTY_NODE(&request->signaling.node)) { + if (request == rcu_access_pointer(b->first_signal)) { + struct rb_node *rb = + rb_next(&request->signaling.node); + rcu_assign_pointer(b->first_signal, + rb ? 
to_signaler(rb) : NULL); + } + rb_erase(&request->signaling.node, &b->signals); + RB_CLEAR_NODE(&request->signaling.node); + i915_gem_request_put(request); + } + + __intel_engine_remove_wait(engine, &request->signaling.wait); + + spin_unlock(&b->rb_lock); + + request->signaling.wait.seqno = 0; +} + int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; struct task_struct *tsk; - spin_lock_init(&b->lock); + spin_lock_init(&b->rb_lock); + spin_lock_init(&b->irq_lock); + setup_timer(&b->fake_irq, intel_breadcrumbs_fake_irq, (unsigned long)engine); @@ -604,20 +808,26 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) struct intel_breadcrumbs *b = &engine->breadcrumbs; cancel_fake_irq(engine); - spin_lock_irq(&b->lock); + spin_lock_irq(&b->irq_lock); - __intel_breadcrumbs_disable_irq(b); - if (intel_engine_has_waiter(engine)) { - b->timeout = wait_timeout(); - __intel_breadcrumbs_enable_irq(b); - if (READ_ONCE(b->irq_posted)) - wake_up_process(b->first_wait->tsk); - } else { - /* sanitize the IMR and unmask any auxiliary interrupts */ + if (b->irq_enabled) + irq_enable(engine); + else irq_disable(engine); - } - spin_unlock_irq(&b->lock); + /* We set the IRQ_BREADCRUMB bit when we enable the irq presuming the + * GPU is active and may have already executed the MI_USER_INTERRUPT + * before the CPU is ready to receive. However, the engine is currently + * idle (we haven't started it yet), there is no possibility for a + * missed interrupt as we enabled the irq and so we can clear the + * immediate wakeup (until a real interrupt arrives for the waiter). 
+ */ + clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); + + if (b->irq_armed) + enable_fake_irq(b); + + spin_unlock_irq(&b->irq_lock); } void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) @@ -625,9 +835,9 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) struct intel_breadcrumbs *b = &engine->breadcrumbs; /* The engines should be idle and all requests accounted for! */ - WARN_ON(READ_ONCE(b->first_wait)); + WARN_ON(READ_ONCE(b->irq_wait)); WARN_ON(!RB_EMPTY_ROOT(&b->waiters)); - WARN_ON(READ_ONCE(b->first_signal)); + WARN_ON(rcu_access_pointer(b->first_signal)); WARN_ON(!RB_EMPTY_ROOT(&b->signals)); if (!IS_ERR_OR_NULL(b->signaler)) @@ -636,29 +846,28 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) cancel_fake_irq(engine); } -unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915) +bool intel_breadcrumbs_busy(struct intel_engine_cs *engine) { - struct intel_engine_cs *engine; - enum intel_engine_id id; - unsigned int mask = 0; - - for_each_engine(engine, i915, id) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - spin_lock_irq(&b->lock); + struct intel_breadcrumbs *b = &engine->breadcrumbs; + bool busy = false; - if (b->first_wait) { - wake_up_process(b->first_wait->tsk); - mask |= intel_engine_flag(engine); - } + spin_lock_irq(&b->rb_lock); - if (b->first_signal) { - wake_up_process(b->signaler); - mask |= intel_engine_flag(engine); - } + if (b->irq_wait) { + wake_up_process(b->irq_wait->tsk); + busy = true; + } - spin_unlock_irq(&b->lock); + if (rcu_access_pointer(b->first_signal)) { + wake_up_process(b->signaler); + busy = true; } - return mask; + spin_unlock_irq(&b->rb_lock); + + return busy; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_breadcrumbs.c" +#endif diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c new file mode 100644 index 000000000000..dd3ad52b7dfe --- /dev/null +++ b/drivers/gpu/drm/i915/intel_cdclk.c 
@@ -0,0 +1,1908 @@ +/* + * Copyright © 2006-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "intel_drv.h" + +/** + * DOC: CDCLK / RAWCLK + * + * The display engine uses several different clocks to do its work. There + * are two main clocks involved that aren't directly related to the actual + * pixel clock or any symbol/bit clock of the actual output port. These + * are the core display clock (CDCLK) and RAWCLK. + * + * CDCLK clocks most of the display pipe logic, and thus its frequency + * must be high enough to support the rate at which pixels are flowing + * through the pipes. Downscaling must also be accounted as that increases + * the effective pixel rate. + * + * On several platforms the CDCLK frequency can be changed dynamically + * to minimize power consumption for a given display configuration. 
+ * Typically changes to the CDCLK frequency require all the display pipes + * to be shut down while the frequency is being changed. + * + * On SKL+ the DMC will toggle the CDCLK off/on during DC5/6 entry/exit. + * DMC will not change the active CDCLK frequency however, so that part + * will still be performed by the driver directly. + * + * RAWCLK is a fixed frequency clock, often used by various auxiliary + * blocks such as AUX CH or backlight PWM. Hence the only thing we + * really need to know about RAWCLK is its frequency so that various + * dividers can be programmed correctly. + */ + +static void fixed_133mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + cdclk_state->cdclk = 133333; +} + +static void fixed_200mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + cdclk_state->cdclk = 200000; +} + +static void fixed_266mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + cdclk_state->cdclk = 266667; +} + +static void fixed_333mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + cdclk_state->cdclk = 333333; +} + +static void fixed_400mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + cdclk_state->cdclk = 400000; +} + +static void fixed_450mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + cdclk_state->cdclk = 450000; +} + +static void i85x_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + u16 hpllcc = 0; + + /* + * 852GM/852GMV only supports 133 MHz and the HPLLCC + * encoding is different :( + * FIXME is this the right way to detect 852GM/852GMV? 
+ */ + if (pdev->revision == 0x1) { + cdclk_state->cdclk = 133333; + return; + } + + pci_bus_read_config_word(pdev->bus, + PCI_DEVFN(0, 3), HPLLCC, &hpllcc); + + /* Assume that the hardware is in the high speed state. This + * should be the default. + */ + switch (hpllcc & GC_CLOCK_CONTROL_MASK) { + case GC_CLOCK_133_200: + case GC_CLOCK_133_200_2: + case GC_CLOCK_100_200: + cdclk_state->cdclk = 200000; + break; + case GC_CLOCK_166_250: + cdclk_state->cdclk = 250000; + break; + case GC_CLOCK_100_133: + cdclk_state->cdclk = 133333; + break; + case GC_CLOCK_133_266: + case GC_CLOCK_133_266_2: + case GC_CLOCK_166_266: + cdclk_state->cdclk = 266667; + break; + } +} + +static void i915gm_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + u16 gcfgc = 0; + + pci_read_config_word(pdev, GCFGC, &gcfgc); + + if (gcfgc & GC_LOW_FREQUENCY_ENABLE) { + cdclk_state->cdclk = 133333; + return; + } + + switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { + case GC_DISPLAY_CLOCK_333_320_MHZ: + cdclk_state->cdclk = 333333; + break; + default: + case GC_DISPLAY_CLOCK_190_200_MHZ: + cdclk_state->cdclk = 190000; + break; + } +} + +static void i945gm_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + u16 gcfgc = 0; + + pci_read_config_word(pdev, GCFGC, &gcfgc); + + if (gcfgc & GC_LOW_FREQUENCY_ENABLE) { + cdclk_state->cdclk = 133333; + return; + } + + switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { + case GC_DISPLAY_CLOCK_333_320_MHZ: + cdclk_state->cdclk = 320000; + break; + default: + case GC_DISPLAY_CLOCK_190_200_MHZ: + cdclk_state->cdclk = 200000; + break; + } +} + +static unsigned int intel_hpll_vco(struct drm_i915_private *dev_priv) +{ + static const unsigned int blb_vco[8] = { + [0] = 3200000, + [1] = 4000000, + [2] = 5333333, + [3] = 4800000, + [4] = 6400000, + }; + static const unsigned int pnv_vco[8] = { + [0] = 3200000, + [1] = 
4000000, + [2] = 5333333, + [3] = 4800000, + [4] = 2666667, + }; + static const unsigned int cl_vco[8] = { + [0] = 3200000, + [1] = 4000000, + [2] = 5333333, + [3] = 6400000, + [4] = 3333333, + [5] = 3566667, + [6] = 4266667, + }; + static const unsigned int elk_vco[8] = { + [0] = 3200000, + [1] = 4000000, + [2] = 5333333, + [3] = 4800000, + }; + static const unsigned int ctg_vco[8] = { + [0] = 3200000, + [1] = 4000000, + [2] = 5333333, + [3] = 6400000, + [4] = 2666667, + [5] = 4266667, + }; + const unsigned int *vco_table; + unsigned int vco; + uint8_t tmp = 0; + + /* FIXME other chipsets? */ + if (IS_GM45(dev_priv)) + vco_table = ctg_vco; + else if (IS_G45(dev_priv)) + vco_table = elk_vco; + else if (IS_I965GM(dev_priv)) + vco_table = cl_vco; + else if (IS_PINEVIEW(dev_priv)) + vco_table = pnv_vco; + else if (IS_G33(dev_priv)) + vco_table = blb_vco; + else + return 0; + + tmp = I915_READ(IS_MOBILE(dev_priv) ? HPLLVCO_MOBILE : HPLLVCO); + + vco = vco_table[tmp & 0x7]; + if (vco == 0) + DRM_ERROR("Bad HPLL VCO (HPLLVCO=0x%02x)\n", tmp); + else + DRM_DEBUG_KMS("HPLL VCO %u kHz\n", vco); + + return vco; +} + +static void g33_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + static const uint8_t div_3200[] = { 12, 10, 8, 7, 5, 16 }; + static const uint8_t div_4000[] = { 14, 12, 10, 8, 6, 20 }; + static const uint8_t div_4800[] = { 20, 14, 12, 10, 8, 24 }; + static const uint8_t div_5333[] = { 20, 16, 12, 12, 8, 28 }; + const uint8_t *div_table; + unsigned int cdclk_sel; + uint16_t tmp = 0; + + cdclk_state->vco = intel_hpll_vco(dev_priv); + + pci_read_config_word(pdev, GCFGC, &tmp); + + cdclk_sel = (tmp >> 4) & 0x7; + + if (cdclk_sel >= ARRAY_SIZE(div_3200)) + goto fail; + + switch (cdclk_state->vco) { + case 3200000: + div_table = div_3200; + break; + case 4000000: + div_table = div_4000; + break; + case 4800000: + div_table = div_4800; + break; + case 5333333: + div_table = 
div_5333; + break; + default: + goto fail; + } + + cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, + div_table[cdclk_sel]); + return; + +fail: + DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u kHz, CFGC=0x%08x\n", + cdclk_state->vco, tmp); + cdclk_state->cdclk = 190476; +} + +static void pnv_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + u16 gcfgc = 0; + + pci_read_config_word(pdev, GCFGC, &gcfgc); + + switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { + case GC_DISPLAY_CLOCK_267_MHZ_PNV: + cdclk_state->cdclk = 266667; + break; + case GC_DISPLAY_CLOCK_333_MHZ_PNV: + cdclk_state->cdclk = 333333; + break; + case GC_DISPLAY_CLOCK_444_MHZ_PNV: + cdclk_state->cdclk = 444444; + break; + case GC_DISPLAY_CLOCK_200_MHZ_PNV: + cdclk_state->cdclk = 200000; + break; + default: + DRM_ERROR("Unknown pnv display core clock 0x%04x\n", gcfgc); + case GC_DISPLAY_CLOCK_133_MHZ_PNV: + cdclk_state->cdclk = 133333; + break; + case GC_DISPLAY_CLOCK_167_MHZ_PNV: + cdclk_state->cdclk = 166667; + break; + } +} + +static void i965gm_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + static const uint8_t div_3200[] = { 16, 10, 8 }; + static const uint8_t div_4000[] = { 20, 12, 10 }; + static const uint8_t div_5333[] = { 24, 16, 14 }; + const uint8_t *div_table; + unsigned int cdclk_sel; + uint16_t tmp = 0; + + cdclk_state->vco = intel_hpll_vco(dev_priv); + + pci_read_config_word(pdev, GCFGC, &tmp); + + cdclk_sel = ((tmp >> 8) & 0x1f) - 1; + + if (cdclk_sel >= ARRAY_SIZE(div_3200)) + goto fail; + + switch (cdclk_state->vco) { + case 3200000: + div_table = div_3200; + break; + case 4000000: + div_table = div_4000; + break; + case 5333333: + div_table = div_5333; + break; + default: + goto fail; + } + + cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, + div_table[cdclk_sel]); + return; + +fail: + DRM_ERROR("Unable 
to determine CDCLK. HPLL VCO=%u kHz, CFGC=0x%04x\n", + cdclk_state->vco, tmp); + cdclk_state->cdclk = 200000; +} + +static void gm45_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + unsigned int cdclk_sel; + uint16_t tmp = 0; + + cdclk_state->vco = intel_hpll_vco(dev_priv); + + pci_read_config_word(pdev, GCFGC, &tmp); + + cdclk_sel = (tmp >> 12) & 0x1; + + switch (cdclk_state->vco) { + case 2666667: + case 4000000: + case 5333333: + cdclk_state->cdclk = cdclk_sel ? 333333 : 222222; + break; + case 3200000: + cdclk_state->cdclk = cdclk_sel ? 320000 : 228571; + break; + default: + DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u, CFGC=0x%04x\n", + cdclk_state->vco, tmp); + cdclk_state->cdclk = 222222; + break; + } +} + +static void hsw_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + uint32_t lcpll = I915_READ(LCPLL_CTL); + uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; + + if (lcpll & LCPLL_CD_SOURCE_FCLK) + cdclk_state->cdclk = 800000; + else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) + cdclk_state->cdclk = 450000; + else if (freq == LCPLL_CLK_FREQ_450) + cdclk_state->cdclk = 450000; + else if (IS_HSW_ULT(dev_priv)) + cdclk_state->cdclk = 337500; + else + cdclk_state->cdclk = 540000; +} + +static int vlv_calc_cdclk(struct drm_i915_private *dev_priv, + int max_pixclk) +{ + int freq_320 = (dev_priv->hpll_freq << 1) % 320000 != 0 ? + 333333 : 320000; + int limit = IS_CHERRYVIEW(dev_priv) ? 95 : 90; + + /* + * We seem to get an unstable or solid color picture at 200MHz. + * Not sure what's wrong. For now use 200MHz only when all pipes + * are off. 
+ */ + if (!IS_CHERRYVIEW(dev_priv) && + max_pixclk > freq_320*limit/100) + return 400000; + else if (max_pixclk > 266667*limit/100) + return freq_320; + else if (max_pixclk > 0) + return 266667; + else + return 200000; +} + +static void vlv_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + cdclk_state->vco = vlv_get_hpll_vco(dev_priv); + cdclk_state->cdclk = vlv_get_cck_clock(dev_priv, "cdclk", + CCK_DISPLAY_CLOCK_CONTROL, + cdclk_state->vco); +} + +static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv) +{ + unsigned int credits, default_credits; + + if (IS_CHERRYVIEW(dev_priv)) + default_credits = PFI_CREDIT(12); + else + default_credits = PFI_CREDIT(8); + + if (dev_priv->cdclk.hw.cdclk >= dev_priv->czclk_freq) { + /* CHV suggested value is 31 or 63 */ + if (IS_CHERRYVIEW(dev_priv)) + credits = PFI_CREDIT_63; + else + credits = PFI_CREDIT(15); + } else { + credits = default_credits; + } + + /* + * WA - write default credits before re-programming + * FIXME: should we also set the resend bit here? + */ + I915_WRITE(GCI_CONTROL, VGA_FAST_MODE_DISABLE | + default_credits); + + I915_WRITE(GCI_CONTROL, VGA_FAST_MODE_DISABLE | + credits | PFI_CREDIT_RESEND); + + /* + * FIXME is this guaranteed to clear + * immediately or should we poll for it? 
+ */ + WARN_ON(I915_READ(GCI_CONTROL) & PFI_CREDIT_RESEND); +} + +static void vlv_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) +{ + int cdclk = cdclk_state->cdclk; + u32 val, cmd; + + if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */ + cmd = 2; + else if (cdclk == 266667) + cmd = 1; + else + cmd = 0; + + mutex_lock(&dev_priv->rps.hw_lock); + val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); + val &= ~DSPFREQGUAR_MASK; + val |= (cmd << DSPFREQGUAR_SHIFT); + vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); + if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & + DSPFREQSTAT_MASK) == (cmd << DSPFREQSTAT_SHIFT), + 50)) { + DRM_ERROR("timed out waiting for CDclk change\n"); + } + mutex_unlock(&dev_priv->rps.hw_lock); + + mutex_lock(&dev_priv->sb_lock); + + if (cdclk == 400000) { + u32 divider; + + divider = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, + cdclk) - 1; + + /* adjust cdclk divider */ + val = vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL); + val &= ~CCK_FREQUENCY_VALUES; + val |= divider; + vlv_cck_write(dev_priv, CCK_DISPLAY_CLOCK_CONTROL, val); + + if (wait_for((vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL) & + CCK_FREQUENCY_STATUS) == (divider << CCK_FREQUENCY_STATUS_SHIFT), + 50)) + DRM_ERROR("timed out waiting for CDclk change\n"); + } + + /* adjust self-refresh exit latency value */ + val = vlv_bunit_read(dev_priv, BUNIT_REG_BISOC); + val &= ~0x7f; + + /* + * For high bandwidth configs, we set a higher latency in the bunit + * so that the core display fetch happens in time to avoid underruns. 
+ */ + if (cdclk == 400000) + val |= 4500 / 250; /* 4.5 usec */ + else + val |= 3000 / 250; /* 3.0 usec */ + vlv_bunit_write(dev_priv, BUNIT_REG_BISOC, val); + + mutex_unlock(&dev_priv->sb_lock); + + intel_update_cdclk(dev_priv); + + vlv_program_pfi_credits(dev_priv); +} + +static void chv_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) +{ + int cdclk = cdclk_state->cdclk; + u32 val, cmd; + + switch (cdclk) { + case 333333: + case 320000: + case 266667: + case 200000: + break; + default: + MISSING_CASE(cdclk); + return; + } + + /* + * Specs are full of misinformation, but testing on actual + * hardware has shown that we just need to write the desired + * CCK divider into the Punit register. + */ + cmd = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; + + mutex_lock(&dev_priv->rps.hw_lock); + val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); + val &= ~DSPFREQGUAR_MASK_CHV; + val |= (cmd << DSPFREQGUAR_SHIFT_CHV); + vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); + if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & + DSPFREQSTAT_MASK_CHV) == (cmd << DSPFREQSTAT_SHIFT_CHV), + 50)) { + DRM_ERROR("timed out waiting for CDclk change\n"); + } + mutex_unlock(&dev_priv->rps.hw_lock); + + intel_update_cdclk(dev_priv); + + vlv_program_pfi_credits(dev_priv); +} + +static int bdw_calc_cdclk(int max_pixclk) +{ + if (max_pixclk > 540000) + return 675000; + else if (max_pixclk > 450000) + return 540000; + else if (max_pixclk > 337500) + return 450000; + else + return 337500; +} + +static void bdw_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + uint32_t lcpll = I915_READ(LCPLL_CTL); + uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; + + if (lcpll & LCPLL_CD_SOURCE_FCLK) + cdclk_state->cdclk = 800000; + else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) + cdclk_state->cdclk = 450000; + else if (freq == LCPLL_CLK_FREQ_450) + cdclk_state->cdclk = 450000; + else if (freq == 
LCPLL_CLK_FREQ_54O_BDW) + cdclk_state->cdclk = 540000; + else if (freq == LCPLL_CLK_FREQ_337_5_BDW) + cdclk_state->cdclk = 337500; + else + cdclk_state->cdclk = 675000; +} + +static void bdw_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) +{ + int cdclk = cdclk_state->cdclk; + uint32_t val, data; + int ret; + + if (WARN((I915_READ(LCPLL_CTL) & + (LCPLL_PLL_DISABLE | LCPLL_PLL_LOCK | + LCPLL_CD_CLOCK_DISABLE | LCPLL_ROOT_CD_CLOCK_DISABLE | + LCPLL_CD2X_CLOCK_DISABLE | LCPLL_POWER_DOWN_ALLOW | + LCPLL_CD_SOURCE_FCLK)) != LCPLL_PLL_LOCK, + "trying to change cdclk frequency with cdclk not enabled\n")) + return; + + mutex_lock(&dev_priv->rps.hw_lock); + ret = sandybridge_pcode_write(dev_priv, + BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ, 0x0); + mutex_unlock(&dev_priv->rps.hw_lock); + if (ret) { + DRM_ERROR("failed to inform pcode about cdclk change\n"); + return; + } + + val = I915_READ(LCPLL_CTL); + val |= LCPLL_CD_SOURCE_FCLK; + I915_WRITE(LCPLL_CTL, val); + + if (wait_for_us(I915_READ(LCPLL_CTL) & + LCPLL_CD_SOURCE_FCLK_DONE, 1)) + DRM_ERROR("Switching to FCLK failed\n"); + + val = I915_READ(LCPLL_CTL); + val &= ~LCPLL_CLK_FREQ_MASK; + + switch (cdclk) { + case 450000: + val |= LCPLL_CLK_FREQ_450; + data = 0; + break; + case 540000: + val |= LCPLL_CLK_FREQ_54O_BDW; + data = 1; + break; + case 337500: + val |= LCPLL_CLK_FREQ_337_5_BDW; + data = 2; + break; + case 675000: + val |= LCPLL_CLK_FREQ_675_BDW; + data = 3; + break; + default: + WARN(1, "invalid cdclk frequency\n"); + return; + } + + I915_WRITE(LCPLL_CTL, val); + + val = I915_READ(LCPLL_CTL); + val &= ~LCPLL_CD_SOURCE_FCLK; + I915_WRITE(LCPLL_CTL, val); + + if (wait_for_us((I915_READ(LCPLL_CTL) & + LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) + DRM_ERROR("Switching back to LCPLL failed\n"); + + mutex_lock(&dev_priv->rps.hw_lock); + sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, data); + mutex_unlock(&dev_priv->rps.hw_lock); + + I915_WRITE(CDCLK_FREQ, 
DIV_ROUND_CLOSEST(cdclk, 1000) - 1); + + intel_update_cdclk(dev_priv); + + WARN(cdclk != dev_priv->cdclk.hw.cdclk, + "cdclk requested %d kHz but got %d kHz\n", + cdclk, dev_priv->cdclk.hw.cdclk); +} + +static int skl_calc_cdclk(int max_pixclk, int vco) +{ + if (vco == 8640000) { + if (max_pixclk > 540000) + return 617143; + else if (max_pixclk > 432000) + return 540000; + else if (max_pixclk > 308571) + return 432000; + else + return 308571; + } else { + if (max_pixclk > 540000) + return 675000; + else if (max_pixclk > 450000) + return 540000; + else if (max_pixclk > 337500) + return 450000; + else + return 337500; + } +} + +static void skl_dpll0_update(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + u32 val; + + cdclk_state->ref = 24000; + cdclk_state->vco = 0; + + val = I915_READ(LCPLL1_CTL); + if ((val & LCPLL_PLL_ENABLE) == 0) + return; + + if (WARN_ON((val & LCPLL_PLL_LOCK) == 0)) + return; + + val = I915_READ(DPLL_CTRL1); + + if (WARN_ON((val & (DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | + DPLL_CTRL1_SSC(SKL_DPLL0) | + DPLL_CTRL1_OVERRIDE(SKL_DPLL0))) != + DPLL_CTRL1_OVERRIDE(SKL_DPLL0))) + return; + + switch (val & DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)) { + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, SKL_DPLL0): + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1350, SKL_DPLL0): + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1620, SKL_DPLL0): + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2700, SKL_DPLL0): + cdclk_state->vco = 8100000; + break; + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, SKL_DPLL0): + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2160, SKL_DPLL0): + cdclk_state->vco = 8640000; + break; + default: + MISSING_CASE(val & DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); + break; + } +} + +static void skl_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + u32 cdctl; + + skl_dpll0_update(dev_priv, cdclk_state); + + cdclk_state->cdclk = cdclk_state->ref; + + if 
(cdclk_state->vco == 0) + return; + + cdctl = I915_READ(CDCLK_CTL); + + if (cdclk_state->vco == 8640000) { + switch (cdctl & CDCLK_FREQ_SEL_MASK) { + case CDCLK_FREQ_450_432: + cdclk_state->cdclk = 432000; + break; + case CDCLK_FREQ_337_308: + cdclk_state->cdclk = 308571; + break; + case CDCLK_FREQ_540: + cdclk_state->cdclk = 540000; + break; + case CDCLK_FREQ_675_617: + cdclk_state->cdclk = 617143; + break; + default: + MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK); + break; + } + } else { + switch (cdctl & CDCLK_FREQ_SEL_MASK) { + case CDCLK_FREQ_450_432: + cdclk_state->cdclk = 450000; + break; + case CDCLK_FREQ_337_308: + cdclk_state->cdclk = 337500; + break; + case CDCLK_FREQ_540: + cdclk_state->cdclk = 540000; + break; + case CDCLK_FREQ_675_617: + cdclk_state->cdclk = 675000; + break; + default: + MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK); + break; + } + } +} + +/* convert from kHz to .1 fixpoint MHz with -1MHz offset */ +static int skl_cdclk_decimal(int cdclk) +{ + return DIV_ROUND_CLOSEST(cdclk - 1000, 500); +} + +static void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv, + int vco) +{ + bool changed = dev_priv->skl_preferred_vco_freq != vco; + + dev_priv->skl_preferred_vco_freq = vco; + + if (changed) + intel_update_max_cdclk(dev_priv); +} + +static void skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco) +{ + int min_cdclk = skl_calc_cdclk(0, vco); + u32 val; + + WARN_ON(vco != 8100000 && vco != 8640000); + + /* select the minimum CDCLK before enabling DPLL 0 */ + val = CDCLK_FREQ_337_308 | skl_cdclk_decimal(min_cdclk); + I915_WRITE(CDCLK_CTL, val); + POSTING_READ(CDCLK_CTL); + + /* + * We always enable DPLL0 with the lowest link rate possible, but still + * taking into account the VCO required to operate the eDP panel at the + * desired frequency. The usual DP link rates operate with a VCO of + * 8100 while the eDP 1.4 alternate link rates need a VCO of 8640. 
+ * The modeset code is responsible for the selection of the exact link + * rate later on, with the constraint of choosing a frequency that + * works with vco. + */ + val = I915_READ(DPLL_CTRL1); + + val &= ~(DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | DPLL_CTRL1_SSC(SKL_DPLL0) | + DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); + val |= DPLL_CTRL1_OVERRIDE(SKL_DPLL0); + if (vco == 8640000) + val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, + SKL_DPLL0); + else + val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, + SKL_DPLL0); + + I915_WRITE(DPLL_CTRL1, val); + POSTING_READ(DPLL_CTRL1); + + I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) | LCPLL_PLL_ENABLE); + + if (intel_wait_for_register(dev_priv, + LCPLL1_CTL, LCPLL_PLL_LOCK, LCPLL_PLL_LOCK, + 5)) + DRM_ERROR("DPLL0 not locked\n"); + + dev_priv->cdclk.hw.vco = vco; + + /* We'll want to keep using the current vco from now on. */ + skl_set_preferred_cdclk_vco(dev_priv, vco); +} + +static void skl_dpll0_disable(struct drm_i915_private *dev_priv) +{ + I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) & ~LCPLL_PLL_ENABLE); + if (intel_wait_for_register(dev_priv, + LCPLL1_CTL, LCPLL_PLL_LOCK, 0, + 1)) + DRM_ERROR("Couldn't disable DPLL0\n"); + + dev_priv->cdclk.hw.vco = 0; +} + +static void skl_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) +{ + int cdclk = cdclk_state->cdclk; + int vco = cdclk_state->vco; + u32 freq_select, pcu_ack; + int ret; + + WARN_ON((cdclk == 24000) != (vco == 0)); + + mutex_lock(&dev_priv->rps.hw_lock); + ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, + SKL_CDCLK_PREPARE_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, 3); + mutex_unlock(&dev_priv->rps.hw_lock); + if (ret) { + DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", + ret); + return; + } + + /* set CDCLK_CTL */ + switch (cdclk) { + case 450000: + case 432000: + freq_select = CDCLK_FREQ_450_432; + pcu_ack = 1; + break; + case 540000: + freq_select = 
CDCLK_FREQ_540; + pcu_ack = 2; + break; + case 308571: + case 337500: + default: + freq_select = CDCLK_FREQ_337_308; + pcu_ack = 0; + break; + case 617143: + case 675000: + freq_select = CDCLK_FREQ_675_617; + pcu_ack = 3; + break; + } + + if (dev_priv->cdclk.hw.vco != 0 && + dev_priv->cdclk.hw.vco != vco) + skl_dpll0_disable(dev_priv); + + if (dev_priv->cdclk.hw.vco != vco) + skl_dpll0_enable(dev_priv, vco); + + I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(cdclk)); + POSTING_READ(CDCLK_CTL); + + /* inform PCU of the change */ + mutex_lock(&dev_priv->rps.hw_lock); + sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); + mutex_unlock(&dev_priv->rps.hw_lock); + + intel_update_cdclk(dev_priv); +} + +static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv) +{ + uint32_t cdctl, expected; + + /* + * check if the pre-os initialized the display + * There is SWF18 scratchpad register defined which is set by the + * pre-os which can be used by the OS drivers to check the status + */ + if ((I915_READ(SWF_ILK(0x18)) & 0x00FFFFFF) == 0) + goto sanitize; + + intel_update_cdclk(dev_priv); + /* Is PLL enabled and locked ? */ + if (dev_priv->cdclk.hw.vco == 0 || + dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.ref) + goto sanitize; + + /* DPLL okay; verify the cdclock + * + * Noticed in some instances that the freq selection is correct but + * decimal part is programmed wrong from BIOS where pre-os does not + * enable display. Verify the same as well. 
+ */ + cdctl = I915_READ(CDCLK_CTL); + expected = (cdctl & CDCLK_FREQ_SEL_MASK) | + skl_cdclk_decimal(dev_priv->cdclk.hw.cdclk); + if (cdctl == expected) + /* All well; nothing to sanitize */ + return; + +sanitize: + DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); + + /* force cdclk programming */ + dev_priv->cdclk.hw.cdclk = 0; + /* force full PLL disable + enable */ + dev_priv->cdclk.hw.vco = -1; +} + +/** + * skl_init_cdclk - Initialize CDCLK on SKL + * @dev_priv: i915 device + * + * Initialize CDCLK for SKL and derivatives. This is generally + * done only during the display core initialization sequence, + * after which the DMC will take care of turning CDCLK off/on + * as needed. + */ +void skl_init_cdclk(struct drm_i915_private *dev_priv) +{ + struct intel_cdclk_state cdclk_state; + + skl_sanitize_cdclk(dev_priv); + + if (dev_priv->cdclk.hw.cdclk != 0 && + dev_priv->cdclk.hw.vco != 0) { + /* + * Use the current vco as our initial + * guess as to what the preferred vco is. + */ + if (dev_priv->skl_preferred_vco_freq == 0) + skl_set_preferred_cdclk_vco(dev_priv, + dev_priv->cdclk.hw.vco); + return; + } + + cdclk_state = dev_priv->cdclk.hw; + + cdclk_state.vco = dev_priv->skl_preferred_vco_freq; + if (cdclk_state.vco == 0) + cdclk_state.vco = 8100000; + cdclk_state.cdclk = skl_calc_cdclk(0, cdclk_state.vco); + + skl_set_cdclk(dev_priv, &cdclk_state); +} + +/** + * skl_uninit_cdclk - Uninitialize CDCLK on SKL + * @dev_priv: i915 device + * + * Uninitialize CDCLK for SKL and derivatives. This is done only + * during the display core uninitialization sequence. 
+ */ +void skl_uninit_cdclk(struct drm_i915_private *dev_priv) +{ + struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw; + + cdclk_state.cdclk = cdclk_state.ref; + cdclk_state.vco = 0; + + skl_set_cdclk(dev_priv, &cdclk_state); +} + +static int bxt_calc_cdclk(int max_pixclk) +{ + if (max_pixclk > 576000) + return 624000; + else if (max_pixclk > 384000) + return 576000; + else if (max_pixclk > 288000) + return 384000; + else if (max_pixclk > 144000) + return 288000; + else + return 144000; +} + +static int glk_calc_cdclk(int max_pixclk) +{ + if (max_pixclk > 2 * 158400) + return 316800; + else if (max_pixclk > 2 * 79200) + return 158400; + else + return 79200; +} + +static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) +{ + int ratio; + + if (cdclk == dev_priv->cdclk.hw.ref) + return 0; + + switch (cdclk) { + default: + MISSING_CASE(cdclk); + case 144000: + case 288000: + case 384000: + case 576000: + ratio = 60; + break; + case 624000: + ratio = 65; + break; + } + + return dev_priv->cdclk.hw.ref * ratio; +} + +static int glk_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) +{ + int ratio; + + if (cdclk == dev_priv->cdclk.hw.ref) + return 0; + + switch (cdclk) { + default: + MISSING_CASE(cdclk); + case 79200: + case 158400: + case 316800: + ratio = 33; + break; + } + + return dev_priv->cdclk.hw.ref * ratio; +} + +static void bxt_de_pll_update(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + u32 val; + + cdclk_state->ref = 19200; + cdclk_state->vco = 0; + + val = I915_READ(BXT_DE_PLL_ENABLE); + if ((val & BXT_DE_PLL_PLL_ENABLE) == 0) + return; + + if (WARN_ON((val & BXT_DE_PLL_LOCK) == 0)) + return; + + val = I915_READ(BXT_DE_PLL_CTL); + cdclk_state->vco = (val & BXT_DE_PLL_RATIO_MASK) * cdclk_state->ref; +} + +static void bxt_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + u32 divider; + int div; + + bxt_de_pll_update(dev_priv, cdclk_state); + + 
cdclk_state->cdclk = cdclk_state->ref; + + if (cdclk_state->vco == 0) + return; + + divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; + + switch (divider) { + case BXT_CDCLK_CD2X_DIV_SEL_1: + div = 2; + break; + case BXT_CDCLK_CD2X_DIV_SEL_1_5: + WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); + div = 3; + break; + case BXT_CDCLK_CD2X_DIV_SEL_2: + div = 4; + break; + case BXT_CDCLK_CD2X_DIV_SEL_4: + div = 8; + break; + default: + MISSING_CASE(divider); + return; + } + + cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, div); +} + +static void bxt_de_pll_disable(struct drm_i915_private *dev_priv) +{ + I915_WRITE(BXT_DE_PLL_ENABLE, 0); + + /* Timeout 200us */ + if (intel_wait_for_register(dev_priv, + BXT_DE_PLL_ENABLE, BXT_DE_PLL_LOCK, 0, + 1)) + DRM_ERROR("timeout waiting for DE PLL unlock\n"); + + dev_priv->cdclk.hw.vco = 0; +} + +static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) +{ + int ratio = DIV_ROUND_CLOSEST(vco, dev_priv->cdclk.hw.ref); + u32 val; + + val = I915_READ(BXT_DE_PLL_CTL); + val &= ~BXT_DE_PLL_RATIO_MASK; + val |= BXT_DE_PLL_RATIO(ratio); + I915_WRITE(BXT_DE_PLL_CTL, val); + + I915_WRITE(BXT_DE_PLL_ENABLE, BXT_DE_PLL_PLL_ENABLE); + + /* Timeout 200us */ + if (intel_wait_for_register(dev_priv, + BXT_DE_PLL_ENABLE, + BXT_DE_PLL_LOCK, + BXT_DE_PLL_LOCK, + 1)) + DRM_ERROR("timeout waiting for DE PLL lock\n"); + + dev_priv->cdclk.hw.vco = vco; +} + +static void bxt_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) +{ + int cdclk = cdclk_state->cdclk; + int vco = cdclk_state->vco; + u32 val, divider; + int ret; + + /* cdclk = vco / 2 / div{1,1.5,2,4} */ + switch (DIV_ROUND_CLOSEST(vco, cdclk)) { + case 8: + divider = BXT_CDCLK_CD2X_DIV_SEL_4; + break; + case 4: + divider = BXT_CDCLK_CD2X_DIV_SEL_2; + break; + case 3: + WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); + divider = BXT_CDCLK_CD2X_DIV_SEL_1_5; + break; + case 2: + divider = 
BXT_CDCLK_CD2X_DIV_SEL_1; + break; + default: + WARN_ON(cdclk != dev_priv->cdclk.hw.ref); + WARN_ON(vco != 0); + + divider = BXT_CDCLK_CD2X_DIV_SEL_1; + break; + } + + /* Inform power controller of upcoming frequency change */ + mutex_lock(&dev_priv->rps.hw_lock); + ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, + 0x80000000); + mutex_unlock(&dev_priv->rps.hw_lock); + + if (ret) { + DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n", + ret, cdclk); + return; + } + + if (dev_priv->cdclk.hw.vco != 0 && + dev_priv->cdclk.hw.vco != vco) + bxt_de_pll_disable(dev_priv); + + if (dev_priv->cdclk.hw.vco != vco) + bxt_de_pll_enable(dev_priv, vco); + + val = divider | skl_cdclk_decimal(cdclk); + /* + * FIXME if only the cd2x divider needs changing, it could be done + * without shutting off the pipe (if only one pipe is active). + */ + val |= BXT_CDCLK_CD2X_PIPE_NONE; + /* + * Disable SSA Precharge when CD clock frequency < 500 MHz, + * enable otherwise. + */ + if (cdclk >= 500000) + val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; + I915_WRITE(CDCLK_CTL, val); + + mutex_lock(&dev_priv->rps.hw_lock); + ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, + DIV_ROUND_UP(cdclk, 25000)); + mutex_unlock(&dev_priv->rps.hw_lock); + + if (ret) { + DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n", + ret, cdclk); + return; + } + + intel_update_cdclk(dev_priv); +} + +static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) +{ + u32 cdctl, expected; + + intel_update_cdclk(dev_priv); + + if (dev_priv->cdclk.hw.vco == 0 || + dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.ref) + goto sanitize; + + /* DPLL okay; verify the cdclock + * + * Some BIOS versions leave an incorrect decimal frequency value and + * set reserved MBZ bits in CDCLK_CTL at least during exiting from S4, + * so sanitize this register. 
+ */ + cdctl = I915_READ(CDCLK_CTL); + /* + * Let's ignore the pipe field, since BIOS could have configured the + * dividers both synching to an active pipe, or asynchronously + * (PIPE_NONE). + */ + cdctl &= ~BXT_CDCLK_CD2X_PIPE_NONE; + + expected = (cdctl & BXT_CDCLK_CD2X_DIV_SEL_MASK) | + skl_cdclk_decimal(dev_priv->cdclk.hw.cdclk); + /* + * Disable SSA Precharge when CD clock frequency < 500 MHz, + * enable otherwise. + */ + if (dev_priv->cdclk.hw.cdclk >= 500000) + expected |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; + + if (cdctl == expected) + /* All well; nothing to sanitize */ + return; + +sanitize: + DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); + + /* force cdclk programming */ + dev_priv->cdclk.hw.cdclk = 0; + + /* force full PLL disable + enable */ + dev_priv->cdclk.hw.vco = -1; +} + +/** + * bxt_init_cdclk - Initialize CDCLK on BXT + * @dev_priv: i915 device + * + * Initialize CDCLK for BXT and derivatives. This is generally + * done only during the display core initialization sequence, + * after which the DMC will take care of turning CDCLK off/on + * as needed. + */ +void bxt_init_cdclk(struct drm_i915_private *dev_priv) +{ + struct intel_cdclk_state cdclk_state; + + bxt_sanitize_cdclk(dev_priv); + + if (dev_priv->cdclk.hw.cdclk != 0 && + dev_priv->cdclk.hw.vco != 0) + return; + + cdclk_state = dev_priv->cdclk.hw; + + /* + * FIXME: + * - The initial CDCLK needs to be read from VBT. + * Need to make this change after VBT has changes for BXT. + */ + if (IS_GEMINILAKE(dev_priv)) { + cdclk_state.cdclk = glk_calc_cdclk(0); + cdclk_state.vco = glk_de_pll_vco(dev_priv, cdclk_state.cdclk); + } else { + cdclk_state.cdclk = bxt_calc_cdclk(0); + cdclk_state.vco = bxt_de_pll_vco(dev_priv, cdclk_state.cdclk); + } + + bxt_set_cdclk(dev_priv, &cdclk_state); +} + +/** + * bxt_uninit_cdclk - Uninitialize CDCLK on BXT + * @dev_priv: i915 device + * + * Uninitialize CDCLK for BXT and derivatives. 
This is done only + * during the display core uninitialization sequence. + */ +void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) +{ + struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw; + + cdclk_state.cdclk = cdclk_state.ref; + cdclk_state.vco = 0; + + bxt_set_cdclk(dev_priv, &cdclk_state); +} + +/** + * intel_cdclk_state_compare - Determine if two CDCLK states differ + * @a: first CDCLK state + * @b: second CDCLK state + * + * Returns: + * True if the CDCLK states are identical, false if they differ. + */ +bool intel_cdclk_state_compare(const struct intel_cdclk_state *a, + const struct intel_cdclk_state *b) +{ + return memcmp(a, b, sizeof(*a)) == 0; +} + +/** + * intel_set_cdclk - Push the CDCLK state to the hardware + * @dev_priv: i915 device + * @cdclk_state: new CDCLK state + * + * Program the hardware based on the passed in CDCLK state, + * if necessary. + */ +void intel_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) +{ + if (intel_cdclk_state_compare(&dev_priv->cdclk.hw, cdclk_state)) + return; + + if (WARN_ON_ONCE(!dev_priv->display.set_cdclk)) + return; + + DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz, VCO %d kHz, ref %d kHz\n", + cdclk_state->cdclk, cdclk_state->vco, + cdclk_state->ref); + + dev_priv->display.set_cdclk(dev_priv, cdclk_state); +} + +static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, + int pixel_rate) +{ + struct drm_i915_private *dev_priv = + to_i915(crtc_state->base.crtc->dev); + + /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ + if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled) + pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95); + + /* BSpec says "Do not use DisplayPort with CDCLK less than 432 MHz, + * audio enabled, port width x4, and link rate HBR2 (5.4 GHz), or else + * there may be audio corruption or screen corruption." 
This cdclk + * restriction for GLK is 316.8 MHz and since GLK can output two + * pixels per clock, the pixel rate becomes 2 * 316.8 MHz. + */ + if (intel_crtc_has_dp_encoder(crtc_state) && + crtc_state->has_audio && + crtc_state->port_clock >= 540000 && + crtc_state->lane_count == 4) { + if (IS_GEMINILAKE(dev_priv)) + pixel_rate = max(2 * 316800, pixel_rate); + else + pixel_rate = max(432000, pixel_rate); + } + + /* According to BSpec, "The CD clock frequency must be at least twice + * the frequency of the Azalia BCLK." and BCLK is 96 MHz by default. + * The check for GLK has to be adjusted as the platform can output + * two pixels per clock. + */ + if (crtc_state->has_audio && INTEL_GEN(dev_priv) >= 9) { + if (IS_GEMINILAKE(dev_priv)) + pixel_rate = max(2 * 2 * 96000, pixel_rate); + else + pixel_rate = max(2 * 96000, pixel_rate); + } + + return pixel_rate; +} + +/* compute the max rate for new configuration */ +static int intel_max_pixel_rate(struct drm_atomic_state *state) +{ + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct drm_i915_private *dev_priv = to_i915(state->dev); + struct drm_crtc *crtc; + struct drm_crtc_state *cstate; + struct intel_crtc_state *crtc_state; + unsigned int max_pixel_rate = 0, i; + enum pipe pipe; + + memcpy(intel_state->min_pixclk, dev_priv->min_pixclk, + sizeof(intel_state->min_pixclk)); + + for_each_new_crtc_in_state(state, crtc, cstate, i) { + int pixel_rate; + + crtc_state = to_intel_crtc_state(cstate); + if (!crtc_state->base.enable) { + intel_state->min_pixclk[i] = 0; + continue; + } + + pixel_rate = crtc_state->pixel_rate; + + if (IS_BROADWELL(dev_priv) || IS_GEN9(dev_priv)) + pixel_rate = + bdw_adjust_min_pipe_pixel_rate(crtc_state, + pixel_rate); + + intel_state->min_pixclk[i] = pixel_rate; + } + + for_each_pipe(dev_priv, pipe) + max_pixel_rate = max(intel_state->min_pixclk[pipe], + max_pixel_rate); + + return max_pixel_rate; +} + +static int vlv_modeset_calc_cdclk(struct drm_atomic_state 
*state) +{ + struct drm_i915_private *dev_priv = to_i915(state->dev); + int max_pixclk = intel_max_pixel_rate(state); + struct intel_atomic_state *intel_state = + to_intel_atomic_state(state); + int cdclk; + + cdclk = vlv_calc_cdclk(dev_priv, max_pixclk); + + if (cdclk > dev_priv->max_cdclk_freq) { + DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", + cdclk, dev_priv->max_cdclk_freq); + return -EINVAL; + } + + intel_state->cdclk.logical.cdclk = cdclk; + + if (!intel_state->active_crtcs) { + cdclk = vlv_calc_cdclk(dev_priv, 0); + + intel_state->cdclk.actual.cdclk = cdclk; + } else { + intel_state->cdclk.actual = + intel_state->cdclk.logical; + } + + return 0; +} + +static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->dev); + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + int max_pixclk = intel_max_pixel_rate(state); + int cdclk; + + /* + * FIXME should also account for plane ratio + * once 64bpp pixel formats are supported. + */ + cdclk = bdw_calc_cdclk(max_pixclk); + + if (cdclk > dev_priv->max_cdclk_freq) { + DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", + cdclk, dev_priv->max_cdclk_freq); + return -EINVAL; + } + + intel_state->cdclk.logical.cdclk = cdclk; + + if (!intel_state->active_crtcs) { + cdclk = bdw_calc_cdclk(0); + + intel_state->cdclk.actual.cdclk = cdclk; + } else { + intel_state->cdclk.actual = + intel_state->cdclk.logical; + } + + return 0; +} + +static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) +{ + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct drm_i915_private *dev_priv = to_i915(state->dev); + const int max_pixclk = intel_max_pixel_rate(state); + int cdclk, vco; + + vco = intel_state->cdclk.logical.vco; + if (!vco) + vco = dev_priv->skl_preferred_vco_freq; + + /* + * FIXME should also account for plane ratio + * once 64bpp pixel formats are supported. 
+ */ + cdclk = skl_calc_cdclk(max_pixclk, vco); + + if (cdclk > dev_priv->max_cdclk_freq) { + DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", + cdclk, dev_priv->max_cdclk_freq); + return -EINVAL; + } + + intel_state->cdclk.logical.vco = vco; + intel_state->cdclk.logical.cdclk = cdclk; + + if (!intel_state->active_crtcs) { + cdclk = skl_calc_cdclk(0, vco); + + intel_state->cdclk.actual.vco = vco; + intel_state->cdclk.actual.cdclk = cdclk; + } else { + intel_state->cdclk.actual = + intel_state->cdclk.logical; + } + + return 0; +} + +static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->dev); + int max_pixclk = intel_max_pixel_rate(state); + struct intel_atomic_state *intel_state = + to_intel_atomic_state(state); + int cdclk, vco; + + if (IS_GEMINILAKE(dev_priv)) { + cdclk = glk_calc_cdclk(max_pixclk); + vco = glk_de_pll_vco(dev_priv, cdclk); + } else { + cdclk = bxt_calc_cdclk(max_pixclk); + vco = bxt_de_pll_vco(dev_priv, cdclk); + } + + if (cdclk > dev_priv->max_cdclk_freq) { + DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", + cdclk, dev_priv->max_cdclk_freq); + return -EINVAL; + } + + intel_state->cdclk.logical.vco = vco; + intel_state->cdclk.logical.cdclk = cdclk; + + if (!intel_state->active_crtcs) { + if (IS_GEMINILAKE(dev_priv)) { + cdclk = glk_calc_cdclk(0); + vco = glk_de_pll_vco(dev_priv, cdclk); + } else { + cdclk = bxt_calc_cdclk(0); + vco = bxt_de_pll_vco(dev_priv, cdclk); + } + + intel_state->cdclk.actual.vco = vco; + intel_state->cdclk.actual.cdclk = cdclk; + } else { + intel_state->cdclk.actual = + intel_state->cdclk.logical; + } + + return 0; +} + +static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) +{ + int max_cdclk_freq = dev_priv->max_cdclk_freq; + + if (IS_GEMINILAKE(dev_priv)) + return 2 * max_cdclk_freq; + else if (INTEL_INFO(dev_priv)->gen >= 9 || + IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + return max_cdclk_freq; + 
else if (IS_CHERRYVIEW(dev_priv)) + return max_cdclk_freq*95/100; + else if (INTEL_INFO(dev_priv)->gen < 4) + return 2*max_cdclk_freq*90/100; + else + return max_cdclk_freq*90/100; +} + +/** + * intel_update_max_cdclk - Determine the maximum support CDCLK frequency + * @dev_priv: i915 device + * + * Determine the maximum CDCLK frequency the platform supports, and also + * derive the maximum dot clock frequency the maximum CDCLK frequency + * allows. + */ +void intel_update_max_cdclk(struct drm_i915_private *dev_priv) +{ + if (IS_GEN9_BC(dev_priv)) { + u32 limit = I915_READ(SKL_DFSM) & SKL_DFSM_CDCLK_LIMIT_MASK; + int max_cdclk, vco; + + vco = dev_priv->skl_preferred_vco_freq; + WARN_ON(vco != 8100000 && vco != 8640000); + + /* + * Use the lower (vco 8640) cdclk values as a + * first guess. skl_calc_cdclk() will correct it + * if the preferred vco is 8100 instead. + */ + if (limit == SKL_DFSM_CDCLK_LIMIT_675) + max_cdclk = 617143; + else if (limit == SKL_DFSM_CDCLK_LIMIT_540) + max_cdclk = 540000; + else if (limit == SKL_DFSM_CDCLK_LIMIT_450) + max_cdclk = 432000; + else + max_cdclk = 308571; + + dev_priv->max_cdclk_freq = skl_calc_cdclk(max_cdclk, vco); + } else if (IS_GEMINILAKE(dev_priv)) { + dev_priv->max_cdclk_freq = 316800; + } else if (IS_BROXTON(dev_priv)) { + dev_priv->max_cdclk_freq = 624000; + } else if (IS_BROADWELL(dev_priv)) { + /* + * FIXME with extra cooling we can allow + * 540 MHz for ULX and 675 Mhz for ULT. + * How can we know if extra cooling is + * available? PCI ID, VTB, something else? 
+ */ + if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) + dev_priv->max_cdclk_freq = 450000; + else if (IS_BDW_ULX(dev_priv)) + dev_priv->max_cdclk_freq = 450000; + else if (IS_BDW_ULT(dev_priv)) + dev_priv->max_cdclk_freq = 540000; + else + dev_priv->max_cdclk_freq = 675000; + } else if (IS_CHERRYVIEW(dev_priv)) { + dev_priv->max_cdclk_freq = 320000; + } else if (IS_VALLEYVIEW(dev_priv)) { + dev_priv->max_cdclk_freq = 400000; + } else { + /* otherwise assume cdclk is fixed */ + dev_priv->max_cdclk_freq = dev_priv->cdclk.hw.cdclk; + } + + dev_priv->max_dotclk_freq = intel_compute_max_dotclk(dev_priv); + + DRM_DEBUG_DRIVER("Max CD clock rate: %d kHz\n", + dev_priv->max_cdclk_freq); + + DRM_DEBUG_DRIVER("Max dotclock rate: %d kHz\n", + dev_priv->max_dotclk_freq); +} + +/** + * intel_update_cdclk - Determine the current CDCLK frequency + * @dev_priv: i915 device + * + * Determine the current CDCLK frequency. + */ +void intel_update_cdclk(struct drm_i915_private *dev_priv) +{ + dev_priv->display.get_cdclk(dev_priv, &dev_priv->cdclk.hw); + + DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz, VCO: %d kHz, ref: %d kHz\n", + dev_priv->cdclk.hw.cdclk, dev_priv->cdclk.hw.vco, + dev_priv->cdclk.hw.ref); + + /* + * 9:0 CMBUS [sic] CDCLK frequency (cdfreq): + * Programmng [sic] note: bit[9:2] should be programmed to the number + * of cdclk that generates 4MHz reference clock freq which is used to + * generate GMBus clock. This will vary with the cdclk freq. 
+ */ + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + I915_WRITE(GMBUSFREQ_VLV, + DIV_ROUND_UP(dev_priv->cdclk.hw.cdclk, 1000)); +} + +static int pch_rawclk(struct drm_i915_private *dev_priv) +{ + return (I915_READ(PCH_RAWCLK_FREQ) & RAWCLK_FREQ_MASK) * 1000; +} + +static int vlv_hrawclk(struct drm_i915_private *dev_priv) +{ + /* RAWCLK_FREQ_VLV register updated from power well code */ + return vlv_get_cck_clock_hpll(dev_priv, "hrawclk", + CCK_DISPLAY_REF_CLOCK_CONTROL); +} + +static int g4x_hrawclk(struct drm_i915_private *dev_priv) +{ + uint32_t clkcfg; + + /* hrawclock is 1/4 the FSB frequency */ + clkcfg = I915_READ(CLKCFG); + switch (clkcfg & CLKCFG_FSB_MASK) { + case CLKCFG_FSB_400: + return 100000; + case CLKCFG_FSB_533: + return 133333; + case CLKCFG_FSB_667: + return 166667; + case CLKCFG_FSB_800: + return 200000; + case CLKCFG_FSB_1067: + return 266667; + case CLKCFG_FSB_1333: + return 333333; + /* these two are just a guess; one of them might be right */ + case CLKCFG_FSB_1600: + case CLKCFG_FSB_1600_ALT: + return 400000; + default: + return 133333; + } +} + +/** + * intel_update_rawclk - Determine the current RAWCLK frequency + * @dev_priv: i915 device + * + * Determine the current RAWCLK frequency. RAWCLK is a fixed + * frequency clock so this needs to done only once. 
+ */ +void intel_update_rawclk(struct drm_i915_private *dev_priv) +{ + if (HAS_PCH_SPLIT(dev_priv)) + dev_priv->rawclk_freq = pch_rawclk(dev_priv); + else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + dev_priv->rawclk_freq = vlv_hrawclk(dev_priv); + else if (IS_G4X(dev_priv) || IS_PINEVIEW(dev_priv)) + dev_priv->rawclk_freq = g4x_hrawclk(dev_priv); + else + /* no rawclk on other platforms, or no need to know it */ + return; + + DRM_DEBUG_DRIVER("rawclk rate: %d kHz\n", dev_priv->rawclk_freq); +} + +/** + * intel_init_cdclk_hooks - Initialize CDCLK related modesetting hooks + * @dev_priv: i915 device + */ +void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) +{ + if (IS_CHERRYVIEW(dev_priv)) { + dev_priv->display.set_cdclk = chv_set_cdclk; + dev_priv->display.modeset_calc_cdclk = + vlv_modeset_calc_cdclk; + } else if (IS_VALLEYVIEW(dev_priv)) { + dev_priv->display.set_cdclk = vlv_set_cdclk; + dev_priv->display.modeset_calc_cdclk = + vlv_modeset_calc_cdclk; + } else if (IS_BROADWELL(dev_priv)) { + dev_priv->display.set_cdclk = bdw_set_cdclk; + dev_priv->display.modeset_calc_cdclk = + bdw_modeset_calc_cdclk; + } else if (IS_GEN9_LP(dev_priv)) { + dev_priv->display.set_cdclk = bxt_set_cdclk; + dev_priv->display.modeset_calc_cdclk = + bxt_modeset_calc_cdclk; + } else if (IS_GEN9_BC(dev_priv)) { + dev_priv->display.set_cdclk = skl_set_cdclk; + dev_priv->display.modeset_calc_cdclk = + skl_modeset_calc_cdclk; + } + + if (IS_GEN9_BC(dev_priv)) + dev_priv->display.get_cdclk = skl_get_cdclk; + else if (IS_GEN9_LP(dev_priv)) + dev_priv->display.get_cdclk = bxt_get_cdclk; + else if (IS_BROADWELL(dev_priv)) + dev_priv->display.get_cdclk = bdw_get_cdclk; + else if (IS_HASWELL(dev_priv)) + dev_priv->display.get_cdclk = hsw_get_cdclk; + else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + dev_priv->display.get_cdclk = vlv_get_cdclk; + else if (IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv)) + dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; + else 
if (IS_GEN5(dev_priv)) + dev_priv->display.get_cdclk = fixed_450mhz_get_cdclk; + else if (IS_GM45(dev_priv)) + dev_priv->display.get_cdclk = gm45_get_cdclk; + else if (IS_G45(dev_priv)) + dev_priv->display.get_cdclk = g33_get_cdclk; + else if (IS_I965GM(dev_priv)) + dev_priv->display.get_cdclk = i965gm_get_cdclk; + else if (IS_I965G(dev_priv)) + dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; + else if (IS_PINEVIEW(dev_priv)) + dev_priv->display.get_cdclk = pnv_get_cdclk; + else if (IS_G33(dev_priv)) + dev_priv->display.get_cdclk = g33_get_cdclk; + else if (IS_I945GM(dev_priv)) + dev_priv->display.get_cdclk = i945gm_get_cdclk; + else if (IS_I945G(dev_priv)) + dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; + else if (IS_I915GM(dev_priv)) + dev_priv->display.get_cdclk = i915gm_get_cdclk; + else if (IS_I915G(dev_priv)) + dev_priv->display.get_cdclk = fixed_333mhz_get_cdclk; + else if (IS_I865G(dev_priv)) + dev_priv->display.get_cdclk = fixed_266mhz_get_cdclk; + else if (IS_I85X(dev_priv)) + dev_priv->display.get_cdclk = i85x_get_cdclk; + else if (IS_I845G(dev_priv)) + dev_priv->display.get_cdclk = fixed_200mhz_get_cdclk; + else { /* 830 */ + WARN(!IS_I830(dev_priv), + "Unknown platform. Assuming 133 MHz CDCLK\n"); + dev_priv->display.get_cdclk = fixed_133mhz_get_cdclk; + } +} diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index d81232b79f00..306c6b06b330 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -340,20 +340,12 @@ static void haswell_load_luts(struct drm_crtc_state *crtc_state) hsw_enable_ips(intel_crtc); } -/* Loads the palette/gamma unit for the CRTC on Broadwell+. 
*/ -static void broadwell_load_luts(struct drm_crtc_state *state) +static void bdw_load_degamma_lut(struct drm_crtc_state *state) { - struct drm_crtc *crtc = state->crtc; - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc_state *intel_state = to_intel_crtc_state(state); - enum pipe pipe = to_intel_crtc(crtc)->pipe; + struct drm_i915_private *dev_priv = to_i915(state->crtc->dev); + enum pipe pipe = to_intel_crtc(state->crtc)->pipe; uint32_t i, lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; - if (crtc_state_is_legacy(state)) { - haswell_load_luts(state); - return; - } - I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_SPLIT_MODE | PAL_PREC_AUTO_INCREMENT); @@ -377,6 +369,20 @@ static void broadwell_load_luts(struct drm_crtc_state *state) (v << 20) | (v << 10) | v); } } +} + +static void bdw_load_gamma_lut(struct drm_crtc_state *state, u32 offset) +{ + struct drm_i915_private *dev_priv = to_i915(state->crtc->dev); + enum pipe pipe = to_intel_crtc(state->crtc)->pipe; + uint32_t i, lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + + WARN_ON(offset & ~PAL_PREC_INDEX_VALUE_MASK); + + I915_WRITE(PREC_PAL_INDEX(pipe), + (offset ? PAL_PREC_SPLIT_MODE : 0) | + PAL_PREC_AUTO_INCREMENT | + offset); if (state->gamma_lut) { struct drm_color_lut *lut = @@ -410,6 +416,23 @@ static void broadwell_load_luts(struct drm_crtc_state *state) I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), (1 << 16) - 1); I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), (1 << 16) - 1); } +} + +/* Loads the palette/gamma unit for the CRTC on Broadwell+. 
*/ +static void broadwell_load_luts(struct drm_crtc_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->crtc->dev); + struct intel_crtc_state *intel_state = to_intel_crtc_state(state); + enum pipe pipe = to_intel_crtc(state->crtc)->pipe; + + if (crtc_state_is_legacy(state)) { + haswell_load_luts(state); + return; + } + + bdw_load_degamma_lut(state); + bdw_load_gamma_lut(state, + INTEL_INFO(dev_priv)->color.degamma_lut_size); intel_state->gamma_mode = GAMMA_MODE_MODE_SPLIT; I915_WRITE(GAMMA_MODE(pipe), GAMMA_MODE_MODE_SPLIT); @@ -422,6 +445,58 @@ static void broadwell_load_luts(struct drm_crtc_state *state) I915_WRITE(PREC_PAL_INDEX(pipe), 0); } +static void glk_load_degamma_lut(struct drm_crtc_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->crtc->dev); + enum pipe pipe = to_intel_crtc(state->crtc)->pipe; + const uint32_t lut_size = 33; + uint32_t i; + + /* + * When setting the auto-increment bit, the hardware seems to + * ignore the index bits, so we need to reset it to index 0 + * separately. + */ + I915_WRITE(PRE_CSC_GAMC_INDEX(pipe), 0); + I915_WRITE(PRE_CSC_GAMC_INDEX(pipe), PRE_CSC_GAMC_AUTO_INCREMENT); + + /* + * FIXME: The pipe degamma table in geminilake doesn't support + * different values per channel, so this just loads a linear table. + */ + for (i = 0; i < lut_size; i++) { + uint32_t v = (i * (1 << 16)) / (lut_size - 1); + + I915_WRITE(PRE_CSC_GAMC_DATA(pipe), v); + } + + /* Clamp values > 1.0. 
*/ + while (i++ < 35) + I915_WRITE(PRE_CSC_GAMC_DATA(pipe), (1 << 16)); +} + +static void glk_load_luts(struct drm_crtc_state *state) +{ + struct drm_crtc *crtc = state->crtc; + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_crtc_state *intel_state = to_intel_crtc_state(state); + enum pipe pipe = to_intel_crtc(crtc)->pipe; + + glk_load_degamma_lut(state); + + if (crtc_state_is_legacy(state)) { + haswell_load_luts(state); + return; + } + + bdw_load_gamma_lut(state, 0); + + intel_state->gamma_mode = GAMMA_MODE_MODE_10BIT; + I915_WRITE(GAMMA_MODE(pipe), GAMMA_MODE_MODE_10BIT); + POSTING_READ(GAMMA_MODE(pipe)); +} + /* Loads the palette/gamma unit for the CRTC on CherryView. */ static void cherryview_load_luts(struct drm_crtc_state *state) { @@ -536,10 +611,13 @@ void intel_color_init(struct drm_crtc *crtc) } else if (IS_HASWELL(dev_priv)) { dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; dev_priv->display.load_luts = haswell_load_luts; - } else if (IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv) || - IS_BROXTON(dev_priv) || IS_KABYLAKE(dev_priv)) { + } else if (IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv) || + IS_BROXTON(dev_priv)) { dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; dev_priv->display.load_luts = broadwell_load_luts; + } else if (IS_GEMINILAKE(dev_priv)) { + dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; + dev_priv->display.load_luts = glk_load_luts; } else { dev_priv->display.load_luts = i9xx_load_luts; } diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 2bf5aca6e37c..2797bf37c3ac 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -69,12 +69,11 @@ static bool intel_crt_get_hw_state(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crt *crt = intel_encoder_to_crt(encoder); - enum intel_display_power_domain 
power_domain; u32 tmp; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -91,7 +90,7 @@ static bool intel_crt_get_hw_state(struct intel_encoder *encoder, ret = true; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } @@ -670,16 +669,16 @@ static const struct dmi_system_id intel_spurious_crt_detect[] = { { } }; -static enum drm_connector_status -intel_crt_detect(struct drm_connector *connector, bool force) +static int +intel_crt_detect(struct drm_connector *connector, + struct drm_modeset_acquire_ctx *ctx, + bool force) { struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_crt *crt = intel_attached_crt(connector); struct intel_encoder *intel_encoder = &crt->base; - enum intel_display_power_domain power_domain; - enum drm_connector_status status; + int status, ret; struct intel_load_detect_pipe tmp; - struct drm_modeset_acquire_ctx ctx; DRM_DEBUG_KMS("[CONNECTOR:%d:%s] force=%d\n", connector->base.id, connector->name, @@ -689,8 +688,7 @@ intel_crt_detect(struct drm_connector *connector, bool force) if (dmi_check_system(intel_spurious_crt_detect)) return connector_status_disconnected; - power_domain = intel_display_port_power_domain(intel_encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_encoder->power_domain); if (I915_HAS_HOTPLUG(dev_priv)) { /* We can not rely on the HPD pin always being correctly wired @@ -724,10 +722,9 @@ intel_crt_detect(struct drm_connector *connector, bool force) goto out; } - drm_modeset_acquire_init(&ctx, 0); - /* for pre-945g platforms use load detect */ - if (intel_get_load_detect_pipe(connector, NULL, &tmp, &ctx)) { + ret = intel_get_load_detect_pipe(connector, NULL, &tmp, ctx); + if (ret > 0) { 
if (intel_crt_detect_ddc(connector)) status = connector_status_connected; else if (INTEL_GEN(dev_priv) < 4) @@ -737,15 +734,14 @@ intel_crt_detect(struct drm_connector *connector, bool force) status = connector_status_disconnected; else status = connector_status_unknown; - intel_release_load_detect_pipe(connector, &tmp, &ctx); - } else + intel_release_load_detect_pipe(connector, &tmp, ctx); + } else if (ret == 0) status = connector_status_unknown; - - drm_modeset_drop_locks(&ctx); - drm_modeset_acquire_fini(&ctx); + else if (ret < 0) + status = ret; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_encoder->power_domain); return status; } @@ -761,12 +757,10 @@ static int intel_crt_get_modes(struct drm_connector *connector) struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crt *crt = intel_attached_crt(connector); struct intel_encoder *intel_encoder = &crt->base; - enum intel_display_power_domain power_domain; int ret; struct i2c_adapter *i2c; - power_domain = intel_display_port_power_domain(intel_encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_encoder->power_domain); i2c = intel_gmbus_get_adapter(dev_priv, dev_priv->vbt.crt_ddc_pin); ret = intel_crt_ddc_get_modes(connector, i2c); @@ -778,7 +772,7 @@ static int intel_crt_get_modes(struct drm_connector *connector) ret = intel_crt_ddc_get_modes(connector, i2c); out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_encoder->power_domain); return ret; } @@ -816,7 +810,6 @@ void intel_crt_reset(struct drm_encoder *encoder) static const struct drm_connector_funcs intel_crt_connector_funcs = { .dpms = drm_atomic_helper_connector_dpms, - .detect = intel_crt_detect, .fill_modes = drm_helper_probe_single_connector_modes, .late_register = intel_connector_register, .early_unregister = intel_connector_unregister, @@ -828,6 +821,7 @@ static const struct drm_connector_funcs 
intel_crt_connector_funcs = { }; static const struct drm_connector_helper_funcs intel_crt_connector_helper_funcs = { + .detect_ctx = intel_crt_detect, .mode_valid = intel_crt_mode_valid, .get_modes = intel_crt_get_modes, }; @@ -904,6 +898,8 @@ void intel_crt_init(struct drm_i915_private *dev_priv) crt->adpa_reg = adpa_reg; + crt->base.power_domain = POWER_DOMAIN_PORT_CRT; + crt->base.compute_config = intel_crt_compute_config; if (HAS_PCH_SPLIT(dev_priv)) { crt->base.disable = pch_disable_crt; diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index de219b71fb76..1575bde0cf90 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -34,8 +34,8 @@ * low-power state and comes back to normal. */ -#define I915_CSR_GLK "i915/glk_dmc_ver1_01.bin" -#define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 1) +#define I915_CSR_GLK "i915/glk_dmc_ver1_04.bin" +#define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 4) #define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin" MODULE_FIRMWARE(I915_CSR_KBL); @@ -49,7 +49,7 @@ MODULE_FIRMWARE(I915_CSR_SKL); MODULE_FIRMWARE(I915_CSR_BXT); #define BXT_CSR_VERSION_REQUIRED CSR_VERSION(1, 7) -#define FIRMWARE_URL "https://01.org/linuxgraphics/intel-linux-graphics-firmwares" +#define FIRMWARE_URL "https://01.org/linuxgraphics/downloads/firmware" @@ -395,13 +395,11 @@ static void csr_load_work_fn(struct work_struct *work) struct drm_i915_private *dev_priv; struct intel_csr *csr; const struct firmware *fw = NULL; - int ret; dev_priv = container_of(work, typeof(*dev_priv), csr.work); csr = &dev_priv->csr; - ret = request_firmware(&fw, dev_priv->csr.fw_path, - &dev_priv->drm.pdev->dev); + request_firmware(&fw, dev_priv->csr.fw_path, &dev_priv->drm.pdev->dev); if (fw) dev_priv->csr.dmc_payload = parse_csr_fw(dev_priv, fw); diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 66b367d0771a..0914ad96a71b 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ 
b/drivers/gpu/drm/i915/intel_ddi.c @@ -34,6 +34,19 @@ struct ddi_buf_trans { u8 i_boost; /* SKL: I_boost; valid: 0x0, 0x1, 0x3, 0x7 */ }; +static const u8 index_to_dp_signal_levels[] = { + [0] = DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_0, + [1] = DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_1, + [2] = DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_2, + [3] = DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_3, + [4] = DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_0, + [5] = DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_1, + [6] = DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_2, + [7] = DP_TRAIN_VOLTAGE_SWING_LEVEL_2 | DP_TRAIN_PRE_EMPH_LEVEL_0, + [8] = DP_TRAIN_VOLTAGE_SWING_LEVEL_2 | DP_TRAIN_PRE_EMPH_LEVEL_1, + [9] = DP_TRAIN_VOLTAGE_SWING_LEVEL_3 | DP_TRAIN_PRE_EMPH_LEVEL_0, +}; + /* HDMI/DVI modes ignore everything but the last 2 items. So we share * them for both DP and FDI transports, allowing those ports to * automatically adapt to HDMI connections as well @@ -445,7 +458,7 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por if (IS_GEN9_LP(dev_priv)) return hdmi_level; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); hdmi_default_entry = 8; } else if (IS_BROADWELL(dev_priv)) { @@ -468,85 +481,105 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por return hdmi_level; } +static const struct ddi_buf_trans * +intel_ddi_get_buf_trans_dp(struct drm_i915_private *dev_priv, + int *n_entries) +{ + if (IS_KABYLAKE(dev_priv)) { + return kbl_get_buf_trans_dp(dev_priv, n_entries); + } else if (IS_SKYLAKE(dev_priv)) { + return skl_get_buf_trans_dp(dev_priv, n_entries); + } else if (IS_BROADWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(bdw_ddi_translations_dp); + return bdw_ddi_translations_dp; + } else if (IS_HASWELL(dev_priv)) { + *n_entries = 
ARRAY_SIZE(hsw_ddi_translations_dp); + return hsw_ddi_translations_dp; + } + + *n_entries = 0; + return NULL; +} + +static const struct ddi_buf_trans * +intel_ddi_get_buf_trans_edp(struct drm_i915_private *dev_priv, + int *n_entries) +{ + if (IS_KABYLAKE(dev_priv) || IS_SKYLAKE(dev_priv)) { + return skl_get_buf_trans_edp(dev_priv, n_entries); + } else if (IS_BROADWELL(dev_priv)) { + return bdw_get_buf_trans_edp(dev_priv, n_entries); + } else if (IS_HASWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_dp); + return hsw_ddi_translations_dp; + } + + *n_entries = 0; + return NULL; +} + +static const struct ddi_buf_trans * +intel_ddi_get_buf_trans_fdi(struct drm_i915_private *dev_priv, + int *n_entries) +{ + if (IS_BROADWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_fdi); + return hsw_ddi_translations_fdi; + } else if (IS_HASWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_fdi); + return hsw_ddi_translations_fdi; + } + + *n_entries = 0; + return NULL; +} + /* * Starting with Haswell, DDI port buffers must be programmed with correct * values in advance. This function programs the correct values for * DP/eDP/FDI use cases. 
*/ -void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) +static void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 iboost_bit = 0; - int i, n_dp_entries, n_edp_entries, size; + int i, n_entries; enum port port = intel_ddi_get_encoder_port(encoder); - const struct ddi_buf_trans *ddi_translations_fdi; - const struct ddi_buf_trans *ddi_translations_dp; - const struct ddi_buf_trans *ddi_translations_edp; const struct ddi_buf_trans *ddi_translations; if (IS_GEN9_LP(dev_priv)) return; - if (IS_KABYLAKE(dev_priv)) { - ddi_translations_fdi = NULL; - ddi_translations_dp = - kbl_get_buf_trans_dp(dev_priv, &n_dp_entries); - ddi_translations_edp = - skl_get_buf_trans_edp(dev_priv, &n_edp_entries); - } else if (IS_SKYLAKE(dev_priv)) { - ddi_translations_fdi = NULL; - ddi_translations_dp = - skl_get_buf_trans_dp(dev_priv, &n_dp_entries); - ddi_translations_edp = - skl_get_buf_trans_edp(dev_priv, &n_edp_entries); - } else if (IS_BROADWELL(dev_priv)) { - ddi_translations_fdi = bdw_ddi_translations_fdi; - ddi_translations_dp = bdw_ddi_translations_dp; - ddi_translations_edp = bdw_get_buf_trans_edp(dev_priv, &n_edp_entries); - n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); - } else if (IS_HASWELL(dev_priv)) { - ddi_translations_fdi = hsw_ddi_translations_fdi; - ddi_translations_dp = hsw_ddi_translations_dp; - ddi_translations_edp = hsw_ddi_translations_dp; - n_dp_entries = n_edp_entries = ARRAY_SIZE(hsw_ddi_translations_dp); - } else { - WARN(1, "ddi translation table missing\n"); - ddi_translations_edp = bdw_ddi_translations_dp; - ddi_translations_fdi = bdw_ddi_translations_fdi; - ddi_translations_dp = bdw_ddi_translations_dp; - n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp); - n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); - } - - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - /* If we're boosting the current, set bit 31 of trans1 */ - if 
(dev_priv->vbt.ddi_port_info[port].dp_boost_level) - iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; - - if (WARN_ON(encoder->type == INTEL_OUTPUT_EDP && - port != PORT_A && port != PORT_E && - n_edp_entries > 9)) - n_edp_entries = 9; - } - switch (encoder->type) { case INTEL_OUTPUT_EDP: - ddi_translations = ddi_translations_edp; - size = n_edp_entries; + ddi_translations = intel_ddi_get_buf_trans_edp(dev_priv, + &n_entries); break; case INTEL_OUTPUT_DP: - ddi_translations = ddi_translations_dp; - size = n_dp_entries; + ddi_translations = intel_ddi_get_buf_trans_dp(dev_priv, + &n_entries); break; case INTEL_OUTPUT_ANALOG: - ddi_translations = ddi_translations_fdi; - size = n_dp_entries; + ddi_translations = intel_ddi_get_buf_trans_fdi(dev_priv, + &n_entries); break; default: - BUG(); + MISSING_CASE(encoder->type); + return; } - for (i = 0; i < size; i++) { + if (IS_GEN9_BC(dev_priv)) { + /* If we're boosting the current, set bit 31 of trans1 */ + if (dev_priv->vbt.ddi_port_info[port].dp_boost_level) + iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; + + if (WARN_ON(encoder->type == INTEL_OUTPUT_EDP && + port != PORT_A && port != PORT_E && + n_entries > 9)) + n_entries = 9; + } + + for (i = 0; i < n_entries; i++) { I915_WRITE(DDI_BUF_TRANS_LO(port, i), ddi_translations[i].trans1 | iboost_bit); I915_WRITE(DDI_BUF_TRANS_HI(port, i), @@ -572,7 +605,7 @@ static void intel_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder) hdmi_level = intel_ddi_hdmi_level(dev_priv, port); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { ddi_translations_hdmi = skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); /* If we're boosting the current, set bit 31 of trans1 */ @@ -641,15 +674,15 @@ static uint32_t hsw_pll_to_ddi_pll_sel(struct intel_shared_dpll *pll) * DDI A (which is used for eDP) */ -void hsw_fdi_link_train(struct drm_crtc *crtc) +void hsw_fdi_link_train(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = 
crtc->dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; u32 temp, i, rx_ctl_val, ddi_pll_sel; - for_each_encoder_on_crtc(dev, crtc, encoder) { + for_each_encoder_on_crtc(dev, &crtc->base, encoder) { WARN_ON(encoder->type != INTEL_OUTPUT_ANALOG); intel_prepare_dp_ddi_buffers(encoder); } @@ -668,7 +701,7 @@ void hsw_fdi_link_train(struct drm_crtc *crtc) /* Enable the PCH Receiver FDI PLL */ rx_ctl_val = dev_priv->fdi_rx_config | FDI_RX_ENHANCE_FRAME_ENABLE | FDI_RX_PLL_ENABLE | - FDI_DP_PORT_WIDTH(intel_crtc->config->fdi_lanes); + FDI_DP_PORT_WIDTH(crtc_state->fdi_lanes); I915_WRITE(FDI_RX_CTL(PIPE_A), rx_ctl_val); POSTING_READ(FDI_RX_CTL(PIPE_A)); udelay(220); @@ -678,7 +711,7 @@ void hsw_fdi_link_train(struct drm_crtc *crtc) I915_WRITE(FDI_RX_CTL(PIPE_A), rx_ctl_val); /* Configure Port Clock Select */ - ddi_pll_sel = hsw_pll_to_ddi_pll_sel(intel_crtc->config->shared_dpll); + ddi_pll_sel = hsw_pll_to_ddi_pll_sel(crtc_state->shared_dpll); I915_WRITE(PORT_CLK_SEL(PORT_E), ddi_pll_sel); WARN_ON(ddi_pll_sel != PORT_CLK_SEL_SPLL); @@ -698,7 +731,7 @@ void hsw_fdi_link_train(struct drm_crtc *crtc) * port reversal bit */ I915_WRITE(DDI_BUF_CTL(PORT_E), DDI_BUF_CTL_ENABLE | - ((intel_crtc->config->fdi_lanes - 1) << 1) | + ((crtc_state->fdi_lanes - 1) << 1) | DDI_BUF_TRANS_SELECT(i / 2)); POSTING_READ(DDI_BUF_CTL(PORT_E)); @@ -773,7 +806,7 @@ void hsw_fdi_link_train(struct drm_crtc *crtc) DP_TP_CTL_ENABLE); } -void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder) +static void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct intel_digital_port *intel_dig_port = @@ -785,26 +818,26 @@ void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder) } static struct intel_encoder * -intel_ddi_get_crtc_encoder(struct drm_crtc *crtc) +intel_ddi_get_crtc_encoder(struct 
intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_encoder *intel_encoder, *ret = NULL; + struct drm_device *dev = crtc->base.dev; + struct intel_encoder *encoder, *ret = NULL; int num_encoders = 0; - for_each_encoder_on_crtc(dev, crtc, intel_encoder) { - ret = intel_encoder; + for_each_encoder_on_crtc(dev, &crtc->base, encoder) { + ret = encoder; num_encoders++; } if (num_encoders != 1) WARN(1, "%d encoders on crtc for pipe %c\n", num_encoders, - pipe_name(intel_crtc->pipe)); + pipe_name(crtc->pipe)); BUG_ON(ret == NULL); return ret; } +/* Finds the only possible encoder associated with the given CRTC. */ struct intel_encoder * intel_ddi_get_crtc_new_encoder(struct intel_crtc_state *crtc_state) { @@ -818,7 +851,7 @@ intel_ddi_get_crtc_new_encoder(struct intel_crtc_state *crtc_state) state = crtc_state->base.state; - for_each_connector_in_state(state, connector, connector_state, i) { + for_each_new_connector_in_state(state, connector, connector_state, i) { if (connector_state->crtc != crtc_state->base.crtc) continue; @@ -1089,92 +1122,26 @@ void intel_ddi_clock_get(struct intel_encoder *encoder, if (INTEL_GEN(dev_priv) <= 8) hsw_ddi_clock_get(encoder, pipe_config); - else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + else if (IS_GEN9_BC(dev_priv)) skl_ddi_clock_get(encoder, pipe_config); else if (IS_GEN9_LP(dev_priv)) bxt_ddi_clock_get(encoder, pipe_config); } -static bool -hsw_ddi_pll_select(struct intel_crtc *intel_crtc, - struct intel_crtc_state *crtc_state, - struct intel_encoder *intel_encoder) -{ - struct intel_shared_dpll *pll; - - pll = intel_get_shared_dpll(intel_crtc, crtc_state, - intel_encoder); - if (!pll) - DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n", - pipe_name(intel_crtc->pipe)); - - return pll; -} - -static bool -skl_ddi_pll_select(struct intel_crtc *intel_crtc, - struct intel_crtc_state *crtc_state, - struct intel_encoder *intel_encoder) +void 
intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state) { - struct intel_shared_dpll *pll; - - pll = intel_get_shared_dpll(intel_crtc, crtc_state, intel_encoder); - if (pll == NULL) { - DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n", - pipe_name(intel_crtc->pipe)); - return false; - } - - return true; -} - -static bool -bxt_ddi_pll_select(struct intel_crtc *intel_crtc, - struct intel_crtc_state *crtc_state, - struct intel_encoder *intel_encoder) -{ - return !!intel_get_shared_dpll(intel_crtc, crtc_state, intel_encoder); -} - -/* - * Tries to find a *shared* PLL for the CRTC and store it in - * intel_crtc->ddi_pll_sel. - * - * For private DPLLs, compute_config() should do the selection for us. This - * function should be folded into compute_config() eventually. - */ -bool intel_ddi_pll_select(struct intel_crtc *intel_crtc, - struct intel_crtc_state *crtc_state) -{ - struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); - struct intel_encoder *intel_encoder = - intel_ddi_get_crtc_new_encoder(crtc_state); - - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) - return skl_ddi_pll_select(intel_crtc, crtc_state, - intel_encoder); - else if (IS_GEN9_LP(dev_priv)) - return bxt_ddi_pll_select(intel_crtc, crtc_state, - intel_encoder); - else - return hsw_ddi_pll_select(intel_crtc, crtc_state, - intel_encoder); -} - -void intel_ddi_set_pipe_settings(struct drm_crtc *crtc) -{ - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; - int type = intel_encoder->type; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_encoder *encoder = intel_ddi_get_crtc_encoder(crtc); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + int type = 
encoder->type; uint32_t temp; if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP || type == INTEL_OUTPUT_DP_MST) { WARN_ON(transcoder_is_dsi(cpu_transcoder)); temp = TRANS_MSA_SYNC_CLK; - switch (intel_crtc->config->pipe_bpp) { + switch (crtc_state->pipe_bpp) { case 18: temp |= TRANS_MSA_6_BPC; break; @@ -1194,12 +1161,12 @@ void intel_ddi_set_pipe_settings(struct drm_crtc *crtc) } } -void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state) +void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, + bool state) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; uint32_t temp; temp = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder)); if (state == true) @@ -1209,23 +1176,22 @@ void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state) I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), temp); } -void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) +void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - enum pipe pipe = intel_crtc->pipe; - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; - enum port port = intel_ddi_get_encoder_port(intel_encoder); - int type = intel_encoder->type; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_encoder *encoder = intel_ddi_get_crtc_encoder(crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = 
crtc->pipe; + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + enum port port = intel_ddi_get_encoder_port(encoder); + int type = encoder->type; uint32_t temp; /* Enable TRANS_DDI_FUNC_CTL for the pipe to work in HDMI mode */ temp = TRANS_DDI_FUNC_ENABLE; temp |= TRANS_DDI_SELECT_PORT(port); - switch (intel_crtc->config->pipe_bpp) { + switch (crtc_state->pipe_bpp) { case 18: temp |= TRANS_DDI_BPC_6; break; @@ -1242,9 +1208,9 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) BUG(); } - if (intel_crtc->config->base.adjusted_mode.flags & DRM_MODE_FLAG_PVSYNC) + if (crtc_state->base.adjusted_mode.flags & DRM_MODE_FLAG_PVSYNC) temp |= TRANS_DDI_PVSYNC; - if (intel_crtc->config->base.adjusted_mode.flags & DRM_MODE_FLAG_PHSYNC) + if (crtc_state->base.adjusted_mode.flags & DRM_MODE_FLAG_PHSYNC) temp |= TRANS_DDI_PHSYNC; if (cpu_transcoder == TRANSCODER_EDP) { @@ -1255,8 +1221,8 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) * using motion blur mitigation (which we don't * support). 
*/ if (IS_HASWELL(dev_priv) && - (intel_crtc->config->pch_pfit.enabled || - intel_crtc->config->pch_pfit.force_thru)) + (crtc_state->pch_pfit.enabled || + crtc_state->pch_pfit.force_thru)) temp |= TRANS_DDI_EDP_INPUT_A_ONOFF; else temp |= TRANS_DDI_EDP_INPUT_A_ON; @@ -1274,23 +1240,28 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) } if (type == INTEL_OUTPUT_HDMI) { - if (intel_crtc->config->has_hdmi_sink) + if (crtc_state->has_hdmi_sink) temp |= TRANS_DDI_MODE_SELECT_HDMI; else temp |= TRANS_DDI_MODE_SELECT_DVI; + + if (crtc_state->hdmi_scrambling) + temp |= TRANS_DDI_HDMI_SCRAMBLING_MASK; + if (crtc_state->hdmi_high_tmds_clock_ratio) + temp |= TRANS_DDI_HIGH_TMDS_CHAR_RATE; } else if (type == INTEL_OUTPUT_ANALOG) { temp |= TRANS_DDI_MODE_SELECT_FDI; - temp |= (intel_crtc->config->fdi_lanes - 1) << 1; + temp |= (crtc_state->fdi_lanes - 1) << 1; } else if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { temp |= TRANS_DDI_MODE_SELECT_DP_SST; - temp |= DDI_PORT_WIDTH(intel_crtc->config->lane_count); + temp |= DDI_PORT_WIDTH(crtc_state->lane_count); } else if (type == INTEL_OUTPUT_DP_MST) { temp |= TRANS_DDI_MODE_SELECT_DP_MST; - temp |= DDI_PORT_WIDTH(intel_crtc->config->lane_count); + temp |= DDI_PORT_WIDTH(crtc_state->lane_count); } else { WARN(1, "Invalid encoder type %d for pipe %c\n", - intel_encoder->type, pipe_name(pipe)); + encoder->type, pipe_name(pipe)); } I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), temp); @@ -1311,20 +1282,19 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector) { struct drm_device *dev = intel_connector->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_encoder *intel_encoder = intel_connector->encoder; + struct intel_encoder *encoder = intel_connector->encoder; int type = intel_connector->base.connector_type; - enum port port = intel_ddi_get_encoder_port(intel_encoder); + enum port port = intel_ddi_get_encoder_port(encoder); enum pipe pipe = 0; enum transcoder 
cpu_transcoder; - enum intel_display_power_domain power_domain; uint32_t tmp; bool ret; - power_domain = intel_display_port_power_domain(intel_encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; - if (!intel_encoder->get_hw_state(intel_encoder, &pipe)) { + if (!encoder->get_hw_state(encoder, &pipe)) { ret = false; goto out; } @@ -1363,7 +1333,7 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector) } out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } @@ -1374,13 +1344,12 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); enum port port = intel_ddi_get_encoder_port(encoder); - enum intel_display_power_domain power_domain; u32 tmp; int i; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -1437,29 +1406,39 @@ out: "(PHY_CTL %08x)\n", port_name(port), tmp); } - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } -void intel_ddi_enable_pipe_clock(struct intel_crtc *intel_crtc) +static u64 intel_ddi_get_power_domains(struct intel_encoder *encoder) { - struct drm_crtc *crtc = &intel_crtc->base; - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); - enum port port = intel_ddi_get_encoder_port(intel_encoder); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + enum pipe pipe; + + if 
(intel_ddi_get_hw_state(encoder, &pipe)) + return BIT_ULL(dig_port->ddi_io_power_domain); + + return 0; +} + +void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_encoder *encoder = intel_ddi_get_crtc_encoder(crtc); + enum port port = intel_ddi_get_encoder_port(encoder); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; if (cpu_transcoder != TRANSCODER_EDP) I915_WRITE(TRANS_CLK_SEL(cpu_transcoder), TRANS_CLK_SEL_PORT(port)); } -void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc) +void intel_ddi_disable_pipe_clock(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; if (cpu_transcoder != TRANSCODER_EDP) I915_WRITE(TRANS_CLK_SEL(cpu_transcoder), @@ -1582,50 +1561,38 @@ static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, ddi_translations[level].deemphasis); } -static uint32_t translate_signal_level(int signal_levels) +u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) { - uint32_t level; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + int n_entries; - switch (signal_levels) { - default: - DRM_DEBUG_KMS("Unsupported voltage swing/pre-emphasis level: 0x%x\n", - signal_levels); - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_0: - level = 0; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_1: - level = 1; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_2: - level = 2; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_3: - level = 3; - break; + if (encoder->type == INTEL_OUTPUT_EDP) 
+ intel_ddi_get_buf_trans_edp(dev_priv, &n_entries); + else + intel_ddi_get_buf_trans_dp(dev_priv, &n_entries); - case DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_0: - level = 4; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_1: - level = 5; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_2: - level = 6; - break; + if (WARN_ON(n_entries < 1)) + n_entries = 1; + if (WARN_ON(n_entries > ARRAY_SIZE(index_to_dp_signal_levels))) + n_entries = ARRAY_SIZE(index_to_dp_signal_levels); - case DP_TRAIN_VOLTAGE_SWING_LEVEL_2 | DP_TRAIN_PRE_EMPH_LEVEL_0: - level = 7; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_2 | DP_TRAIN_PRE_EMPH_LEVEL_1: - level = 8; - break; + return index_to_dp_signal_levels[n_entries - 1] & + DP_TRAIN_VOLTAGE_SWING_MASK; +} - case DP_TRAIN_VOLTAGE_SWING_LEVEL_3 | DP_TRAIN_PRE_EMPH_LEVEL_0: - level = 9; - break; +static uint32_t translate_signal_level(int signal_levels) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(index_to_dp_signal_levels); i++) { + if (index_to_dp_signal_levels[i] == signal_levels) + return i; } - return level; + WARN(1, "Unsupported voltage swing/pre-emphasis level: 0x%x\n", + signal_levels); + + return 0; } uint32_t ddi_signal_levels(struct intel_dp *intel_dp) @@ -1641,7 +1608,7 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp) level = translate_signal_level(signal_levels); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) skl_ddi_set_iboost(encoder, level); else if (IS_GEN9_LP(dev_priv)) bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type); @@ -1649,8 +1616,8 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp) return DDI_BUF_TRANS_SELECT(level); } -void intel_ddi_clk_select(struct intel_encoder *encoder, - struct intel_shared_dpll *pll) +static void intel_ddi_clk_select(struct intel_encoder *encoder, + struct intel_shared_dpll *pll) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = 
intel_ddi_get_encoder_port(encoder); @@ -1658,7 +1625,7 @@ void intel_ddi_clk_select(struct intel_encoder *encoder, if (WARN_ON(!pll)) return; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { uint32_t val; /* DDI -> PLL mapping */ @@ -1684,6 +1651,9 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = intel_ddi_get_encoder_port(encoder); + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + + WARN_ON(link_mst && (port == PORT_A || port == PORT_E)); intel_dp_set_link_params(intel_dp, link_rate, lane_count, link_mst); @@ -1691,6 +1661,9 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_edp_panel_on(intel_dp); intel_ddi_clk_select(encoder, pll); + + intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); + intel_prepare_dp_ddi_buffers(encoder); intel_ddi_init_dp_buf_reg(encoder); intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); @@ -1710,11 +1683,15 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, struct drm_encoder *drm_encoder = &encoder->base; enum port port = intel_ddi_get_encoder_port(encoder); int level = intel_ddi_hdmi_level(dev_priv, port); + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); intel_dp_dual_mode_set_tmds_output(intel_hdmi, true); intel_ddi_clk_select(encoder, pll); + + intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); + intel_prepare_hdmi_ddi_buffers(encoder); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) skl_ddi_set_iboost(encoder, level); else if (IS_GEN9_LP(dev_priv)) bxt_ddi_vswing_sequence(dev_priv, level, port, @@ -1725,27 +1702,25 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, crtc_state, conn_state); } -static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder, 
+static void intel_ddi_pre_enable(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { - struct drm_encoder *encoder = &intel_encoder->base; - struct intel_crtc *crtc = to_intel_crtc(encoder->crtc); - int type = intel_encoder->type; + int type = encoder->type; if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { - intel_ddi_pre_enable_dp(intel_encoder, - crtc->config->port_clock, - crtc->config->lane_count, - crtc->config->shared_dpll, - intel_crtc_has_type(crtc->config, + intel_ddi_pre_enable_dp(encoder, + pipe_config->port_clock, + pipe_config->lane_count, + pipe_config->shared_dpll, + intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST)); } if (type == INTEL_OUTPUT_HDMI) { - intel_ddi_pre_enable_hdmi(intel_encoder, + intel_ddi_pre_enable_hdmi(encoder, pipe_config->has_hdmi_sink, pipe_config, conn_state, - crtc->config->shared_dpll); + pipe_config->shared_dpll); } } @@ -1756,6 +1731,7 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, struct drm_encoder *encoder = &intel_encoder->base; struct drm_i915_private *dev_priv = to_i915(encoder->dev); enum port port = intel_ddi_get_encoder_port(intel_encoder); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); int type = intel_encoder->type; uint32_t val; bool wait = false; @@ -1784,7 +1760,10 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, intel_edp_panel_off(intel_dp); } - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (dig_port) + intel_display_power_put(dev_priv, dig_port->ddi_io_power_domain); + + if (IS_GEN9_BC(dev_priv)) I915_WRITE(DPLL_CTRL2, (I915_READ(DPLL_CTRL2) | DPLL_CTRL2_DDI_CLK_OFF(port))); else if (INTEL_GEN(dev_priv) < 9) @@ -1797,11 +1776,11 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, } } -void intel_ddi_fdi_post_disable(struct intel_encoder *intel_encoder, +void intel_ddi_fdi_post_disable(struct intel_encoder *encoder, struct 
intel_crtc_state *old_crtc_state, struct drm_connector_state *old_conn_state) { - struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); uint32_t val; /* @@ -1814,7 +1793,7 @@ void intel_ddi_fdi_post_disable(struct intel_encoder *intel_encoder, val &= ~FDI_RX_ENABLE; I915_WRITE(FDI_RX_CTL(PIPE_A), val); - intel_ddi_post_disable(intel_encoder, old_crtc_state, old_conn_state); + intel_ddi_post_disable(encoder, old_crtc_state, old_conn_state); val = I915_READ(FDI_RX_MISC(PIPE_A)); val &= ~(FDI_RX_PWRDN_LANE1_MASK | FDI_RX_PWRDN_LANE0_MASK); @@ -1835,8 +1814,6 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder, struct drm_connector_state *conn_state) { struct drm_encoder *encoder = &intel_encoder->base; - struct drm_crtc *crtc = encoder->crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct drm_i915_private *dev_priv = to_i915(encoder->dev); enum port port = intel_ddi_get_encoder_port(intel_encoder); int type = intel_encoder->type; @@ -1844,6 +1821,12 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder, if (type == INTEL_OUTPUT_HDMI) { struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); + bool clock_ratio = pipe_config->hdmi_high_tmds_clock_ratio; + bool scrambling = pipe_config->hdmi_scrambling; + + intel_hdmi_handle_sink_scrambling(intel_encoder, + conn_state->connector, + clock_ratio, scrambling); /* In HDMI/DVI mode, the port width, and swing/emphasis values * are ignored so nothing special needs to be done besides @@ -1863,10 +1846,8 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder, intel_edp_drrs_enable(intel_dp, pipe_config); } - if (intel_crtc->config->has_audio) { - intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO); + if (pipe_config->has_audio) intel_audio_codec_enable(intel_encoder, pipe_config, conn_state); - } } static void intel_disable_ddi(struct intel_encoder *intel_encoder, @@ -1874,15 
+1855,15 @@ static void intel_disable_ddi(struct intel_encoder *intel_encoder, struct drm_connector_state *old_conn_state) { struct drm_encoder *encoder = &intel_encoder->base; - struct drm_crtc *crtc = encoder->crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int type = intel_encoder->type; - struct drm_device *dev = encoder->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - if (intel_crtc->config->has_audio) { + if (old_crtc_state->has_audio) intel_audio_codec_disable(intel_encoder); - intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO); + + if (type == INTEL_OUTPUT_HDMI) { + intel_hdmi_handle_sink_scrambling(intel_encoder, + old_conn_state->connector, + false, false); } if (type == INTEL_OUTPUT_EDP) { @@ -1898,8 +1879,7 @@ static void bxt_ddi_pre_pll_enable(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { - struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); - uint8_t mask = intel_crtc->config->lane_lat_optim_mask; + uint8_t mask = pipe_config->lane_lat_optim_mask; bxt_ddi_phy_set_lane_optim_mask(encoder, mask); } @@ -2012,6 +1992,12 @@ void intel_ddi_get_config(struct intel_encoder *encoder, if (intel_hdmi->infoframe_enabled(&encoder->base, pipe_config)) pipe_config->has_infoframe = true; + + if ((temp & TRANS_DDI_HDMI_SCRAMBLING_MASK) == + TRANS_DDI_HDMI_SCRAMBLING_MASK) + pipe_config->hdmi_scrambling = true; + if (temp & TRANS_DDI_HIGH_TMDS_CHAR_RATE) + pipe_config->hdmi_high_tmds_clock_ratio = true; /* fall through */ case TRANS_DDI_MODE_SELECT_DVI: pipe_config->lane_count = 4; @@ -2126,45 +2112,6 @@ intel_ddi_init_hdmi_connector(struct intel_digital_port *intel_dig_port) return connector; } -struct intel_shared_dpll * -intel_ddi_get_link_dpll(struct intel_dp *intel_dp, int clock) -{ - struct intel_connector *connector = intel_dp->attached_connector; - struct intel_encoder *encoder = connector->encoder; - struct drm_i915_private *dev_priv = 
to_i915(encoder->base.dev); - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct intel_shared_dpll *pll = NULL; - struct intel_shared_dpll_state tmp_pll_state; - enum intel_dpll_id dpll_id; - - if (IS_GEN9_LP(dev_priv)) { - dpll_id = (enum intel_dpll_id)dig_port->port; - /* - * Select the required PLL. This works for platforms where - * there is no shared DPLL. - */ - pll = &dev_priv->shared_dplls[dpll_id]; - if (WARN_ON(pll->active_mask)) { - - DRM_ERROR("Shared DPLL in use. active_mask:%x\n", - pll->active_mask); - return NULL; - } - tmp_pll_state = pll->state; - if (!bxt_ddi_dp_set_dpll_hw_state(clock, - &pll->state.hw_state)) { - DRM_ERROR("Could not setup DPLL\n"); - pll->state = tmp_pll_state; - return NULL; - } - } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - pll = skl_find_link_pll(dev_priv, clock); - } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - pll = hsw_ddi_dp_get_dpll(encoder, clock); - } - return pll; -} - void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) { struct intel_digital_port *intel_dig_port; @@ -2241,12 +2188,38 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) intel_encoder->get_hw_state = intel_ddi_get_hw_state; intel_encoder->get_config = intel_ddi_get_config; intel_encoder->suspend = intel_dp_encoder_suspend; + intel_encoder->get_power_domains = intel_ddi_get_power_domains; intel_dig_port->port = port; intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & (DDI_BUF_PORT_REVERSAL | DDI_A_4_LANES); + switch (port) { + case PORT_A: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_A_IO; + break; + case PORT_B: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_B_IO; + break; + case PORT_C: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_C_IO; + break; + case PORT_D: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_D_IO; + break; + case PORT_E: + 
intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_E_IO; + break; + default: + MISSING_CASE(port); + } + /* * Bspec says that DDI_A_4_LANES is the only supported configuration * for Broxton. Yet some BIOS fail to set this bit on port A if eDP @@ -2265,6 +2238,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) intel_dig_port->max_lanes = max_lanes; intel_encoder->type = INTEL_OUTPUT_UNKNOWN; + intel_encoder->power_domain = intel_port_to_power_domain(port); intel_encoder->port = port; intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); intel_encoder->cloneable = 0; @@ -2274,14 +2248,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) goto err; intel_dig_port->hpd_pulse = intel_dp_hpd_pulse; - /* - * On BXT A0/A1, sw needs to activate DDIA HPD logic and - * interrupts to check the external panel connection. - */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1) && port == PORT_B) - dev_priv->hotplug.irq_port[PORT_A] = intel_dig_port; - else - dev_priv->hotplug.irq_port[port] = intel_dig_port; + dev_priv->hotplug.irq_port[port] = intel_dig_port; } /* In theory we don't need the encoder->type check, but leave it just in diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index fcf81815daff..7d01dfe7faac 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -56,6 +56,8 @@ static const char * const platform_names[] = { const char *intel_platform_name(enum intel_platform platform) { + BUILD_BUG_ON(ARRAY_SIZE(platform_names) != INTEL_MAX_PLATFORMS); + if (WARN_ON_ONCE(platform >= ARRAY_SIZE(platform_names) || platform_names[platform] == NULL)) return "<unknown>"; @@ -195,8 +197,10 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) IS_GEN9_LP(dev_priv) && sseu_subslice_total(sseu) > 1; sseu->has_eu_pg = sseu->eu_per_subslice > 2; - if (IS_BROXTON(dev_priv)) { + if (IS_GEN9_LP(dev_priv)) { #define 
IS_SS_DISABLED(ss) (!(sseu->subslice_mask & BIT(ss))) + info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3; + /* * There is a HW issue in 2x6 fused down parts that requires * Pooled EU to be enabled as a WA. The pool configuration @@ -204,9 +208,8 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) * doesn't affect if the device has all 3 subslices enabled. */ /* WaEnablePooledEuFor2x6:bxt */ - info->has_pooled_eu = ((hweight8(sseu->subslice_mask) == 3) || - (hweight8(sseu->subslice_mask) == 2 && - INTEL_REVID(dev_priv) < BXT_REVID_C0)); + info->has_pooled_eu |= (hweight8(sseu->subslice_mask) == 2 && + IS_BXT_REVID(dev_priv, 0, BXT_REVID_B_LAST)); sseu->min_eu_in_pool = 0; if (info->has_pooled_eu) { @@ -234,7 +237,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) * The subslice disable field is global, i.e. it applies * to each of the enabled slices. */ - sseu->subslice_mask = BIT(ss_max) - 1; + sseu->subslice_mask = GENMASK(ss_max - 1, 0); sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >> GEN8_F2_SS_DIS_SHIFT); @@ -410,10 +413,6 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) info->has_snoop = !info->has_llc; - /* Snooping is broken on BXT A stepping. 
*/ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - info->has_snoop = false; - DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask); DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask)); DRM_DEBUG_DRIVER("subslice total: %u\n", diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ed1f4f272b4f..3617927af269 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -37,6 +37,7 @@ #include "intel_frontbuffer.h" #include <drm/i915_drm.h> #include "i915_drv.h" +#include "i915_gem_clflush.h" #include "intel_dsi.h" #include "i915_trace.h" #include <drm/drm_atomic.h> @@ -96,10 +97,9 @@ static void i9xx_crtc_clock_get(struct intel_crtc *crtc, static void ironlake_pch_clock_get(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config); -static int intel_framebuffer_init(struct drm_device *dev, - struct intel_framebuffer *ifb, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj); +static int intel_framebuffer_init(struct intel_framebuffer *ifb, + struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd); static void i9xx_set_pipeconf(struct intel_crtc *intel_crtc); static void intel_set_pipe_timings(struct intel_crtc *intel_crtc); static void intel_set_pipe_src_size(struct intel_crtc *intel_crtc); @@ -122,9 +122,6 @@ static void ironlake_pfit_disable(struct intel_crtc *crtc, bool force); static void ironlake_pfit_enable(struct intel_crtc *crtc); static void intel_modeset_setup_hw_state(struct drm_device *dev); static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc); -static int ilk_max_pixel_rate(struct drm_atomic_state *state); -static int glk_calc_cdclk(int max_pixclk); -static int bxt_calc_cdclk(int max_pixclk); struct intel_limit { struct { @@ -138,7 +135,7 @@ struct intel_limit { }; /* returns HPLL frequency in kHz */ -static int valleyview_get_vco(struct drm_i915_private *dev_priv) +int vlv_get_hpll_vco(struct 
drm_i915_private *dev_priv) { int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 }; @@ -170,73 +167,16 @@ int vlv_get_cck_clock(struct drm_i915_private *dev_priv, return DIV_ROUND_CLOSEST(ref_freq << 1, divider + 1); } -static int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, - const char *name, u32 reg) +int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, + const char *name, u32 reg) { if (dev_priv->hpll_freq == 0) - dev_priv->hpll_freq = valleyview_get_vco(dev_priv); + dev_priv->hpll_freq = vlv_get_hpll_vco(dev_priv); return vlv_get_cck_clock(dev_priv, name, reg, dev_priv->hpll_freq); } -static int -intel_pch_rawclk(struct drm_i915_private *dev_priv) -{ - return (I915_READ(PCH_RAWCLK_FREQ) & RAWCLK_FREQ_MASK) * 1000; -} - -static int -intel_vlv_hrawclk(struct drm_i915_private *dev_priv) -{ - /* RAWCLK_FREQ_VLV register updated from power well code */ - return vlv_get_cck_clock_hpll(dev_priv, "hrawclk", - CCK_DISPLAY_REF_CLOCK_CONTROL); -} - -static int -intel_g4x_hrawclk(struct drm_i915_private *dev_priv) -{ - uint32_t clkcfg; - - /* hrawclock is 1/4 the FSB frequency */ - clkcfg = I915_READ(CLKCFG); - switch (clkcfg & CLKCFG_FSB_MASK) { - case CLKCFG_FSB_400: - return 100000; - case CLKCFG_FSB_533: - return 133333; - case CLKCFG_FSB_667: - return 166667; - case CLKCFG_FSB_800: - return 200000; - case CLKCFG_FSB_1067: - return 266667; - case CLKCFG_FSB_1333: - return 333333; - /* these two are just a guess; one of them might be right */ - case CLKCFG_FSB_1600: - case CLKCFG_FSB_1600_ALT: - return 400000; - default: - return 133333; - } -} - -void intel_update_rawclk(struct drm_i915_private *dev_priv) -{ - if (HAS_PCH_SPLIT(dev_priv)) - dev_priv->rawclk_freq = intel_pch_rawclk(dev_priv); - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - dev_priv->rawclk_freq = intel_vlv_hrawclk(dev_priv); - else if (IS_G4X(dev_priv) || IS_PINEVIEW(dev_priv)) - dev_priv->rawclk_freq = intel_g4x_hrawclk(dev_priv); - else - return; /* no rawclk 
on other platforms, or no need to know it */ - - DRM_DEBUG_DRIVER("rawclk rate: %d kHz\n", dev_priv->rawclk_freq); -} - static void intel_update_czclk(struct drm_i915_private *dev_priv) { if (!(IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))) @@ -2050,11 +1990,14 @@ static unsigned int intel_tile_size(const struct drm_i915_private *dev_priv) return IS_GEN2(dev_priv) ? 2048 : 4096; } -static unsigned int intel_tile_width_bytes(const struct drm_i915_private *dev_priv, - uint64_t fb_modifier, unsigned int cpp) +static unsigned int +intel_tile_width_bytes(const struct drm_framebuffer *fb, int plane) { - switch (fb_modifier) { - case DRM_FORMAT_MOD_NONE: + struct drm_i915_private *dev_priv = to_i915(fb->dev); + unsigned int cpp = fb->format->cpp[plane]; + + switch (fb->modifier) { + case DRM_FORMAT_MOD_LINEAR: return cpp; case I915_FORMAT_MOD_X_TILED: if (IS_GEN2(dev_priv)) @@ -2082,41 +2025,38 @@ static unsigned int intel_tile_width_bytes(const struct drm_i915_private *dev_pr } break; default: - MISSING_CASE(fb_modifier); + MISSING_CASE(fb->modifier); return cpp; } } -unsigned int intel_tile_height(const struct drm_i915_private *dev_priv, - uint64_t fb_modifier, unsigned int cpp) +static unsigned int +intel_tile_height(const struct drm_framebuffer *fb, int plane) { - if (fb_modifier == DRM_FORMAT_MOD_NONE) + if (fb->modifier == DRM_FORMAT_MOD_LINEAR) return 1; else - return intel_tile_size(dev_priv) / - intel_tile_width_bytes(dev_priv, fb_modifier, cpp); + return intel_tile_size(to_i915(fb->dev)) / + intel_tile_width_bytes(fb, plane); } /* Return the tile dimensions in pixel units */ -static void intel_tile_dims(const struct drm_i915_private *dev_priv, +static void intel_tile_dims(const struct drm_framebuffer *fb, int plane, unsigned int *tile_width, - unsigned int *tile_height, - uint64_t fb_modifier, - unsigned int cpp) + unsigned int *tile_height) { - unsigned int tile_width_bytes = - intel_tile_width_bytes(dev_priv, fb_modifier, cpp); + unsigned int 
tile_width_bytes = intel_tile_width_bytes(fb, plane); + unsigned int cpp = fb->format->cpp[plane]; *tile_width = tile_width_bytes / cpp; - *tile_height = intel_tile_size(dev_priv) / tile_width_bytes; + *tile_height = intel_tile_size(to_i915(fb->dev)) / tile_width_bytes; } unsigned int -intel_fb_align_height(struct drm_device *dev, unsigned int height, - uint32_t pixel_format, uint64_t fb_modifier) +intel_fb_align_height(const struct drm_framebuffer *fb, + int plane, unsigned int height) { - unsigned int cpp = drm_format_plane_cpp(pixel_format, 0); - unsigned int tile_height = intel_tile_height(to_i915(dev), fb_modifier, cpp); + unsigned int tile_height = intel_tile_height(fb, plane); return ALIGN(height, tile_height); } @@ -2157,21 +2097,27 @@ static unsigned int intel_linear_alignment(const struct drm_i915_private *dev_pr return 0; } -static unsigned int intel_surf_alignment(const struct drm_i915_private *dev_priv, - uint64_t fb_modifier) +static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb, + int plane) { - switch (fb_modifier) { - case DRM_FORMAT_MOD_NONE: + struct drm_i915_private *dev_priv = to_i915(fb->dev); + + /* AUX_DIST needs only 4K alignment */ + if (fb->format->format == DRM_FORMAT_NV12 && plane == 1) + return 4096; + + switch (fb->modifier) { + case DRM_FORMAT_MOD_LINEAR: return intel_linear_alignment(dev_priv); case I915_FORMAT_MOD_X_TILED: - if (INTEL_INFO(dev_priv)->gen >= 9) + if (INTEL_GEN(dev_priv) >= 9) return 256 * 1024; return 0; case I915_FORMAT_MOD_Y_TILED: case I915_FORMAT_MOD_Yf_TILED: return 1 * 1024 * 1024; default: - MISSING_CASE(fb_modifier); + MISSING_CASE(fb->modifier); return 0; } } @@ -2188,7 +2134,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation) WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - alignment = intel_surf_alignment(dev_priv, fb->modifier); + alignment = intel_surf_alignment(fb, 0); intel_fill_fb_ggtt_view(&view, fb, rotation); @@ -2344,13 +2290,12 @@ static u32 
intel_adjust_tile_offset(int *x, int *y, WARN_ON(new_offset > old_offset); - if (fb->modifier != DRM_FORMAT_MOD_NONE) { + if (fb->modifier != DRM_FORMAT_MOD_LINEAR) { unsigned int tile_size, tile_width, tile_height; unsigned int pitch_tiles; tile_size = intel_tile_size(dev_priv); - intel_tile_dims(dev_priv, &tile_width, &tile_height, - fb->modifier, cpp); + intel_tile_dims(fb, plane, &tile_width, &tile_height); if (drm_rotation_90_or_270(rotation)) { pitch_tiles = pitch / tile_height; @@ -2400,13 +2345,12 @@ static u32 _intel_compute_tile_offset(const struct drm_i915_private *dev_priv, if (alignment) alignment--; - if (fb_modifier != DRM_FORMAT_MOD_NONE) { + if (fb_modifier != DRM_FORMAT_MOD_LINEAR) { unsigned int tile_size, tile_width, tile_height; unsigned int tile_rows, tiles, pitch_tiles; tile_size = intel_tile_size(dev_priv); - intel_tile_dims(dev_priv, &tile_width, &tile_height, - fb_modifier, cpp); + intel_tile_dims(fb, plane, &tile_width, &tile_height); if (drm_rotation_90_or_270(rotation)) { pitch_tiles = pitch / tile_height; @@ -2446,13 +2390,7 @@ u32 intel_compute_tile_offset(int *x, int *y, const struct drm_framebuffer *fb = state->base.fb; unsigned int rotation = state->base.rotation; int pitch = intel_fb_pitch(fb, plane, rotation); - u32 alignment; - - /* AUX_DIST needs only 4K alignment */ - if (fb->format->format == DRM_FORMAT_NV12 && plane == 1) - alignment = 4096; - else - alignment = intel_surf_alignment(dev_priv, fb->modifier); + u32 alignment = intel_surf_alignment(fb, plane); return _intel_compute_tile_offset(dev_priv, x, y, fb, plane, pitch, rotation, alignment); @@ -2516,8 +2454,8 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv, */ if (i915_gem_object_is_tiled(intel_fb->obj) && (x + width) * cpp > fb->pitches[i]) { - DRM_DEBUG("bad fb plane %d offset: 0x%x\n", - i, fb->offsets[i]); + DRM_DEBUG_KMS("bad fb plane %d offset: 0x%x\n", + i, fb->offsets[i]); return -EINVAL; } @@ -2529,17 +2467,16 @@ intel_fill_fb_info(struct 
drm_i915_private *dev_priv, intel_fb->normal[i].y = y; offset = _intel_compute_tile_offset(dev_priv, &x, &y, - fb, 0, fb->pitches[i], + fb, i, fb->pitches[i], DRM_ROTATE_0, tile_size); offset /= tile_size; - if (fb->modifier != DRM_FORMAT_MOD_NONE) { + if (fb->modifier != DRM_FORMAT_MOD_LINEAR) { unsigned int tile_width, tile_height; unsigned int pitch_tiles; struct drm_rect r; - intel_tile_dims(dev_priv, &tile_width, &tile_height, - fb->modifier, cpp); + intel_tile_dims(fb, i, &tile_width, &tile_height); rot_info->plane[i].offset = offset; rot_info->plane[i].stride = DIV_ROUND_UP(fb->pitches[i], tile_width * cpp); @@ -2600,9 +2537,9 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv, max_size = max(max_size, offset + size); } - if (max_size * tile_size > to_intel_framebuffer(fb)->obj->base.size) { - DRM_DEBUG("fb too big for bo (need %u bytes, have %zu bytes)\n", - max_size * tile_size, to_intel_framebuffer(fb)->obj->base.size); + if (max_size * tile_size > intel_fb->obj->base.size) { + DRM_DEBUG_KMS("fb too big for bo (need %u bytes, have %zu bytes)\n", + max_size * tile_size, intel_fb->obj->base.size); return -EINVAL; } @@ -2682,15 +2619,13 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, return false; mutex_lock(&dev->struct_mutex); - obj = i915_gem_object_create_stolen_for_preallocated(dev_priv, base_aligned, base_aligned, size_aligned); - if (!obj) { - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&dev->struct_mutex); + if (!obj) return false; - } if (plane_config->tiling == I915_TILING_X) obj->tiling_and_stride = fb->pitches[0] | I915_TILING_X; @@ -2702,20 +2637,17 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, mode_cmd.modifier[0] = fb->modifier; mode_cmd.flags = DRM_MODE_FB_MODIFIERS; - if (intel_framebuffer_init(dev, to_intel_framebuffer(fb), - &mode_cmd, obj)) { + if (intel_framebuffer_init(to_intel_framebuffer(fb), obj, &mode_cmd)) { DRM_DEBUG_KMS("intel fb init failed\n"); goto out_unref_obj; } - 
mutex_unlock(&dev->struct_mutex); DRM_DEBUG_KMS("initial plane fb obj %p\n", obj); return true; out_unref_obj: i915_gem_object_put(obj); - mutex_unlock(&dev->struct_mutex); return false; } @@ -2734,6 +2666,29 @@ update_state_fb(struct drm_plane *plane) } static void +intel_set_plane_visible(struct intel_crtc_state *crtc_state, + struct intel_plane_state *plane_state, + bool visible) +{ + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + + plane_state->base.visible = visible; + + /* FIXME pre-g4x don't work like this */ + if (visible) { + crtc_state->base.plane_mask |= BIT(drm_plane_index(&plane->base)); + crtc_state->active_planes |= BIT(plane->id); + } else { + crtc_state->base.plane_mask &= ~BIT(drm_plane_index(&plane->base)); + crtc_state->active_planes &= ~BIT(plane->id); + } + + DRM_DEBUG_KMS("%s active planes 0x%x\n", + crtc_state->base.crtc->name, + crtc_state->active_planes); +} + +static void intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, struct intel_initial_plane_config *plane_config) { @@ -2790,9 +2745,11 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, * simplest solution is to just disable the primary plane now and * pretend the BIOS never had it enabled. 
*/ - plane_state->visible = false; - crtc_state->plane_mask &= ~(1 << drm_plane_index(primary)); + intel_set_plane_visible(to_intel_crtc_state(crtc_state), + to_intel_plane_state(plane_state), + false); intel_pre_disable_primary_noatomic(&intel_crtc->base); + trace_intel_disable_plane(primary, intel_crtc); intel_plane->disable_plane(primary, &intel_crtc->base); return; @@ -2831,7 +2788,11 @@ valid_fb: drm_framebuffer_reference(fb); primary->fb = primary->state->fb = fb; primary->crtc = primary->state->crtc = &intel_crtc->base; - intel_crtc->base.state->plane_mask |= (1 << drm_plane_index(primary)); + + intel_set_plane_visible(to_intel_crtc_state(crtc_state), + to_intel_plane_state(plane_state), + true); + atomic_or(to_intel_plane(primary)->frontbuffer_bit, &obj->frontbuffer_bits); } @@ -2842,7 +2803,7 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb, int plane, int cpp = fb->format->cpp[plane]; switch (fb->modifier) { - case DRM_FORMAT_MOD_NONE: + case DRM_FORMAT_MOD_LINEAR: case I915_FORMAT_MOD_X_TILED: switch (cpp) { case 8: @@ -2880,7 +2841,6 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb, int plane, static int skl_check_main_surface(struct intel_plane_state *plane_state) { - const struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev); const struct drm_framebuffer *fb = plane_state->base.fb; unsigned int rotation = plane_state->base.rotation; int x = plane_state->base.src.x1 >> 16; @@ -2899,8 +2859,7 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) intel_add_fb_offsets(&x, &y, plane_state, 0); offset = intel_compute_tile_offset(&x, &y, plane_state, 0); - - alignment = intel_surf_alignment(dev_priv, fb->modifier); + alignment = intel_surf_alignment(fb, 0); /* * AUX surface offset is specified as the distance from the @@ -3003,42 +2962,28 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state) return 0; } -static void i9xx_update_primary_plane(struct drm_plane 
*primary, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { - struct drm_i915_private *dev_priv = to_i915(primary->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_framebuffer *fb = plane_state->base.fb; - int plane = intel_crtc->plane; - u32 linear_offset; - u32 dspcntr; - i915_reg_t reg = DSPCNTR(plane); + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + const struct drm_framebuffer *fb = plane_state->base.fb; unsigned int rotation = plane_state->base.rotation; - int x = plane_state->base.src.x1 >> 16; - int y = plane_state->base.src.y1 >> 16; + u32 dspcntr; + + dspcntr = DISPLAY_PLANE_ENABLE | DISPPLANE_GAMMA_ENABLE; - dspcntr = DISPPLANE_GAMMA_ENABLE; + if (IS_G4X(dev_priv) || IS_GEN5(dev_priv) || + IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv)) + dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; - dspcntr |= DISPLAY_PLANE_ENABLE; + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + dspcntr |= DISPPLANE_PIPE_CSC_ENABLE; if (INTEL_GEN(dev_priv) < 4) { - if (intel_crtc->pipe == PIPE_B) + if (crtc->pipe == PIPE_B) dspcntr |= DISPPLANE_SEL_PIPE_B; - - /* pipesrc and dspsize control the size that is scaled from, - * which should always be the user's requested size. 
- */ - I915_WRITE(DSPSIZE(plane), - ((crtc_state->pipe_src_h - 1) << 16) | - (crtc_state->pipe_src_w - 1)); - I915_WRITE(DSPPOS(plane), 0); - } else if (IS_CHERRYVIEW(dev_priv) && plane == PLANE_B) { - I915_WRITE(PRIMSIZE(plane), - ((crtc_state->pipe_src_h - 1) << 16) | - (crtc_state->pipe_src_w - 1)); - I915_WRITE(PRIMPOS(plane), 0); - I915_WRITE(PRIMCNSTALPHA(plane), 0); } switch (fb->format->format) { @@ -3064,7 +3009,8 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, dspcntr |= DISPPLANE_RGBX101010; break; default: - BUG(); + MISSING_CASE(fb->format->format); + return 0; } if (INTEL_GEN(dev_priv) >= 4 && @@ -3077,45 +3023,111 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, if (rotation & DRM_REFLECT_X) dspcntr |= DISPPLANE_MIRROR; - if (IS_G4X(dev_priv)) - dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; + return dspcntr; +} - intel_add_fb_offsets(&x, &y, plane_state, 0); +int i9xx_check_plane_surface(struct intel_plane_state *plane_state) +{ + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); + int src_x = plane_state->base.src.x1 >> 16; + int src_y = plane_state->base.src.y1 >> 16; + u32 offset; + + intel_add_fb_offsets(&src_x, &src_y, plane_state, 0); if (INTEL_GEN(dev_priv) >= 4) - intel_crtc->dspaddr_offset = - intel_compute_tile_offset(&x, &y, plane_state, 0); + offset = intel_compute_tile_offset(&src_x, &src_y, + plane_state, 0); + else + offset = 0; + + /* HSW/BDW do this automagically in hardware */ + if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)) { + unsigned int rotation = plane_state->base.rotation; + int src_w = drm_rect_width(&plane_state->base.src) >> 16; + int src_h = drm_rect_height(&plane_state->base.src) >> 16; - if (rotation & DRM_ROTATE_180) { - x += crtc_state->pipe_src_w - 1; - y += crtc_state->pipe_src_h - 1; - } else if (rotation & DRM_REFLECT_X) { - x += crtc_state->pipe_src_w - 1; + if (rotation & DRM_ROTATE_180) { + src_x += src_w - 1; + src_y += src_h - 1; + } else 
if (rotation & DRM_REFLECT_X) { + src_x += src_w - 1; + } } + plane_state->main.offset = offset; + plane_state->main.x = src_x; + plane_state->main.y = src_y; + + return 0; +} + +static void i9xx_update_primary_plane(struct drm_plane *primary, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct drm_i915_private *dev_priv = to_i915(primary->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_framebuffer *fb = plane_state->base.fb; + int plane = intel_crtc->plane; + u32 linear_offset; + u32 dspcntr = plane_state->ctl; + i915_reg_t reg = DSPCNTR(plane); + int x = plane_state->main.x; + int y = plane_state->main.y; + unsigned long irqflags; + linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); - if (INTEL_GEN(dev_priv) < 4) + if (INTEL_GEN(dev_priv) >= 4) + intel_crtc->dspaddr_offset = plane_state->main.offset; + else intel_crtc->dspaddr_offset = linear_offset; intel_crtc->adjusted_x = x; intel_crtc->adjusted_y = y; - I915_WRITE(reg, dspcntr); + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]); - if (INTEL_GEN(dev_priv) >= 4) { - I915_WRITE(DSPSURF(plane), - intel_plane_ggtt_offset(plane_state) + - intel_crtc->dspaddr_offset); - I915_WRITE(DSPTILEOFF(plane), (y << 16) | x); - I915_WRITE(DSPLINOFF(plane), linear_offset); + if (INTEL_GEN(dev_priv) < 4) { + /* pipesrc and dspsize control the size that is scaled from, + * which should always be the user's requested size. 
+ */ + I915_WRITE_FW(DSPSIZE(plane), + ((crtc_state->pipe_src_h - 1) << 16) | + (crtc_state->pipe_src_w - 1)); + I915_WRITE_FW(DSPPOS(plane), 0); + } else if (IS_CHERRYVIEW(dev_priv) && plane == PLANE_B) { + I915_WRITE_FW(PRIMSIZE(plane), + ((crtc_state->pipe_src_h - 1) << 16) | + (crtc_state->pipe_src_w - 1)); + I915_WRITE_FW(PRIMPOS(plane), 0); + I915_WRITE_FW(PRIMCNSTALPHA(plane), 0); + } + + I915_WRITE_FW(reg, dspcntr); + + I915_WRITE_FW(DSPSTRIDE(plane), fb->pitches[0]); + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { + I915_WRITE_FW(DSPSURF(plane), + intel_plane_ggtt_offset(plane_state) + + intel_crtc->dspaddr_offset); + I915_WRITE_FW(DSPOFFSET(plane), (y << 16) | x); + } else if (INTEL_GEN(dev_priv) >= 4) { + I915_WRITE_FW(DSPSURF(plane), + intel_plane_ggtt_offset(plane_state) + + intel_crtc->dspaddr_offset); + I915_WRITE_FW(DSPTILEOFF(plane), (y << 16) | x); + I915_WRITE_FW(DSPLINOFF(plane), linear_offset); } else { - I915_WRITE(DSPADDR(plane), - intel_plane_ggtt_offset(plane_state) + - intel_crtc->dspaddr_offset); + I915_WRITE_FW(DSPADDR(plane), + intel_plane_ggtt_offset(plane_state) + + intel_crtc->dspaddr_offset); } - POSTING_READ(reg); + POSTING_READ_FW(reg); + + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } static void i9xx_disable_primary_plane(struct drm_plane *primary, @@ -3125,111 +3137,27 @@ static void i9xx_disable_primary_plane(struct drm_plane *primary, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int plane = intel_crtc->plane; + unsigned long irqflags; + + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - I915_WRITE(DSPCNTR(plane), 0); + I915_WRITE_FW(DSPCNTR(plane), 0); if (INTEL_INFO(dev_priv)->gen >= 4) - I915_WRITE(DSPSURF(plane), 0); + I915_WRITE_FW(DSPSURF(plane), 0); else - I915_WRITE(DSPADDR(plane), 0); - POSTING_READ(DSPCNTR(plane)); -} - -static void ironlake_update_primary_plane(struct drm_plane *primary, - const struct intel_crtc_state *crtc_state, 
- const struct intel_plane_state *plane_state) -{ - struct drm_device *dev = primary->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_framebuffer *fb = plane_state->base.fb; - int plane = intel_crtc->plane; - u32 linear_offset; - u32 dspcntr; - i915_reg_t reg = DSPCNTR(plane); - unsigned int rotation = plane_state->base.rotation; - int x = plane_state->base.src.x1 >> 16; - int y = plane_state->base.src.y1 >> 16; - - dspcntr = DISPPLANE_GAMMA_ENABLE; - dspcntr |= DISPLAY_PLANE_ENABLE; - - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - dspcntr |= DISPPLANE_PIPE_CSC_ENABLE; + I915_WRITE_FW(DSPADDR(plane), 0); + POSTING_READ_FW(DSPCNTR(plane)); - switch (fb->format->format) { - case DRM_FORMAT_C8: - dspcntr |= DISPPLANE_8BPP; - break; - case DRM_FORMAT_RGB565: - dspcntr |= DISPPLANE_BGRX565; - break; - case DRM_FORMAT_XRGB8888: - dspcntr |= DISPPLANE_BGRX888; - break; - case DRM_FORMAT_XBGR8888: - dspcntr |= DISPPLANE_RGBX888; - break; - case DRM_FORMAT_XRGB2101010: - dspcntr |= DISPPLANE_BGRX101010; - break; - case DRM_FORMAT_XBGR2101010: - dspcntr |= DISPPLANE_RGBX101010; - break; - default: - BUG(); - } - - if (fb->modifier == I915_FORMAT_MOD_X_TILED) - dspcntr |= DISPPLANE_TILED; - - if (rotation & DRM_ROTATE_180) - dspcntr |= DISPPLANE_ROTATE_180; - - if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)) - dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; - - intel_add_fb_offsets(&x, &y, plane_state, 0); - - intel_crtc->dspaddr_offset = - intel_compute_tile_offset(&x, &y, plane_state, 0); - - /* HSW+ does this automagically in hardware */ - if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv) && - rotation & DRM_ROTATE_180) { - x += crtc_state->pipe_src_w - 1; - y += crtc_state->pipe_src_h - 1; - } - - linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); - - intel_crtc->adjusted_x = x; - intel_crtc->adjusted_y = y; - - I915_WRITE(reg, dspcntr); - - 
I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]); - I915_WRITE(DSPSURF(plane), - intel_plane_ggtt_offset(plane_state) + - intel_crtc->dspaddr_offset); - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - I915_WRITE(DSPOFFSET(plane), (y << 16) | x); - } else { - I915_WRITE(DSPTILEOFF(plane), (y << 16) | x); - I915_WRITE(DSPLINOFF(plane), linear_offset); - } - POSTING_READ(reg); + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } -u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv, - uint64_t fb_modifier, uint32_t pixel_format) +static u32 +intel_fb_stride_alignment(const struct drm_framebuffer *fb, int plane) { - if (fb_modifier == DRM_FORMAT_MOD_NONE) { + if (fb->modifier == DRM_FORMAT_MOD_LINEAR) return 64; - } else { - int cpp = drm_format_plane_cpp(pixel_format, 0); - - return intel_tile_width_bytes(dev_priv, fb_modifier, cpp); - } + else + return intel_tile_width_bytes(fb, plane); } static void skl_detach_scaler(struct intel_crtc *intel_crtc, int id) @@ -3262,26 +3190,26 @@ static void skl_detach_scalers(struct intel_crtc *intel_crtc) u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane, unsigned int rotation) { - const struct drm_i915_private *dev_priv = to_i915(fb->dev); - u32 stride = intel_fb_pitch(fb, plane, rotation); + u32 stride; + + if (plane >= fb->format->num_planes) + return 0; + + stride = intel_fb_pitch(fb, plane, rotation); /* * The stride is either expressed as a multiple of 64 bytes chunks for * linear buffers or in number of tiles for tiled buffers. 
*/ - if (drm_rotation_90_or_270(rotation)) { - int cpp = fb->format->cpp[plane]; - - stride /= intel_tile_height(dev_priv, fb->modifier, cpp); - } else { - stride /= intel_fb_stride_alignment(dev_priv, fb->modifier, - fb->format->format); - } + if (drm_rotation_90_or_270(rotation)) + stride /= intel_tile_height(fb, plane); + else + stride /= intel_fb_stride_alignment(fb, plane); return stride; } -u32 skl_plane_ctl_format(uint32_t pixel_format) +static u32 skl_plane_ctl_format(uint32_t pixel_format) { switch (pixel_format) { case DRM_FORMAT_C8: @@ -3322,10 +3250,10 @@ u32 skl_plane_ctl_format(uint32_t pixel_format) return 0; } -u32 skl_plane_ctl_tiling(uint64_t fb_modifier) +static u32 skl_plane_ctl_tiling(uint64_t fb_modifier) { switch (fb_modifier) { - case DRM_FORMAT_MOD_NONE: + case DRM_FORMAT_MOD_LINEAR: break; case I915_FORMAT_MOD_X_TILED: return PLANE_CTL_TILED_X; @@ -3340,7 +3268,7 @@ u32 skl_plane_ctl_tiling(uint64_t fb_modifier) return 0; } -u32 skl_plane_ctl_rotation(unsigned int rotation) +static u32 skl_plane_ctl_rotation(unsigned int rotation) { switch (rotation) { case DRM_ROTATE_0: @@ -3362,6 +3290,37 @@ u32 skl_plane_ctl_rotation(unsigned int rotation) return 0; } +u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); + const struct drm_framebuffer *fb = plane_state->base.fb; + unsigned int rotation = plane_state->base.rotation; + const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; + u32 plane_ctl; + + plane_ctl = PLANE_CTL_ENABLE; + + if (!IS_GEMINILAKE(dev_priv)) { + plane_ctl |= + PLANE_CTL_PIPE_GAMMA_ENABLE | + PLANE_CTL_PIPE_CSC_ENABLE | + PLANE_CTL_PLANE_GAMMA_DISABLE; + } + + plane_ctl |= skl_plane_ctl_format(fb->format->format); + plane_ctl |= skl_plane_ctl_tiling(fb->modifier); + plane_ctl |= skl_plane_ctl_rotation(rotation); + + if (key->flags & I915_SET_COLORKEY_DESTINATION) + plane_ctl |= 
PLANE_CTL_KEY_ENABLE_DESTINATION; + else if (key->flags & I915_SET_COLORKEY_SOURCE) + plane_ctl |= PLANE_CTL_KEY_ENABLE_SOURCE; + + return plane_ctl; +} + static void skylake_update_primary_plane(struct drm_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) @@ -3372,7 +3331,7 @@ static void skylake_update_primary_plane(struct drm_plane *plane, struct drm_framebuffer *fb = plane_state->base.fb; enum plane_id plane_id = to_intel_plane(plane)->id; enum pipe pipe = to_intel_plane(plane)->pipe; - u32 plane_ctl; + u32 plane_ctl = plane_state->ctl; unsigned int rotation = plane_state->base.rotation; u32 stride = skl_plane_stride(fb, 0, rotation); u32 surf_addr = plane_state->main.offset; @@ -3385,15 +3344,7 @@ static void skylake_update_primary_plane(struct drm_plane *plane, int dst_y = plane_state->base.dst.y1; int dst_w = drm_rect_width(&plane_state->base.dst); int dst_h = drm_rect_height(&plane_state->base.dst); - - plane_ctl = PLANE_CTL_ENABLE | - PLANE_CTL_PIPE_GAMMA_ENABLE | - PLANE_CTL_PIPE_CSC_ENABLE; - - plane_ctl |= skl_plane_ctl_format(fb->format->format); - plane_ctl |= skl_plane_ctl_tiling(fb->modifier); - plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE; - plane_ctl |= skl_plane_ctl_rotation(rotation); + unsigned long irqflags; /* Sizes are 0 based */ src_w--; @@ -3406,10 +3357,19 @@ static void skylake_update_primary_plane(struct drm_plane *plane, intel_crtc->adjusted_x = src_x; intel_crtc->adjusted_y = src_y; - I915_WRITE(PLANE_CTL(pipe, plane_id), plane_ctl); - I915_WRITE(PLANE_OFFSET(pipe, plane_id), (src_y << 16) | src_x); - I915_WRITE(PLANE_STRIDE(pipe, plane_id), stride); - I915_WRITE(PLANE_SIZE(pipe, plane_id), (src_h << 16) | src_w); + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); + + if (IS_GEMINILAKE(dev_priv)) { + I915_WRITE_FW(PLANE_COLOR_CTL(pipe, plane_id), + PLANE_COLOR_PIPE_GAMMA_ENABLE | + PLANE_COLOR_PIPE_CSC_ENABLE | + PLANE_COLOR_PLANE_GAMMA_DISABLE); + } + + 
I915_WRITE_FW(PLANE_CTL(pipe, plane_id), plane_ctl); + I915_WRITE_FW(PLANE_OFFSET(pipe, plane_id), (src_y << 16) | src_x); + I915_WRITE_FW(PLANE_STRIDE(pipe, plane_id), stride); + I915_WRITE_FW(PLANE_SIZE(pipe, plane_id), (src_h << 16) | src_w); if (scaler_id >= 0) { uint32_t ps_ctrl = 0; @@ -3417,19 +3377,21 @@ static void skylake_update_primary_plane(struct drm_plane *plane, WARN_ON(!dst_w || !dst_h); ps_ctrl = PS_SCALER_EN | PS_PLANE_SEL(plane_id) | crtc_state->scaler_state.scalers[scaler_id].mode; - I915_WRITE(SKL_PS_CTRL(pipe, scaler_id), ps_ctrl); - I915_WRITE(SKL_PS_PWR_GATE(pipe, scaler_id), 0); - I915_WRITE(SKL_PS_WIN_POS(pipe, scaler_id), (dst_x << 16) | dst_y); - I915_WRITE(SKL_PS_WIN_SZ(pipe, scaler_id), (dst_w << 16) | dst_h); - I915_WRITE(PLANE_POS(pipe, plane_id), 0); + I915_WRITE_FW(SKL_PS_CTRL(pipe, scaler_id), ps_ctrl); + I915_WRITE_FW(SKL_PS_PWR_GATE(pipe, scaler_id), 0); + I915_WRITE_FW(SKL_PS_WIN_POS(pipe, scaler_id), (dst_x << 16) | dst_y); + I915_WRITE_FW(SKL_PS_WIN_SZ(pipe, scaler_id), (dst_w << 16) | dst_h); + I915_WRITE_FW(PLANE_POS(pipe, plane_id), 0); } else { - I915_WRITE(PLANE_POS(pipe, plane_id), (dst_y << 16) | dst_x); + I915_WRITE_FW(PLANE_POS(pipe, plane_id), (dst_y << 16) | dst_x); } - I915_WRITE(PLANE_SURF(pipe, plane_id), - intel_plane_ggtt_offset(plane_state) + surf_addr); + I915_WRITE_FW(PLANE_SURF(pipe, plane_id), + intel_plane_ggtt_offset(plane_state) + surf_addr); + + POSTING_READ_FW(PLANE_SURF(pipe, plane_id)); - POSTING_READ(PLANE_SURF(pipe, plane_id)); + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } static void skylake_disable_primary_plane(struct drm_plane *primary, @@ -3439,21 +3401,15 @@ static void skylake_disable_primary_plane(struct drm_plane *primary, struct drm_i915_private *dev_priv = to_i915(dev); enum plane_id plane_id = to_intel_plane(primary)->id; enum pipe pipe = to_intel_plane(primary)->pipe; + unsigned long irqflags; - I915_WRITE(PLANE_CTL(pipe, plane_id), 0); - I915_WRITE(PLANE_SURF(pipe, 
plane_id), 0); - POSTING_READ(PLANE_SURF(pipe, plane_id)); -} + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); -/* Assume fb object is pinned & idle & fenced and just update base pointers */ -static int -intel_pipe_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb, - int x, int y, enum mode_set_atomic state) -{ - /* Support for kgdboc is disabled, this needs a major rework. */ - DRM_ERROR("legacy panic handler not supported any more.\n"); + I915_WRITE_FW(PLANE_CTL(pipe, plane_id), 0); + I915_WRITE_FW(PLANE_SURF(pipe, plane_id), 0); + POSTING_READ_FW(PLANE_SURF(pipe, plane_id)); - return -ENODEV; + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } static void intel_complete_page_flips(struct drm_i915_private *dev_priv) @@ -3473,16 +3429,21 @@ static void intel_update_primary_planes(struct drm_device *dev) struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); - if (plane_state->base.visible) + if (plane_state->base.visible) { + trace_intel_update_plane(&plane->base, + to_intel_crtc(crtc)); + plane->update_plane(&plane->base, to_intel_crtc_state(crtc->state), plane_state); + } } } static int __intel_display_resume(struct drm_device *dev, - struct drm_atomic_state *state) + struct drm_atomic_state *state, + struct drm_modeset_acquire_ctx *ctx) { struct drm_crtc_state *crtc_state; struct drm_crtc *crtc; @@ -3494,7 +3455,12 @@ __intel_display_resume(struct drm_device *dev, if (!state) return 0; - for_each_crtc_in_state(state, crtc, crtc_state, i) { + /* + * We've duplicated the state, pointers to the old state are invalid. + * + * Don't attempt to use the old state until we commit the duplicated state. + */ + for_each_new_crtc_in_state(state, crtc, crtc_state, i) { /* * Force recalculation even if we restore * current state. 
With fast modeset this may not result @@ -3504,9 +3470,10 @@ __intel_display_resume(struct drm_device *dev, } /* ignore any reset values/BIOS leftovers in the WM registers */ - to_intel_atomic_state(state)->skip_intermediate_wm = true; + if (!HAS_GMCH_DISPLAY(to_i915(dev))) + to_intel_atomic_state(state)->skip_intermediate_wm = true; - ret = drm_atomic_commit(state); + ret = drm_atomic_helper_commit_duplicated_state(state, ctx); WARN_ON(ret == -EDEADLK); return ret; @@ -3596,7 +3563,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) */ intel_update_primary_planes(dev); } else { - ret = __intel_display_resume(dev, state); + ret = __intel_display_resume(dev, state, ctx); if (ret) DRM_ERROR("Restoring old state failed with %i\n", ret); } @@ -3616,7 +3583,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) dev_priv->display.hpd_irq_setup(dev_priv); spin_unlock_irq(&dev_priv->irq_lock); - ret = __intel_display_resume(dev, state); + ret = __intel_display_resume(dev, state, ctx); if (ret) DRM_ERROR("Restoring old state failed with %i\n", ret); @@ -3634,7 +3601,7 @@ static bool abort_flip_on_reset(struct intel_crtc *crtc) { struct i915_gpu_error *error = &to_i915(crtc->base.dev)->gpu_error; - if (i915_reset_in_progress(error)) + if (i915_reset_backoff(error)) return true; if (crtc->reset_count != i915_reset_count(error)) @@ -3696,12 +3663,11 @@ static void intel_update_pipe_config(struct intel_crtc *crtc, } } -static void intel_fdi_normal_train(struct drm_crtc *crtc) +static void intel_fdi_normal_train(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + int pipe = crtc->pipe; i915_reg_t reg; u32 temp; @@ -3739,12 +3705,12 @@ static void intel_fdi_normal_train(struct drm_crtc *crtc) } /* The FDI link training functions for ILK/Ibexpeak. 
*/ -static void ironlake_fdi_link_train(struct drm_crtc *crtc) +static void ironlake_fdi_link_train(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + int pipe = crtc->pipe; i915_reg_t reg; u32 temp, tries; @@ -3765,7 +3731,7 @@ static void ironlake_fdi_link_train(struct drm_crtc *crtc) reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_DP_PORT_WIDTH_MASK; - temp |= FDI_DP_PORT_WIDTH(intel_crtc->config->fdi_lanes); + temp |= FDI_DP_PORT_WIDTH(crtc_state->fdi_lanes); temp &= ~FDI_LINK_TRAIN_NONE; temp |= FDI_LINK_TRAIN_PATTERN_1; I915_WRITE(reg, temp | FDI_TX_ENABLE); @@ -3840,12 +3806,12 @@ static const int snb_b_fdi_train_param[] = { }; /* The FDI link training functions for SNB/Cougarpoint. */ -static void gen6_fdi_link_train(struct drm_crtc *crtc) +static void gen6_fdi_link_train(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + int pipe = crtc->pipe; i915_reg_t reg; u32 temp, i, retry; @@ -3864,7 +3830,7 @@ static void gen6_fdi_link_train(struct drm_crtc *crtc) reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_DP_PORT_WIDTH_MASK; - temp |= FDI_DP_PORT_WIDTH(intel_crtc->config->fdi_lanes); + temp |= FDI_DP_PORT_WIDTH(crtc_state->fdi_lanes); temp &= ~FDI_LINK_TRAIN_NONE; temp |= FDI_LINK_TRAIN_PATTERN_1; temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; @@ -3973,12 +3939,12 @@ static void gen6_fdi_link_train(struct drm_crtc *crtc) } /* Manual link training for Ivy Bridge A0 parts */ -static void ivb_manual_fdi_link_train(struct drm_crtc *crtc) +static void 
ivb_manual_fdi_link_train(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + int pipe = crtc->pipe; i915_reg_t reg; u32 temp, i, j; @@ -4016,7 +3982,7 @@ static void ivb_manual_fdi_link_train(struct drm_crtc *crtc) reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_DP_PORT_WIDTH_MASK; - temp |= FDI_DP_PORT_WIDTH(intel_crtc->config->fdi_lanes); + temp |= FDI_DP_PORT_WIDTH(crtc_state->fdi_lanes); temp |= FDI_LINK_TRAIN_PATTERN_1_IVB; temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; temp |= snb_b_fdi_train_param[j/2]; @@ -4303,10 +4269,10 @@ void lpt_disable_iclkip(struct drm_i915_private *dev_priv) } /* Program iCLKIP clock to the desired frequency */ -static void lpt_program_iclkip(struct drm_crtc *crtc) +static void lpt_program_iclkip(struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - int clock = to_intel_crtc(crtc)->config->base.adjusted_mode.crtc_clock; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + int clock = crtc->config->base.adjusted_mode.crtc_clock; u32 divsel, phaseinc, auxdiv, phasedir = 0; u32 temp; @@ -4487,12 +4453,12 @@ static void ivybridge_update_fdi_bc_bifurcation(struct intel_crtc *intel_crtc) /* Return which DP Port should be selected for Transcoder DP control */ static enum port -intel_trans_dp_port_sel(struct drm_crtc *crtc) +intel_trans_dp_port_sel(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct intel_encoder *encoder; - for_each_encoder_on_crtc(dev, crtc, encoder) { + for_each_encoder_on_crtc(dev, &crtc->base, encoder) { if (encoder->type == INTEL_OUTPUT_DP || encoder->type == INTEL_OUTPUT_EDP) return enc_to_dig_port(&encoder->base)->port; @@ -4509,18 +4475,18 @@ 
intel_trans_dp_port_sel(struct drm_crtc *crtc) * - DP transcoding bits * - transcoder */ -static void ironlake_pch_enable(struct drm_crtc *crtc) +static void ironlake_pch_enable(const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc->dev; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + int pipe = crtc->pipe; u32 temp; assert_pch_transcoder_disabled(dev_priv, pipe); if (IS_IVYBRIDGE(dev_priv)) - ivybridge_update_fdi_bc_bifurcation(intel_crtc); + ivybridge_update_fdi_bc_bifurcation(crtc); /* Write the TU size bits before fdi link training, so that error * detection works. */ @@ -4528,7 +4494,7 @@ static void ironlake_pch_enable(struct drm_crtc *crtc) I915_READ(PIPE_DATA_M1(pipe)) & TU_SIZE_MASK); /* For PCH output, training FDI link */ - dev_priv->display.fdi_link_train(crtc); + dev_priv->display.fdi_link_train(crtc, crtc_state); /* We need to program the right clock selection before writing the pixel * mutliplier into the DPLL. */ @@ -4538,7 +4504,7 @@ static void ironlake_pch_enable(struct drm_crtc *crtc) temp = I915_READ(PCH_DPLL_SEL); temp |= TRANS_DPLL_ENABLE(pipe); sel = TRANS_DPLLB_SEL(pipe); - if (intel_crtc->config->shared_dpll == + if (crtc_state->shared_dpll == intel_get_shared_dpll_by_id(dev_priv, DPLL_ID_PCH_PLL_B)) temp |= sel; else @@ -4553,19 +4519,19 @@ static void ironlake_pch_enable(struct drm_crtc *crtc) * Note that enable_shared_dpll tries to do the right thing, but * get_shared_dpll unconditionally resets the pll - we need that to have * the right LVDS enable sequence. 
*/ - intel_enable_shared_dpll(intel_crtc); + intel_enable_shared_dpll(crtc); /* set transcoder timing, panel must allow it */ assert_panel_unlocked(dev_priv, pipe); - ironlake_pch_transcoder_set_timings(intel_crtc, pipe); + ironlake_pch_transcoder_set_timings(crtc, pipe); intel_fdi_normal_train(crtc); /* For PCH DP, enable TRANS_DP_CTL */ if (HAS_PCH_CPT(dev_priv) && - intel_crtc_has_dp_encoder(intel_crtc->config)) { + intel_crtc_has_dp_encoder(crtc_state)) { const struct drm_display_mode *adjusted_mode = - &intel_crtc->config->base.adjusted_mode; + &crtc_state->base.adjusted_mode; u32 bpc = (I915_READ(PIPECONF(pipe)) & PIPECONF_BPC_MASK) >> 5; i915_reg_t reg = TRANS_DP_CTL(pipe); temp = I915_READ(reg); @@ -4600,19 +4566,18 @@ static void ironlake_pch_enable(struct drm_crtc *crtc) ironlake_enable_pch_transcoder(dev_priv, pipe); } -static void lpt_pch_enable(struct drm_crtc *crtc) +static void lpt_pch_enable(const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; assert_pch_transcoder_disabled(dev_priv, TRANSCODER_A); lpt_program_iclkip(crtc); /* Set transcoder timing. 
*/ - ironlake_pch_transcoder_set_timings(intel_crtc, PIPE_A); + ironlake_pch_transcoder_set_timings(crtc, PIPE_A); lpt_enable_pch_transcoder(dev_priv, cpu_transcoder); } @@ -5015,8 +4980,6 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) intel_frontbuffer_flip(to_i915(crtc->base.dev), pipe_config->fb_bits); - crtc->wm.cxsr_allowed = true; - if (pipe_config->update_wm_post && pipe_config->base.active) intel_update_watermarks(crtc); @@ -5035,13 +4998,12 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) } } -static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state) +static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, + struct intel_crtc_state *pipe_config) { struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc_state *pipe_config = - to_intel_crtc_state(crtc->base.state); struct drm_atomic_state *old_state = old_crtc_state->base.state; struct drm_plane *primary = crtc->base.primary; struct drm_plane_state *old_pri_state = @@ -5063,22 +5025,18 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state) intel_pre_disable_primary(&crtc->base); } - if (pipe_config->disable_cxsr && HAS_GMCH_DISPLAY(dev_priv)) { - crtc->wm.cxsr_allowed = false; - - /* - * Vblank time updates from the shadow to live plane control register - * are blocked if the memory self-refresh mode is active at that - * moment. So to make sure the plane gets truly disabled, disable - * first the self-refresh mode. The self-refresh enable bit in turn - * will be checked/applied by the HW only at the next frame start - * event which is after the vblank start event, so we need to have a - * wait-for-vblank between disabling the plane and the pipe. 
- */ - if (old_crtc_state->base.active && - intel_set_memory_cxsr(dev_priv, false)) - intel_wait_for_vblank(dev_priv, crtc->pipe); - } + /* + * Vblank time updates from the shadow to live plane control register + * are blocked if the memory self-refresh mode is active at that + * moment. So to make sure the plane gets truly disabled, disable + * first the self-refresh mode. The self-refresh enable bit in turn + * will be checked/applied by the HW only at the next frame start + * event which is after the vblank start event, so we need to have a + * wait-for-vblank between disabling the plane and the pipe. + */ + if (HAS_GMCH_DISPLAY(dev_priv) && old_crtc_state->base.active && + pipe_config->disable_cxsr && intel_set_memory_cxsr(dev_priv, false)) + intel_wait_for_vblank(dev_priv, crtc->pipe); /* * IVB workaround: must disable low power watermarks for at least @@ -5142,12 +5100,11 @@ static void intel_encoders_pre_pll_enable(struct drm_crtc *crtc, struct intel_crtc_state *crtc_state, struct drm_atomic_state *old_state) { - struct drm_connector_state *old_conn_state; + struct drm_connector_state *conn_state; struct drm_connector *conn; int i; - for_each_connector_in_state(old_state, conn, old_conn_state, i) { - struct drm_connector_state *conn_state = conn->state; + for_each_new_connector_in_state(old_state, conn, conn_state, i) { struct intel_encoder *encoder = to_intel_encoder(conn_state->best_encoder); @@ -5163,12 +5120,11 @@ static void intel_encoders_pre_enable(struct drm_crtc *crtc, struct intel_crtc_state *crtc_state, struct drm_atomic_state *old_state) { - struct drm_connector_state *old_conn_state; + struct drm_connector_state *conn_state; struct drm_connector *conn; int i; - for_each_connector_in_state(old_state, conn, old_conn_state, i) { - struct drm_connector_state *conn_state = conn->state; + for_each_new_connector_in_state(old_state, conn, conn_state, i) { struct intel_encoder *encoder = to_intel_encoder(conn_state->best_encoder); @@ -5184,12 +5140,11 @@ 
static void intel_encoders_enable(struct drm_crtc *crtc, struct intel_crtc_state *crtc_state, struct drm_atomic_state *old_state) { - struct drm_connector_state *old_conn_state; + struct drm_connector_state *conn_state; struct drm_connector *conn; int i; - for_each_connector_in_state(old_state, conn, old_conn_state, i) { - struct drm_connector_state *conn_state = conn->state; + for_each_new_connector_in_state(old_state, conn, conn_state, i) { struct intel_encoder *encoder = to_intel_encoder(conn_state->best_encoder); @@ -5209,7 +5164,7 @@ static void intel_encoders_disable(struct drm_crtc *crtc, struct drm_connector *conn; int i; - for_each_connector_in_state(old_state, conn, old_conn_state, i) { + for_each_old_connector_in_state(old_state, conn, old_conn_state, i) { struct intel_encoder *encoder = to_intel_encoder(old_conn_state->best_encoder); @@ -5229,7 +5184,7 @@ static void intel_encoders_post_disable(struct drm_crtc *crtc, struct drm_connector *conn; int i; - for_each_connector_in_state(old_state, conn, old_conn_state, i) { + for_each_old_connector_in_state(old_state, conn, old_conn_state, i) { struct intel_encoder *encoder = to_intel_encoder(old_conn_state->best_encoder); @@ -5249,7 +5204,7 @@ static void intel_encoders_post_pll_disable(struct drm_crtc *crtc, struct drm_connector *conn; int i; - for_each_connector_in_state(old_state, conn, old_conn_state, i) { + for_each_old_connector_in_state(old_state, conn, old_conn_state, i) { struct intel_encoder *encoder = to_intel_encoder(old_conn_state->best_encoder); @@ -5333,7 +5288,7 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config, intel_enable_pipe(intel_crtc); if (intel_crtc->config->has_pch_encoder) - ironlake_pch_enable(crtc); + ironlake_pch_enable(pipe_config); assert_vblank_disabled(crtc); drm_crtc_vblank_on(crtc); @@ -5415,10 +5370,10 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_encoders_pre_enable(crtc, pipe_config, old_state); if 
(intel_crtc->config->has_pch_encoder) - dev_priv->display.fdi_link_train(crtc); + dev_priv->display.fdi_link_train(intel_crtc, pipe_config); if (!transcoder_is_dsi(cpu_transcoder)) - intel_ddi_enable_pipe_clock(intel_crtc); + intel_ddi_enable_pipe_clock(pipe_config); if (INTEL_GEN(dev_priv) >= 9) skylake_pfit_enable(intel_crtc); @@ -5431,9 +5386,9 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, */ intel_color_load_luts(&pipe_config->base); - intel_ddi_set_pipe_settings(crtc); + intel_ddi_set_pipe_settings(pipe_config); if (!transcoder_is_dsi(cpu_transcoder)) - intel_ddi_enable_transcoder_func(crtc); + intel_ddi_enable_transcoder_func(pipe_config); if (dev_priv->display.initial_watermarks != NULL) dev_priv->display.initial_watermarks(old_intel_state, pipe_config); @@ -5443,10 +5398,10 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_enable_pipe(intel_crtc); if (intel_crtc->config->has_pch_encoder) - lpt_pch_enable(crtc); + lpt_pch_enable(pipe_config); if (intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DP_MST)) - intel_ddi_set_vc_payload_alloc(crtc, true); + intel_ddi_set_vc_payload_alloc(pipe_config, true); assert_vblank_disabled(crtc); drm_crtc_vblank_on(crtc); @@ -5568,7 +5523,7 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, intel_disable_pipe(intel_crtc); if (intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DP_MST)) - intel_ddi_set_vc_payload_alloc(crtc, false); + intel_ddi_set_vc_payload_alloc(intel_crtc->config, false); if (!transcoder_is_dsi(cpu_transcoder)) intel_ddi_disable_transcoder_func(dev_priv, cpu_transcoder); @@ -5579,7 +5534,7 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, ironlake_pfit_disable(intel_crtc, false); if (!transcoder_is_dsi(cpu_transcoder)) - intel_ddi_disable_pipe_clock(intel_crtc); + intel_ddi_disable_pipe_clock(intel_crtc->config); intel_encoders_post_disable(crtc, old_crtc_state, old_state); @@ -5612,7 +5567,7 
@@ static void i9xx_pfit_enable(struct intel_crtc *crtc) I915_WRITE(BCLRPAT(crtc->pipe), 0); } -static enum intel_display_power_domain port_to_power_domain(enum port port) +enum intel_display_power_domain intel_port_to_power_domain(enum port port) { switch (port) { case PORT_A: @@ -5631,91 +5586,15 @@ static enum intel_display_power_domain port_to_power_domain(enum port port) } } -static enum intel_display_power_domain port_to_aux_power_domain(enum port port) -{ - switch (port) { - case PORT_A: - return POWER_DOMAIN_AUX_A; - case PORT_B: - return POWER_DOMAIN_AUX_B; - case PORT_C: - return POWER_DOMAIN_AUX_C; - case PORT_D: - return POWER_DOMAIN_AUX_D; - case PORT_E: - /* FIXME: Check VBT for actual wiring of PORT E */ - return POWER_DOMAIN_AUX_D; - default: - MISSING_CASE(port); - return POWER_DOMAIN_AUX_A; - } -} - -enum intel_display_power_domain -intel_display_port_power_domain(struct intel_encoder *intel_encoder) -{ - struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); - struct intel_digital_port *intel_dig_port; - - switch (intel_encoder->type) { - case INTEL_OUTPUT_UNKNOWN: - /* Only DDI platforms should ever use this output type */ - WARN_ON_ONCE(!HAS_DDI(dev_priv)); - case INTEL_OUTPUT_DP: - case INTEL_OUTPUT_HDMI: - case INTEL_OUTPUT_EDP: - intel_dig_port = enc_to_dig_port(&intel_encoder->base); - return port_to_power_domain(intel_dig_port->port); - case INTEL_OUTPUT_DP_MST: - intel_dig_port = enc_to_mst(&intel_encoder->base)->primary; - return port_to_power_domain(intel_dig_port->port); - case INTEL_OUTPUT_ANALOG: - return POWER_DOMAIN_PORT_CRT; - case INTEL_OUTPUT_DSI: - return POWER_DOMAIN_PORT_DSI; - default: - return POWER_DOMAIN_PORT_OTHER; - } -} - -enum intel_display_power_domain -intel_display_port_aux_power_domain(struct intel_encoder *intel_encoder) -{ - struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); - struct intel_digital_port *intel_dig_port; - - switch (intel_encoder->type) { - case 
INTEL_OUTPUT_UNKNOWN: - case INTEL_OUTPUT_HDMI: - /* - * Only DDI platforms should ever use these output types. - * We can get here after the HDMI detect code has already set - * the type of the shared encoder. Since we can't be sure - * what's the status of the given connectors, play safe and - * run the DP detection too. - */ - WARN_ON_ONCE(!HAS_DDI(dev_priv)); - case INTEL_OUTPUT_DP: - case INTEL_OUTPUT_EDP: - intel_dig_port = enc_to_dig_port(&intel_encoder->base); - return port_to_aux_power_domain(intel_dig_port->port); - case INTEL_OUTPUT_DP_MST: - intel_dig_port = enc_to_mst(&intel_encoder->base)->primary; - return port_to_aux_power_domain(intel_dig_port->port); - default: - MISSING_CASE(intel_encoder->type); - return POWER_DOMAIN_AUX_A; - } -} - -static unsigned long get_crtc_power_domains(struct drm_crtc *crtc, - struct intel_crtc_state *crtc_state) +static u64 get_crtc_power_domains(struct drm_crtc *crtc, + struct intel_crtc_state *crtc_state) { struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_encoder *encoder; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum pipe pipe = intel_crtc->pipe; - unsigned long mask; + u64 mask; enum transcoder transcoder = crtc_state->cpu_transcoder; if (!crtc_state->base.active) @@ -5725,28 +5604,31 @@ static unsigned long get_crtc_power_domains(struct drm_crtc *crtc, mask |= BIT(POWER_DOMAIN_TRANSCODER(transcoder)); if (crtc_state->pch_pfit.enabled || crtc_state->pch_pfit.force_thru) - mask |= BIT(POWER_DOMAIN_PIPE_PANEL_FITTER(pipe)); + mask |= BIT_ULL(POWER_DOMAIN_PIPE_PANEL_FITTER(pipe)); drm_for_each_encoder_mask(encoder, dev, crtc_state->base.encoder_mask) { struct intel_encoder *intel_encoder = to_intel_encoder(encoder); - mask |= BIT(intel_display_port_power_domain(intel_encoder)); + mask |= BIT_ULL(intel_encoder->power_domain); } + if (HAS_DDI(dev_priv) && crtc_state->has_audio) + mask |= BIT(POWER_DOMAIN_AUDIO); + if (crtc_state->shared_dpll) - mask |= 
BIT(POWER_DOMAIN_PLLS); + mask |= BIT_ULL(POWER_DOMAIN_PLLS); return mask; } -static unsigned long +static u64 modeset_get_crtc_power_domains(struct drm_crtc *crtc, struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum intel_display_power_domain domain; - unsigned long domains, new_domains, old_domains; + u64 domains, new_domains, old_domains; old_domains = intel_crtc->enabled_power_domains; intel_crtc->enabled_power_domains = new_domains = @@ -5761,7 +5643,7 @@ modeset_get_crtc_power_domains(struct drm_crtc *crtc, } static void modeset_put_power_domains(struct drm_i915_private *dev_priv, - unsigned long domains) + u64 domains) { enum intel_display_power_domain domain; @@ -5769,922 +5651,11 @@ static void modeset_put_power_domains(struct drm_i915_private *dev_priv, intel_display_power_put(dev_priv, domain); } -static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) -{ - int max_cdclk_freq = dev_priv->max_cdclk_freq; - - if (IS_GEMINILAKE(dev_priv)) - return 2 * max_cdclk_freq; - else if (INTEL_INFO(dev_priv)->gen >= 9 || - IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - return max_cdclk_freq; - else if (IS_CHERRYVIEW(dev_priv)) - return max_cdclk_freq*95/100; - else if (INTEL_INFO(dev_priv)->gen < 4) - return 2*max_cdclk_freq*90/100; - else - return max_cdclk_freq*90/100; -} - -static int skl_calc_cdclk(int max_pixclk, int vco); - -static void intel_update_max_cdclk(struct drm_i915_private *dev_priv) -{ - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - u32 limit = I915_READ(SKL_DFSM) & SKL_DFSM_CDCLK_LIMIT_MASK; - int max_cdclk, vco; - - vco = dev_priv->skl_preferred_vco_freq; - WARN_ON(vco != 8100000 && vco != 8640000); - - /* - * Use the lower (vco 8640) cdclk values as a - * first guess. skl_calc_cdclk() will correct it - * if the preferred vco is 8100 instead. 
- */ - if (limit == SKL_DFSM_CDCLK_LIMIT_675) - max_cdclk = 617143; - else if (limit == SKL_DFSM_CDCLK_LIMIT_540) - max_cdclk = 540000; - else if (limit == SKL_DFSM_CDCLK_LIMIT_450) - max_cdclk = 432000; - else - max_cdclk = 308571; - - dev_priv->max_cdclk_freq = skl_calc_cdclk(max_cdclk, vco); - } else if (IS_GEMINILAKE(dev_priv)) { - dev_priv->max_cdclk_freq = 316800; - } else if (IS_BROXTON(dev_priv)) { - dev_priv->max_cdclk_freq = 624000; - } else if (IS_BROADWELL(dev_priv)) { - /* - * FIXME with extra cooling we can allow - * 540 MHz for ULX and 675 Mhz for ULT. - * How can we know if extra cooling is - * available? PCI ID, VTB, something else? - */ - if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) - dev_priv->max_cdclk_freq = 450000; - else if (IS_BDW_ULX(dev_priv)) - dev_priv->max_cdclk_freq = 450000; - else if (IS_BDW_ULT(dev_priv)) - dev_priv->max_cdclk_freq = 540000; - else - dev_priv->max_cdclk_freq = 675000; - } else if (IS_CHERRYVIEW(dev_priv)) { - dev_priv->max_cdclk_freq = 320000; - } else if (IS_VALLEYVIEW(dev_priv)) { - dev_priv->max_cdclk_freq = 400000; - } else { - /* otherwise assume cdclk is fixed */ - dev_priv->max_cdclk_freq = dev_priv->cdclk_freq; - } - - dev_priv->max_dotclk_freq = intel_compute_max_dotclk(dev_priv); - - DRM_DEBUG_DRIVER("Max CD clock rate: %d kHz\n", - dev_priv->max_cdclk_freq); - - DRM_DEBUG_DRIVER("Max dotclock rate: %d kHz\n", - dev_priv->max_dotclk_freq); -} - -static void intel_update_cdclk(struct drm_i915_private *dev_priv) -{ - dev_priv->cdclk_freq = dev_priv->display.get_display_clock_speed(dev_priv); - - if (INTEL_GEN(dev_priv) >= 9) - DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz, VCO: %d kHz, ref: %d kHz\n", - dev_priv->cdclk_freq, dev_priv->cdclk_pll.vco, - dev_priv->cdclk_pll.ref); - else - DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz\n", - dev_priv->cdclk_freq); - - /* - * 9:0 CMBUS [sic] CDCLK frequency (cdfreq): - * Programmng [sic] note: bit[9:2] should be programmed to the number - * of cdclk that 
generates 4MHz reference clock freq which is used to - * generate GMBus clock. This will vary with the cdclk freq. - */ - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - I915_WRITE(GMBUSFREQ_VLV, DIV_ROUND_UP(dev_priv->cdclk_freq, 1000)); -} - -/* convert from kHz to .1 fixpoint MHz with -1MHz offset */ -static int skl_cdclk_decimal(int cdclk) -{ - return DIV_ROUND_CLOSEST(cdclk - 1000, 500); -} - -static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) -{ - int ratio; - - if (cdclk == dev_priv->cdclk_pll.ref) - return 0; - - switch (cdclk) { - default: - MISSING_CASE(cdclk); - case 144000: - case 288000: - case 384000: - case 576000: - ratio = 60; - break; - case 624000: - ratio = 65; - break; - } - - return dev_priv->cdclk_pll.ref * ratio; -} - -static int glk_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) -{ - int ratio; - - if (cdclk == dev_priv->cdclk_pll.ref) - return 0; - - switch (cdclk) { - default: - MISSING_CASE(cdclk); - case 79200: - case 158400: - case 316800: - ratio = 33; - break; - } - - return dev_priv->cdclk_pll.ref * ratio; -} - -static void bxt_de_pll_disable(struct drm_i915_private *dev_priv) -{ - I915_WRITE(BXT_DE_PLL_ENABLE, 0); - - /* Timeout 200us */ - if (intel_wait_for_register(dev_priv, - BXT_DE_PLL_ENABLE, BXT_DE_PLL_LOCK, 0, - 1)) - DRM_ERROR("timeout waiting for DE PLL unlock\n"); - - dev_priv->cdclk_pll.vco = 0; -} - -static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) -{ - int ratio = DIV_ROUND_CLOSEST(vco, dev_priv->cdclk_pll.ref); - u32 val; - - val = I915_READ(BXT_DE_PLL_CTL); - val &= ~BXT_DE_PLL_RATIO_MASK; - val |= BXT_DE_PLL_RATIO(ratio); - I915_WRITE(BXT_DE_PLL_CTL, val); - - I915_WRITE(BXT_DE_PLL_ENABLE, BXT_DE_PLL_PLL_ENABLE); - - /* Timeout 200us */ - if (intel_wait_for_register(dev_priv, - BXT_DE_PLL_ENABLE, - BXT_DE_PLL_LOCK, - BXT_DE_PLL_LOCK, - 1)) - DRM_ERROR("timeout waiting for DE PLL lock\n"); - - dev_priv->cdclk_pll.vco = vco; -} - -static void 
bxt_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) -{ - u32 val, divider; - int vco, ret; - - if (IS_GEMINILAKE(dev_priv)) - vco = glk_de_pll_vco(dev_priv, cdclk); - else - vco = bxt_de_pll_vco(dev_priv, cdclk); - - DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz (VCO %d kHz)\n", cdclk, vco); - - /* cdclk = vco / 2 / div{1,1.5,2,4} */ - switch (DIV_ROUND_CLOSEST(vco, cdclk)) { - case 8: - divider = BXT_CDCLK_CD2X_DIV_SEL_4; - break; - case 4: - divider = BXT_CDCLK_CD2X_DIV_SEL_2; - break; - case 3: - WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); - divider = BXT_CDCLK_CD2X_DIV_SEL_1_5; - break; - case 2: - divider = BXT_CDCLK_CD2X_DIV_SEL_1; - break; - default: - WARN_ON(cdclk != dev_priv->cdclk_pll.ref); - WARN_ON(vco != 0); - - divider = BXT_CDCLK_CD2X_DIV_SEL_1; - break; - } - - /* Inform power controller of upcoming frequency change */ - mutex_lock(&dev_priv->rps.hw_lock); - ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, - 0x80000000); - mutex_unlock(&dev_priv->rps.hw_lock); - - if (ret) { - DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n", - ret, cdclk); - return; - } - - if (dev_priv->cdclk_pll.vco != 0 && - dev_priv->cdclk_pll.vco != vco) - bxt_de_pll_disable(dev_priv); - - if (dev_priv->cdclk_pll.vco != vco) - bxt_de_pll_enable(dev_priv, vco); - - val = divider | skl_cdclk_decimal(cdclk); - /* - * FIXME if only the cd2x divider needs changing, it could be done - * without shutting off the pipe (if only one pipe is active). - */ - val |= BXT_CDCLK_CD2X_PIPE_NONE; - /* - * Disable SSA Precharge when CD clock frequency < 500 MHz, - * enable otherwise. 
- */ - if (cdclk >= 500000) - val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; - I915_WRITE(CDCLK_CTL, val); - - mutex_lock(&dev_priv->rps.hw_lock); - ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, - DIV_ROUND_UP(cdclk, 25000)); - mutex_unlock(&dev_priv->rps.hw_lock); - - if (ret) { - DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n", - ret, cdclk); - return; - } - - intel_update_cdclk(dev_priv); -} - -static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) -{ - u32 cdctl, expected; - - intel_update_cdclk(dev_priv); - - if (dev_priv->cdclk_pll.vco == 0 || - dev_priv->cdclk_freq == dev_priv->cdclk_pll.ref) - goto sanitize; - - /* DPLL okay; verify the cdclock - * - * Some BIOS versions leave an incorrect decimal frequency value and - * set reserved MBZ bits in CDCLK_CTL at least during exiting from S4, - * so sanitize this register. - */ - cdctl = I915_READ(CDCLK_CTL); - /* - * Let's ignore the pipe field, since BIOS could have configured the - * dividers both synching to an active pipe, or asynchronously - * (PIPE_NONE). - */ - cdctl &= ~BXT_CDCLK_CD2X_PIPE_NONE; - - expected = (cdctl & BXT_CDCLK_CD2X_DIV_SEL_MASK) | - skl_cdclk_decimal(dev_priv->cdclk_freq); - /* - * Disable SSA Precharge when CD clock frequency < 500 MHz, - * enable otherwise. - */ - if (dev_priv->cdclk_freq >= 500000) - expected |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; - - if (cdctl == expected) - /* All well; nothing to sanitize */ - return; - -sanitize: - DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); - - /* force cdclk programming */ - dev_priv->cdclk_freq = 0; - - /* force full PLL disable + enable */ - dev_priv->cdclk_pll.vco = -1; -} - -void bxt_init_cdclk(struct drm_i915_private *dev_priv) -{ - int cdclk; - - bxt_sanitize_cdclk(dev_priv); - - if (dev_priv->cdclk_freq != 0 && dev_priv->cdclk_pll.vco != 0) - return; - - /* - * FIXME: - * - The initial CDCLK needs to be read from VBT. - * Need to make this change after VBT has changes for BXT. 
- */ - if (IS_GEMINILAKE(dev_priv)) - cdclk = glk_calc_cdclk(0); - else - cdclk = bxt_calc_cdclk(0); - - bxt_set_cdclk(dev_priv, cdclk); -} - -void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) -{ - bxt_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref); -} - -static int skl_calc_cdclk(int max_pixclk, int vco) -{ - if (vco == 8640000) { - if (max_pixclk > 540000) - return 617143; - else if (max_pixclk > 432000) - return 540000; - else if (max_pixclk > 308571) - return 432000; - else - return 308571; - } else { - if (max_pixclk > 540000) - return 675000; - else if (max_pixclk > 450000) - return 540000; - else if (max_pixclk > 337500) - return 450000; - else - return 337500; - } -} - -static void -skl_dpll0_update(struct drm_i915_private *dev_priv) -{ - u32 val; - - dev_priv->cdclk_pll.ref = 24000; - dev_priv->cdclk_pll.vco = 0; - - val = I915_READ(LCPLL1_CTL); - if ((val & LCPLL_PLL_ENABLE) == 0) - return; - - if (WARN_ON((val & LCPLL_PLL_LOCK) == 0)) - return; - - val = I915_READ(DPLL_CTRL1); - - if (WARN_ON((val & (DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | - DPLL_CTRL1_SSC(SKL_DPLL0) | - DPLL_CTRL1_OVERRIDE(SKL_DPLL0))) != - DPLL_CTRL1_OVERRIDE(SKL_DPLL0))) - return; - - switch (val & DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)) { - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, SKL_DPLL0): - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1350, SKL_DPLL0): - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1620, SKL_DPLL0): - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2700, SKL_DPLL0): - dev_priv->cdclk_pll.vco = 8100000; - break; - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, SKL_DPLL0): - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2160, SKL_DPLL0): - dev_priv->cdclk_pll.vco = 8640000; - break; - default: - MISSING_CASE(val & DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); - break; - } -} - -void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv, int vco) -{ - bool changed = dev_priv->skl_preferred_vco_freq != vco; - - dev_priv->skl_preferred_vco_freq = 
vco; - - if (changed) - intel_update_max_cdclk(dev_priv); -} - -static void -skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco) -{ - int min_cdclk = skl_calc_cdclk(0, vco); - u32 val; - - WARN_ON(vco != 8100000 && vco != 8640000); - - /* select the minimum CDCLK before enabling DPLL 0 */ - val = CDCLK_FREQ_337_308 | skl_cdclk_decimal(min_cdclk); - I915_WRITE(CDCLK_CTL, val); - POSTING_READ(CDCLK_CTL); - - /* - * We always enable DPLL0 with the lowest link rate possible, but still - * taking into account the VCO required to operate the eDP panel at the - * desired frequency. The usual DP link rates operate with a VCO of - * 8100 while the eDP 1.4 alternate link rates need a VCO of 8640. - * The modeset code is responsible for the selection of the exact link - * rate later on, with the constraint of choosing a frequency that - * works with vco. - */ - val = I915_READ(DPLL_CTRL1); - - val &= ~(DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | DPLL_CTRL1_SSC(SKL_DPLL0) | - DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); - val |= DPLL_CTRL1_OVERRIDE(SKL_DPLL0); - if (vco == 8640000) - val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, - SKL_DPLL0); - else - val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, - SKL_DPLL0); - - I915_WRITE(DPLL_CTRL1, val); - POSTING_READ(DPLL_CTRL1); - - I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) | LCPLL_PLL_ENABLE); - - if (intel_wait_for_register(dev_priv, - LCPLL1_CTL, LCPLL_PLL_LOCK, LCPLL_PLL_LOCK, - 5)) - DRM_ERROR("DPLL0 not locked\n"); - - dev_priv->cdclk_pll.vco = vco; - - /* We'll want to keep using the current vco from now on. 
*/ - skl_set_preferred_cdclk_vco(dev_priv, vco); -} - -static void -skl_dpll0_disable(struct drm_i915_private *dev_priv) -{ - I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) & ~LCPLL_PLL_ENABLE); - if (intel_wait_for_register(dev_priv, - LCPLL1_CTL, LCPLL_PLL_LOCK, 0, - 1)) - DRM_ERROR("Couldn't disable DPLL0\n"); - - dev_priv->cdclk_pll.vco = 0; -} - -static void skl_set_cdclk(struct drm_i915_private *dev_priv, int cdclk, int vco) -{ - u32 freq_select, pcu_ack; - int ret; - - WARN_ON((cdclk == 24000) != (vco == 0)); - - DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz (VCO %d kHz)\n", cdclk, vco); - - mutex_lock(&dev_priv->rps.hw_lock); - ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, - SKL_CDCLK_PREPARE_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, 3); - mutex_unlock(&dev_priv->rps.hw_lock); - if (ret) { - DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", - ret); - return; - } - - /* set CDCLK_CTL */ - switch (cdclk) { - case 450000: - case 432000: - freq_select = CDCLK_FREQ_450_432; - pcu_ack = 1; - break; - case 540000: - freq_select = CDCLK_FREQ_540; - pcu_ack = 2; - break; - case 308571: - case 337500: - default: - freq_select = CDCLK_FREQ_337_308; - pcu_ack = 0; - break; - case 617143: - case 675000: - freq_select = CDCLK_FREQ_675_617; - pcu_ack = 3; - break; - } - - if (dev_priv->cdclk_pll.vco != 0 && - dev_priv->cdclk_pll.vco != vco) - skl_dpll0_disable(dev_priv); - - if (dev_priv->cdclk_pll.vco != vco) - skl_dpll0_enable(dev_priv, vco); - - I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(cdclk)); - POSTING_READ(CDCLK_CTL); - - /* inform PCU of the change */ - mutex_lock(&dev_priv->rps.hw_lock); - sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); - mutex_unlock(&dev_priv->rps.hw_lock); - - intel_update_cdclk(dev_priv); -} - -static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv); - -void skl_uninit_cdclk(struct drm_i915_private *dev_priv) -{ - skl_set_cdclk(dev_priv, 
dev_priv->cdclk_pll.ref, 0); -} - -void skl_init_cdclk(struct drm_i915_private *dev_priv) -{ - int cdclk, vco; - - skl_sanitize_cdclk(dev_priv); - - if (dev_priv->cdclk_freq != 0 && dev_priv->cdclk_pll.vco != 0) { - /* - * Use the current vco as our initial - * guess as to what the preferred vco is. - */ - if (dev_priv->skl_preferred_vco_freq == 0) - skl_set_preferred_cdclk_vco(dev_priv, - dev_priv->cdclk_pll.vco); - return; - } - - vco = dev_priv->skl_preferred_vco_freq; - if (vco == 0) - vco = 8100000; - cdclk = skl_calc_cdclk(0, vco); - - skl_set_cdclk(dev_priv, cdclk, vco); -} - -static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv) -{ - uint32_t cdctl, expected; - - /* - * check if the pre-os intialized the display - * There is SWF18 scratchpad register defined which is set by the - * pre-os which can be used by the OS drivers to check the status - */ - if ((I915_READ(SWF_ILK(0x18)) & 0x00FFFFFF) == 0) - goto sanitize; - - intel_update_cdclk(dev_priv); - /* Is PLL enabled and locked ? */ - if (dev_priv->cdclk_pll.vco == 0 || - dev_priv->cdclk_freq == dev_priv->cdclk_pll.ref) - goto sanitize; - - /* DPLL okay; verify the cdclock - * - * Noticed in some instances that the freq selection is correct but - * decimal part is programmed wrong from BIOS where pre-os does not - * enable display. Verify the same as well. 
- */ - cdctl = I915_READ(CDCLK_CTL); - expected = (cdctl & CDCLK_FREQ_SEL_MASK) | - skl_cdclk_decimal(dev_priv->cdclk_freq); - if (cdctl == expected) - /* All well; nothing to sanitize */ - return; - -sanitize: - DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); - - /* force cdclk programming */ - dev_priv->cdclk_freq = 0; - /* force full PLL disable + enable */ - dev_priv->cdclk_pll.vco = -1; -} - -/* Adjust CDclk dividers to allow high res or save power if possible */ -static void valleyview_set_cdclk(struct drm_device *dev, int cdclk) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - u32 val, cmd; - - WARN_ON(dev_priv->display.get_display_clock_speed(dev_priv) - != dev_priv->cdclk_freq); - - if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */ - cmd = 2; - else if (cdclk == 266667) - cmd = 1; - else - cmd = 0; - - mutex_lock(&dev_priv->rps.hw_lock); - val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); - val &= ~DSPFREQGUAR_MASK; - val |= (cmd << DSPFREQGUAR_SHIFT); - vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); - if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & - DSPFREQSTAT_MASK) == (cmd << DSPFREQSTAT_SHIFT), - 50)) { - DRM_ERROR("timed out waiting for CDclk change\n"); - } - mutex_unlock(&dev_priv->rps.hw_lock); - - mutex_lock(&dev_priv->sb_lock); - - if (cdclk == 400000) { - u32 divider; - - divider = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; - - /* adjust cdclk divider */ - val = vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL); - val &= ~CCK_FREQUENCY_VALUES; - val |= divider; - vlv_cck_write(dev_priv, CCK_DISPLAY_CLOCK_CONTROL, val); - - if (wait_for((vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL) & - CCK_FREQUENCY_STATUS) == (divider << CCK_FREQUENCY_STATUS_SHIFT), - 50)) - DRM_ERROR("timed out waiting for CDclk change\n"); - } - - /* adjust self-refresh exit latency value */ - val = vlv_bunit_read(dev_priv, BUNIT_REG_BISOC); - val &= ~0x7f; - - /* - * For high bandwidth configs, we set a 
higher latency in the bunit - * so that the core display fetch happens in time to avoid underruns. - */ - if (cdclk == 400000) - val |= 4500 / 250; /* 4.5 usec */ - else - val |= 3000 / 250; /* 3.0 usec */ - vlv_bunit_write(dev_priv, BUNIT_REG_BISOC, val); - - mutex_unlock(&dev_priv->sb_lock); - - intel_update_cdclk(dev_priv); -} - -static void cherryview_set_cdclk(struct drm_device *dev, int cdclk) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - u32 val, cmd; - - WARN_ON(dev_priv->display.get_display_clock_speed(dev_priv) - != dev_priv->cdclk_freq); - - switch (cdclk) { - case 333333: - case 320000: - case 266667: - case 200000: - break; - default: - MISSING_CASE(cdclk); - return; - } - - /* - * Specs are full of misinformation, but testing on actual - * hardware has shown that we just need to write the desired - * CCK divider into the Punit register. - */ - cmd = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; - - mutex_lock(&dev_priv->rps.hw_lock); - val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); - val &= ~DSPFREQGUAR_MASK_CHV; - val |= (cmd << DSPFREQGUAR_SHIFT_CHV); - vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); - if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & - DSPFREQSTAT_MASK_CHV) == (cmd << DSPFREQSTAT_SHIFT_CHV), - 50)) { - DRM_ERROR("timed out waiting for CDclk change\n"); - } - mutex_unlock(&dev_priv->rps.hw_lock); - - intel_update_cdclk(dev_priv); -} - -static int valleyview_calc_cdclk(struct drm_i915_private *dev_priv, - int max_pixclk) -{ - int freq_320 = (dev_priv->hpll_freq << 1) % 320000 != 0 ? 333333 : 320000; - int limit = IS_CHERRYVIEW(dev_priv) ? 95 : 90; - - /* - * Really only a few cases to deal with, as only 4 CDclks are supported: - * 200MHz - * 267MHz - * 320/333MHz (depends on HPLL freq) - * 400MHz (VLV only) - * So we check to see whether we're above 90% (VLV) or 95% (CHV) - * of the lower bin and adjust if needed. - * - * We seem to get an unstable or solid color picture at 200MHz. 
- * Not sure what's wrong. For now use 200MHz only when all pipes - * are off. - */ - if (!IS_CHERRYVIEW(dev_priv) && - max_pixclk > freq_320*limit/100) - return 400000; - else if (max_pixclk > 266667*limit/100) - return freq_320; - else if (max_pixclk > 0) - return 266667; - else - return 200000; -} - -static int glk_calc_cdclk(int max_pixclk) -{ - if (max_pixclk > 2 * 158400) - return 316800; - else if (max_pixclk > 2 * 79200) - return 158400; - else - return 79200; -} - -static int bxt_calc_cdclk(int max_pixclk) -{ - if (max_pixclk > 576000) - return 624000; - else if (max_pixclk > 384000) - return 576000; - else if (max_pixclk > 288000) - return 384000; - else if (max_pixclk > 144000) - return 288000; - else - return 144000; -} - -/* Compute the max pixel clock for new configuration. */ -static int intel_mode_max_pixclk(struct drm_device *dev, - struct drm_atomic_state *state) -{ - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_crtc *crtc; - struct drm_crtc_state *crtc_state; - unsigned max_pixclk = 0, i; - enum pipe pipe; - - memcpy(intel_state->min_pixclk, dev_priv->min_pixclk, - sizeof(intel_state->min_pixclk)); - - for_each_crtc_in_state(state, crtc, crtc_state, i) { - int pixclk = 0; - - if (crtc_state->enable) - pixclk = crtc_state->adjusted_mode.crtc_clock; - - intel_state->min_pixclk[i] = pixclk; - } - - for_each_pipe(dev_priv, pipe) - max_pixclk = max(intel_state->min_pixclk[pipe], max_pixclk); - - return max_pixclk; -} - -static int valleyview_modeset_calc_cdclk(struct drm_atomic_state *state) -{ - struct drm_device *dev = state->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - int max_pixclk = intel_mode_max_pixclk(dev, state); - struct intel_atomic_state *intel_state = - to_intel_atomic_state(state); - - intel_state->cdclk = intel_state->dev_cdclk = - valleyview_calc_cdclk(dev_priv, max_pixclk); - - if (!intel_state->active_crtcs) - 
intel_state->dev_cdclk = valleyview_calc_cdclk(dev_priv, 0); - - return 0; -} - -static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->dev); - int max_pixclk = ilk_max_pixel_rate(state); - struct intel_atomic_state *intel_state = - to_intel_atomic_state(state); - int cdclk; - - if (IS_GEMINILAKE(dev_priv)) - cdclk = glk_calc_cdclk(max_pixclk); - else - cdclk = bxt_calc_cdclk(max_pixclk); - - intel_state->cdclk = intel_state->dev_cdclk = cdclk; - - if (!intel_state->active_crtcs) { - if (IS_GEMINILAKE(dev_priv)) - cdclk = glk_calc_cdclk(0); - else - cdclk = bxt_calc_cdclk(0); - - intel_state->dev_cdclk = cdclk; - } - - return 0; -} - -static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv) -{ - unsigned int credits, default_credits; - - if (IS_CHERRYVIEW(dev_priv)) - default_credits = PFI_CREDIT(12); - else - default_credits = PFI_CREDIT(8); - - if (dev_priv->cdclk_freq >= dev_priv->czclk_freq) { - /* CHV suggested value is 31 or 63 */ - if (IS_CHERRYVIEW(dev_priv)) - credits = PFI_CREDIT_63; - else - credits = PFI_CREDIT(15); - } else { - credits = default_credits; - } - - /* - * WA - write default credits before re-programming - * FIXME: should we also set the resend bit here? - */ - I915_WRITE(GCI_CONTROL, VGA_FAST_MODE_DISABLE | - default_credits); - - I915_WRITE(GCI_CONTROL, VGA_FAST_MODE_DISABLE | - credits | PFI_CREDIT_RESEND); - - /* - * FIXME is this guaranteed to clear - * immediately or should we poll for it? 
- */ - WARN_ON(I915_READ(GCI_CONTROL) & PFI_CREDIT_RESEND); -} - -static void valleyview_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_device *dev = old_state->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); - unsigned req_cdclk = old_intel_state->dev_cdclk; - - /* - * FIXME: We can end up here with all power domains off, yet - * with a CDCLK frequency other than the minimum. To account - * for this take the PIPE-A power domain, which covers the HW - * blocks needed for the following programming. This can be - * removed once it's guaranteed that we get here either with - * the minimum CDCLK set, or the required power domains - * enabled. - */ - intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); - - if (IS_CHERRYVIEW(dev_priv)) - cherryview_set_cdclk(dev, req_cdclk); - else - valleyview_set_cdclk(dev, req_cdclk); - - vlv_program_pfi_credits(dev_priv); - - intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A); -} - static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, struct drm_atomic_state *old_state) { + struct intel_atomic_state *old_intel_state = + to_intel_atomic_state(old_state); struct drm_crtc *crtc = pipe_config->base.crtc; struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -6729,7 +5700,8 @@ static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, intel_color_load_luts(&pipe_config->base); - intel_update_watermarks(intel_crtc); + dev_priv->display.initial_watermarks(old_intel_state, + pipe_config); intel_enable_pipe(intel_crtc); assert_vblank_disabled(crtc); @@ -6846,6 +5818,9 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state, if (!IS_GEN2(dev_priv)) intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); + + if (!dev_priv->display.initial_watermarks) + intel_update_watermarks(intel_crtc); } static void 
intel_crtc_disable_noatomic(struct drm_crtc *crtc) @@ -6854,7 +5829,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct drm_i915_private *dev_priv = to_i915(crtc->dev); enum intel_display_power_domain domain; - unsigned long domains; + u64 domains; struct drm_atomic_state *state; struct intel_crtc_state *crtc_state; int ret; @@ -7162,7 +6137,7 @@ static bool pipe_config_supports_ips(struct drm_i915_private *dev_priv, * * Should measure whether using a lower cdclk w/o IPS */ - return ilk_pipe_pixel_rate(pipe_config) <= + return pipe_config->pixel_rate <= dev_priv->max_cdclk_freq * 95 / 100; } @@ -7186,6 +6161,54 @@ static bool intel_crtc_supports_double_wide(const struct intel_crtc *crtc) (crtc->pipe == PIPE_A || IS_I915G(dev_priv)); } +static uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config) +{ + uint32_t pixel_rate; + + pixel_rate = pipe_config->base.adjusted_mode.crtc_clock; + + /* + * We only use IF-ID interlacing. If we ever use + * PF-ID we'll need to adjust the pixel_rate here. 
+ */ + + if (pipe_config->pch_pfit.enabled) { + uint64_t pipe_w, pipe_h, pfit_w, pfit_h; + uint32_t pfit_size = pipe_config->pch_pfit.size; + + pipe_w = pipe_config->pipe_src_w; + pipe_h = pipe_config->pipe_src_h; + + pfit_w = (pfit_size >> 16) & 0xFFFF; + pfit_h = pfit_size & 0xFFFF; + if (pipe_w < pfit_w) + pipe_w = pfit_w; + if (pipe_h < pfit_h) + pipe_h = pfit_h; + + if (WARN_ON(!pfit_w || !pfit_h)) + return pixel_rate; + + pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h, + pfit_w * pfit_h); + } + + return pixel_rate; +} + +static void intel_crtc_compute_pixel_rate(struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + + if (HAS_GMCH_DISPLAY(dev_priv)) + /* FIXME calculate proper pipe pixel rate for GMCH pfit */ + crtc_state->pixel_rate = + crtc_state->base.adjusted_mode.crtc_clock; + else + crtc_state->pixel_rate = + ilk_pipe_pixel_rate(crtc_state); +} + static int intel_crtc_compute_config(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config) { @@ -7232,6 +6255,8 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc, adjusted_mode->crtc_hsync_start == adjusted_mode->crtc_hdisplay) return -EINVAL; + intel_crtc_compute_pixel_rate(pipe_config); + if (HAS_IPS(dev_priv)) hsw_compute_ips_config(crtc, pipe_config); @@ -7241,428 +6266,6 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc, return 0; } -static int skylake_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - u32 cdctl; - - skl_dpll0_update(dev_priv); - - if (dev_priv->cdclk_pll.vco == 0) - return dev_priv->cdclk_pll.ref; - - cdctl = I915_READ(CDCLK_CTL); - - if (dev_priv->cdclk_pll.vco == 8640000) { - switch (cdctl & CDCLK_FREQ_SEL_MASK) { - case CDCLK_FREQ_450_432: - return 432000; - case CDCLK_FREQ_337_308: - return 308571; - case CDCLK_FREQ_540: - return 540000; - case CDCLK_FREQ_675_617: - return 617143; - default: - MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK); - } - } else { - switch 
(cdctl & CDCLK_FREQ_SEL_MASK) { - case CDCLK_FREQ_450_432: - return 450000; - case CDCLK_FREQ_337_308: - return 337500; - case CDCLK_FREQ_540: - return 540000; - case CDCLK_FREQ_675_617: - return 675000; - default: - MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK); - } - } - - return dev_priv->cdclk_pll.ref; -} - -static void bxt_de_pll_update(struct drm_i915_private *dev_priv) -{ - u32 val; - - dev_priv->cdclk_pll.ref = 19200; - dev_priv->cdclk_pll.vco = 0; - - val = I915_READ(BXT_DE_PLL_ENABLE); - if ((val & BXT_DE_PLL_PLL_ENABLE) == 0) - return; - - if (WARN_ON((val & BXT_DE_PLL_LOCK) == 0)) - return; - - val = I915_READ(BXT_DE_PLL_CTL); - dev_priv->cdclk_pll.vco = (val & BXT_DE_PLL_RATIO_MASK) * - dev_priv->cdclk_pll.ref; -} - -static int broxton_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - u32 divider; - int div, vco; - - bxt_de_pll_update(dev_priv); - - vco = dev_priv->cdclk_pll.vco; - if (vco == 0) - return dev_priv->cdclk_pll.ref; - - divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; - - switch (divider) { - case BXT_CDCLK_CD2X_DIV_SEL_1: - div = 2; - break; - case BXT_CDCLK_CD2X_DIV_SEL_1_5: - WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); - div = 3; - break; - case BXT_CDCLK_CD2X_DIV_SEL_2: - div = 4; - break; - case BXT_CDCLK_CD2X_DIV_SEL_4: - div = 8; - break; - default: - MISSING_CASE(divider); - return dev_priv->cdclk_pll.ref; - } - - return DIV_ROUND_CLOSEST(vco, div); -} - -static int broadwell_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - uint32_t lcpll = I915_READ(LCPLL_CTL); - uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; - - if (lcpll & LCPLL_CD_SOURCE_FCLK) - return 800000; - else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) - return 450000; - else if (freq == LCPLL_CLK_FREQ_450) - return 450000; - else if (freq == LCPLL_CLK_FREQ_54O_BDW) - return 540000; - else if (freq == LCPLL_CLK_FREQ_337_5_BDW) - return 337500; - else - return 675000; -} - -static int haswell_get_display_clock_speed(struct 
drm_i915_private *dev_priv) -{ - uint32_t lcpll = I915_READ(LCPLL_CTL); - uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; - - if (lcpll & LCPLL_CD_SOURCE_FCLK) - return 800000; - else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) - return 450000; - else if (freq == LCPLL_CLK_FREQ_450) - return 450000; - else if (IS_HSW_ULT(dev_priv)) - return 337500; - else - return 540000; -} - -static int valleyview_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - return vlv_get_cck_clock_hpll(dev_priv, "cdclk", - CCK_DISPLAY_CLOCK_CONTROL); -} - -static int ilk_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - return 450000; -} - -static int i945_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - return 400000; -} - -static int i915_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - return 333333; -} - -static int i9xx_misc_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - return 200000; -} - -static int pnv_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - u16 gcfgc = 0; - - pci_read_config_word(pdev, GCFGC, &gcfgc); - - switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { - case GC_DISPLAY_CLOCK_267_MHZ_PNV: - return 266667; - case GC_DISPLAY_CLOCK_333_MHZ_PNV: - return 333333; - case GC_DISPLAY_CLOCK_444_MHZ_PNV: - return 444444; - case GC_DISPLAY_CLOCK_200_MHZ_PNV: - return 200000; - default: - DRM_ERROR("Unknown pnv display core clock 0x%04x\n", gcfgc); - case GC_DISPLAY_CLOCK_133_MHZ_PNV: - return 133333; - case GC_DISPLAY_CLOCK_167_MHZ_PNV: - return 166667; - } -} - -static int i915gm_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - u16 gcfgc = 0; - - pci_read_config_word(pdev, GCFGC, &gcfgc); - - if (gcfgc & GC_LOW_FREQUENCY_ENABLE) - return 133333; - else { - switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { - case GC_DISPLAY_CLOCK_333_MHZ: - return 333333; - default: - case GC_DISPLAY_CLOCK_190_200_MHZ: - 
return 190000; - } - } -} - -static int i865_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - return 266667; -} - -static int i85x_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - u16 hpllcc = 0; - - /* - * 852GM/852GMV only supports 133 MHz and the HPLLCC - * encoding is different :( - * FIXME is this the right way to detect 852GM/852GMV? - */ - if (pdev->revision == 0x1) - return 133333; - - pci_bus_read_config_word(pdev->bus, - PCI_DEVFN(0, 3), HPLLCC, &hpllcc); - - /* Assume that the hardware is in the high speed state. This - * should be the default. - */ - switch (hpllcc & GC_CLOCK_CONTROL_MASK) { - case GC_CLOCK_133_200: - case GC_CLOCK_133_200_2: - case GC_CLOCK_100_200: - return 200000; - case GC_CLOCK_166_250: - return 250000; - case GC_CLOCK_100_133: - return 133333; - case GC_CLOCK_133_266: - case GC_CLOCK_133_266_2: - case GC_CLOCK_166_266: - return 266667; - } - - /* Shouldn't happen */ - return 0; -} - -static int i830_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - return 133333; -} - -static unsigned int intel_hpll_vco(struct drm_i915_private *dev_priv) -{ - static const unsigned int blb_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 4800000, - [4] = 6400000, - }; - static const unsigned int pnv_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 4800000, - [4] = 2666667, - }; - static const unsigned int cl_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 6400000, - [4] = 3333333, - [5] = 3566667, - [6] = 4266667, - }; - static const unsigned int elk_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 4800000, - }; - static const unsigned int ctg_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 6400000, - [4] = 2666667, - [5] = 4266667, - }; - const unsigned int *vco_table; - unsigned int vco; - uint8_t tmp = 0; - - /* FIXME other chipsets? 
*/ - if (IS_GM45(dev_priv)) - vco_table = ctg_vco; - else if (IS_G4X(dev_priv)) - vco_table = elk_vco; - else if (IS_I965GM(dev_priv)) - vco_table = cl_vco; - else if (IS_PINEVIEW(dev_priv)) - vco_table = pnv_vco; - else if (IS_G33(dev_priv)) - vco_table = blb_vco; - else - return 0; - - tmp = I915_READ(IS_MOBILE(dev_priv) ? HPLLVCO_MOBILE : HPLLVCO); - - vco = vco_table[tmp & 0x7]; - if (vco == 0) - DRM_ERROR("Bad HPLL VCO (HPLLVCO=0x%02x)\n", tmp); - else - DRM_DEBUG_KMS("HPLL VCO %u kHz\n", vco); - - return vco; -} - -static int gm45_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); - uint16_t tmp = 0; - - pci_read_config_word(pdev, GCFGC, &tmp); - - cdclk_sel = (tmp >> 12) & 0x1; - - switch (vco) { - case 2666667: - case 4000000: - case 5333333: - return cdclk_sel ? 333333 : 222222; - case 3200000: - return cdclk_sel ? 320000 : 228571; - default: - DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u, CFGC=0x%04x\n", vco, tmp); - return 222222; - } -} - -static int i965gm_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - static const uint8_t div_3200[] = { 16, 10, 8 }; - static const uint8_t div_4000[] = { 20, 12, 10 }; - static const uint8_t div_5333[] = { 24, 16, 14 }; - const uint8_t *div_table; - unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); - uint16_t tmp = 0; - - pci_read_config_word(pdev, GCFGC, &tmp); - - cdclk_sel = ((tmp >> 8) & 0x1f) - 1; - - if (cdclk_sel >= ARRAY_SIZE(div_3200)) - goto fail; - - switch (vco) { - case 3200000: - div_table = div_3200; - break; - case 4000000: - div_table = div_4000; - break; - case 5333333: - div_table = div_5333; - break; - default: - goto fail; - } - - return DIV_ROUND_CLOSEST(vco, div_table[cdclk_sel]); - -fail: - DRM_ERROR("Unable to determine CDCLK. 
HPLL VCO=%u kHz, CFGC=0x%04x\n", vco, tmp); - return 200000; -} - -static int g33_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - static const uint8_t div_3200[] = { 12, 10, 8, 7, 5, 16 }; - static const uint8_t div_4000[] = { 14, 12, 10, 8, 6, 20 }; - static const uint8_t div_4800[] = { 20, 14, 12, 10, 8, 24 }; - static const uint8_t div_5333[] = { 20, 16, 12, 12, 8, 28 }; - const uint8_t *div_table; - unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); - uint16_t tmp = 0; - - pci_read_config_word(pdev, GCFGC, &tmp); - - cdclk_sel = (tmp >> 4) & 0x7; - - if (cdclk_sel >= ARRAY_SIZE(div_3200)) - goto fail; - - switch (vco) { - case 3200000: - div_table = div_3200; - break; - case 4000000: - div_table = div_4000; - break; - case 4800000: - div_table = div_4800; - break; - case 5333333: - div_table = div_5333; - break; - default: - goto fail; - } - - return DIV_ROUND_CLOSEST(vco, div_table[cdclk_sel]); - -fail: - DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u kHz, CFGC=0x%08x\n", vco, tmp); - return 190476; -} - static void intel_reduce_m_n_ratio(uint32_t *num, uint32_t *den) { @@ -7676,6 +6279,17 @@ intel_reduce_m_n_ratio(uint32_t *num, uint32_t *den) static void compute_m_n(unsigned int m, unsigned int n, uint32_t *ret_m, uint32_t *ret_n) { + /* + * Reduce M/N as much as possible without loss in precision. Several DP + * dongles in particular seem to be fussy about too large *link* M/N + * values. The passed in values are more likely to have the least + * significant bits zero than M after rounding below, so do this first. 
+ */ + while ((m & 1) == 0 && (n & 1) == 0) { + m >>= 1; + n >>= 1; + } + *ret_n = min_t(unsigned int, roundup_pow_of_two(n), DATA_LINK_N_MAX); *ret_m = div_u64((uint64_t) m * *ret_n, n); intel_reduce_m_n_ratio(ret_m, ret_n); @@ -8757,9 +7371,7 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc, val = I915_READ(DSPSTRIDE(pipe)); fb->pitches[0] = val & 0xffffffc0; - aligned_height = intel_fb_align_height(dev, fb->height, - fb->format->format, - fb->modifier); + aligned_height = intel_fb_align_height(fb, 0, fb->height); plane_config->size = fb->pitches[0] * aligned_height; @@ -9767,7 +8379,7 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, tiling = val & PLANE_CTL_TILED_MASK; switch (tiling) { case PLANE_CTL_TILED_LINEAR: - fb->modifier = DRM_FORMAT_MOD_NONE; + fb->modifier = DRM_FORMAT_MOD_LINEAR; break; case PLANE_CTL_TILED_X: plane_config->tiling = I915_TILING_X; @@ -9794,13 +8406,10 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, fb->width = ((val >> 0) & 0x1fff) + 1; val = I915_READ(PLANE_STRIDE(pipe, 0)); - stride_mult = intel_fb_stride_alignment(dev_priv, fb->modifier, - fb->format->format); + stride_mult = intel_fb_stride_alignment(fb, 0); fb->pitches[0] = (val & 0x3ff) * stride_mult; - aligned_height = intel_fb_align_height(dev, fb->height, - fb->format->format, - fb->modifier); + aligned_height = intel_fb_align_height(fb, 0, fb->height); plane_config->size = fb->pitches[0] * aligned_height; @@ -9896,9 +8505,7 @@ ironlake_get_initial_plane_config(struct intel_crtc *crtc, val = I915_READ(DSPSTRIDE(pipe)); fb->pitches[0] = val & 0xffffffc0; - aligned_height = intel_fb_align_height(dev, fb->height, - fb->format->format, - fb->modifier); + aligned_height = intel_fb_align_height(fb, 0, fb->height); plane_config->size = fb->pitches[0] * aligned_height; @@ -10224,251 +8831,18 @@ void hsw_disable_pc8(struct drm_i915_private *dev_priv) } } -static void bxt_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_device 
*dev = old_state->dev; - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); - unsigned int req_cdclk = old_intel_state->dev_cdclk; - - bxt_set_cdclk(to_i915(dev), req_cdclk); -} - -static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, - int pixel_rate) -{ - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - - /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ - if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled) - pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95); - - /* BSpec says "Do not use DisplayPort with CDCLK less than - * 432 MHz, audio enabled, port width x4, and link rate - * HBR2 (5.4 GHz), or else there may be audio corruption or - * screen corruption." - */ - if (intel_crtc_has_dp_encoder(crtc_state) && - crtc_state->has_audio && - crtc_state->port_clock >= 540000 && - crtc_state->lane_count == 4) - pixel_rate = max(432000, pixel_rate); - - return pixel_rate; -} - -/* compute the max rate for new configuration */ -static int ilk_max_pixel_rate(struct drm_atomic_state *state) -{ - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct drm_i915_private *dev_priv = to_i915(state->dev); - struct drm_crtc *crtc; - struct drm_crtc_state *cstate; - struct intel_crtc_state *crtc_state; - unsigned max_pixel_rate = 0, i; - enum pipe pipe; - - memcpy(intel_state->min_pixclk, dev_priv->min_pixclk, - sizeof(intel_state->min_pixclk)); - - for_each_crtc_in_state(state, crtc, cstate, i) { - int pixel_rate; - - crtc_state = to_intel_crtc_state(cstate); - if (!crtc_state->base.enable) { - intel_state->min_pixclk[i] = 0; - continue; - } - - pixel_rate = ilk_pipe_pixel_rate(crtc_state); - - if (IS_BROADWELL(dev_priv) || IS_GEN9(dev_priv)) - pixel_rate = bdw_adjust_min_pipe_pixel_rate(crtc_state, - pixel_rate); - - intel_state->min_pixclk[i] = pixel_rate; - } - - for_each_pipe(dev_priv, pipe) - max_pixel_rate = max(intel_state->min_pixclk[pipe], 
max_pixel_rate); - - return max_pixel_rate; -} - -static void broadwell_set_cdclk(struct drm_device *dev, int cdclk) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - uint32_t val, data; - int ret; - - if (WARN((I915_READ(LCPLL_CTL) & - (LCPLL_PLL_DISABLE | LCPLL_PLL_LOCK | - LCPLL_CD_CLOCK_DISABLE | LCPLL_ROOT_CD_CLOCK_DISABLE | - LCPLL_CD2X_CLOCK_DISABLE | LCPLL_POWER_DOWN_ALLOW | - LCPLL_CD_SOURCE_FCLK)) != LCPLL_PLL_LOCK, - "trying to change cdclk frequency with cdclk not enabled\n")) - return; - - mutex_lock(&dev_priv->rps.hw_lock); - ret = sandybridge_pcode_write(dev_priv, - BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ, 0x0); - mutex_unlock(&dev_priv->rps.hw_lock); - if (ret) { - DRM_ERROR("failed to inform pcode about cdclk change\n"); - return; - } - - val = I915_READ(LCPLL_CTL); - val |= LCPLL_CD_SOURCE_FCLK; - I915_WRITE(LCPLL_CTL, val); - - if (wait_for_us(I915_READ(LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE, 1)) - DRM_ERROR("Switching to FCLK failed\n"); - - val = I915_READ(LCPLL_CTL); - val &= ~LCPLL_CLK_FREQ_MASK; - - switch (cdclk) { - case 450000: - val |= LCPLL_CLK_FREQ_450; - data = 0; - break; - case 540000: - val |= LCPLL_CLK_FREQ_54O_BDW; - data = 1; - break; - case 337500: - val |= LCPLL_CLK_FREQ_337_5_BDW; - data = 2; - break; - case 675000: - val |= LCPLL_CLK_FREQ_675_BDW; - data = 3; - break; - default: - WARN(1, "invalid cdclk frequency\n"); - return; - } - - I915_WRITE(LCPLL_CTL, val); - - val = I915_READ(LCPLL_CTL); - val &= ~LCPLL_CD_SOURCE_FCLK; - I915_WRITE(LCPLL_CTL, val); - - if (wait_for_us((I915_READ(LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) - DRM_ERROR("Switching back to LCPLL failed\n"); - - mutex_lock(&dev_priv->rps.hw_lock); - sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, data); - mutex_unlock(&dev_priv->rps.hw_lock); - - I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1); - - intel_update_cdclk(dev_priv); - - WARN(cdclk != dev_priv->cdclk_freq, - "cdclk requested %d kHz but got %d kHz\n", - cdclk, 
dev_priv->cdclk_freq); -} - -static int broadwell_calc_cdclk(int max_pixclk) -{ - if (max_pixclk > 540000) - return 675000; - else if (max_pixclk > 450000) - return 540000; - else if (max_pixclk > 337500) - return 450000; - else - return 337500; -} - -static int broadwell_modeset_calc_cdclk(struct drm_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->dev); - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - int max_pixclk = ilk_max_pixel_rate(state); - int cdclk; - - /* - * FIXME should also account for plane ratio - * once 64bpp pixel formats are supported. - */ - cdclk = broadwell_calc_cdclk(max_pixclk); - - if (cdclk > dev_priv->max_cdclk_freq) { - DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", - cdclk, dev_priv->max_cdclk_freq); - return -EINVAL; - } - - intel_state->cdclk = intel_state->dev_cdclk = cdclk; - if (!intel_state->active_crtcs) - intel_state->dev_cdclk = broadwell_calc_cdclk(0); - - return 0; -} - -static void broadwell_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_device *dev = old_state->dev; - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); - unsigned req_cdclk = old_intel_state->dev_cdclk; - - broadwell_set_cdclk(dev, req_cdclk); -} - -static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) -{ - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct drm_i915_private *dev_priv = to_i915(state->dev); - const int max_pixclk = ilk_max_pixel_rate(state); - int vco = intel_state->cdclk_pll_vco; - int cdclk; - - /* - * FIXME should also account for plane ratio - * once 64bpp pixel formats are supported. - */ - cdclk = skl_calc_cdclk(max_pixclk, vco); - - /* - * FIXME move the cdclk caclulation to - * compute_config() so we can fail gracegully. 
- */ - if (cdclk > dev_priv->max_cdclk_freq) { - DRM_ERROR("requested cdclk (%d kHz) exceeds max (%d kHz)\n", - cdclk, dev_priv->max_cdclk_freq); - cdclk = dev_priv->max_cdclk_freq; - } - - intel_state->cdclk = intel_state->dev_cdclk = cdclk; - if (!intel_state->active_crtcs) - intel_state->dev_cdclk = skl_calc_cdclk(0, vco); - - return 0; -} - -static void skl_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_i915_private *dev_priv = to_i915(old_state->dev); - struct intel_atomic_state *intel_state = to_intel_atomic_state(old_state); - unsigned int req_cdclk = intel_state->dev_cdclk; - unsigned int req_vco = intel_state->cdclk_pll_vco; - - skl_set_cdclk(dev_priv, req_cdclk, req_vco); -} - static int haswell_crtc_compute_clock(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state) { if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI)) { - if (!intel_ddi_pll_select(crtc, crtc_state)) + struct intel_encoder *encoder = + intel_ddi_get_crtc_new_encoder(crtc_state); + + if (!intel_get_shared_dpll(crtc, crtc_state, encoder)) { + DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n", + pipe_name(crtc->pipe)); return -EINVAL; + } } crtc->lowfreq_avail = false; @@ -10554,7 +8928,7 @@ static void haswell_get_ddi_pll(struct drm_i915_private *dev_priv, static bool hsw_get_transcoder_state(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config, - unsigned long *power_domain_mask) + u64 *power_domain_mask) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -10596,7 +8970,7 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc, power_domain = POWER_DOMAIN_TRANSCODER(pipe_config->cpu_transcoder); if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) return false; - *power_domain_mask |= BIT(power_domain); + *power_domain_mask |= BIT_ULL(power_domain); tmp = I915_READ(PIPECONF(pipe_config->cpu_transcoder)); @@ -10605,7 +8979,7 @@ static bool hsw_get_transcoder_state(struct 
intel_crtc *crtc, static bool bxt_get_dsi_transcoder_state(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config, - unsigned long *power_domain_mask) + u64 *power_domain_mask) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -10623,7 +8997,7 @@ static bool bxt_get_dsi_transcoder_state(struct intel_crtc *crtc, power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder); if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) continue; - *power_domain_mask |= BIT(power_domain); + *power_domain_mask |= BIT_ULL(power_domain); /* * The PLL needs to be enabled with a valid divider @@ -10663,7 +9037,7 @@ static void haswell_get_ddi_port_state(struct intel_crtc *crtc, port = (tmp & TRANS_DDI_PORT_MASK) >> TRANS_DDI_PORT_SHIFT; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) skylake_get_ddi_pll(dev_priv, port, pipe_config); else if (IS_GEN9_LP(dev_priv)) bxt_get_ddi_pll(dev_priv, port, pipe_config); @@ -10698,13 +9072,13 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum intel_display_power_domain power_domain; - unsigned long power_domain_mask; + u64 power_domain_mask; bool active; power_domain = POWER_DOMAIN_PIPE(crtc->pipe); if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) return false; - power_domain_mask = BIT(power_domain); + power_domain_mask = BIT_ULL(power_domain); pipe_config->shared_dpll = NULL; @@ -10738,7 +9112,7 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, power_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe); if (intel_display_power_get_if_enabled(dev_priv, power_domain)) { - power_domain_mask |= BIT(power_domain); + power_domain_mask |= BIT_ULL(power_domain); if (INTEL_GEN(dev_priv) >= 9) skylake_get_pfit_config(crtc, pipe_config); else @@ -10764,6 +9138,31 @@ out: return active; } +static u32 i845_cursor_ctl(const struct intel_crtc_state 
*crtc_state, + const struct intel_plane_state *plane_state) +{ + unsigned int width = plane_state->base.crtc_w; + unsigned int stride = roundup_pow_of_two(width) * 4; + + switch (stride) { + default: + WARN_ONCE(1, "Invalid cursor width/stride, width=%u, stride=%u\n", + width, stride); + stride = 256; + /* fallthrough */ + case 256: + case 512: + case 1024: + case 2048: + break; + } + + return CURSOR_ENABLE | + CURSOR_GAMMA_ENABLE | + CURSOR_FORMAT_ARGB | + CURSOR_STRIDE(stride); +} + static void i845_update_cursor(struct drm_crtc *crtc, u32 base, const struct intel_plane_state *plane_state) { @@ -10775,26 +9174,8 @@ static void i845_update_cursor(struct drm_crtc *crtc, u32 base, if (plane_state && plane_state->base.visible) { unsigned int width = plane_state->base.crtc_w; unsigned int height = plane_state->base.crtc_h; - unsigned int stride = roundup_pow_of_two(width) * 4; - - switch (stride) { - default: - WARN_ONCE(1, "Invalid cursor width/stride, width=%u, stride=%u\n", - width, stride); - stride = 256; - /* fallthrough */ - case 256: - case 512: - case 1024: - case 2048: - break; - } - - cntl |= CURSOR_ENABLE | - CURSOR_GAMMA_ENABLE | - CURSOR_FORMAT_ARGB | - CURSOR_STRIDE(stride); + cntl = plane_state->ctl; size = (height << 12) | width; } @@ -10805,28 +9186,65 @@ static void i845_update_cursor(struct drm_crtc *crtc, u32 base, /* On these chipsets we can only modify the base/size/stride * whilst the cursor is disabled. 
*/ - I915_WRITE(CURCNTR(PIPE_A), 0); - POSTING_READ(CURCNTR(PIPE_A)); + I915_WRITE_FW(CURCNTR(PIPE_A), 0); + POSTING_READ_FW(CURCNTR(PIPE_A)); intel_crtc->cursor_cntl = 0; } if (intel_crtc->cursor_base != base) { - I915_WRITE(CURBASE(PIPE_A), base); + I915_WRITE_FW(CURBASE(PIPE_A), base); intel_crtc->cursor_base = base; } if (intel_crtc->cursor_size != size) { - I915_WRITE(CURSIZE, size); + I915_WRITE_FW(CURSIZE, size); intel_crtc->cursor_size = size; } if (intel_crtc->cursor_cntl != cntl) { - I915_WRITE(CURCNTR(PIPE_A), cntl); - POSTING_READ(CURCNTR(PIPE_A)); + I915_WRITE_FW(CURCNTR(PIPE_A), cntl); + POSTING_READ_FW(CURCNTR(PIPE_A)); intel_crtc->cursor_cntl = cntl; } } +static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + enum pipe pipe = crtc->pipe; + u32 cntl; + + cntl = MCURSOR_GAMMA_ENABLE; + + if (HAS_DDI(dev_priv)) + cntl |= CURSOR_PIPE_CSC_ENABLE; + + cntl |= pipe << 28; /* Connect to correct pipe */ + + switch (plane_state->base.crtc_w) { + case 64: + cntl |= CURSOR_MODE_64_ARGB_AX; + break; + case 128: + cntl |= CURSOR_MODE_128_ARGB_AX; + break; + case 256: + cntl |= CURSOR_MODE_256_ARGB_AX; + break; + default: + MISSING_CASE(plane_state->base.crtc_w); + return 0; + } + + if (plane_state->base.rotation & DRM_ROTATE_180) + cntl |= CURSOR_ROTATE_180; + + return cntl; +} + static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base, const struct intel_plane_state *plane_state) { @@ -10836,40 +9254,18 @@ static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base, int pipe = intel_crtc->pipe; uint32_t cntl = 0; - if (plane_state && plane_state->base.visible) { - cntl = MCURSOR_GAMMA_ENABLE; - switch (plane_state->base.crtc_w) { - case 64: - cntl |= CURSOR_MODE_64_ARGB_AX; - break; - case 128: - cntl |= CURSOR_MODE_128_ARGB_AX; - break; - case 
256: - cntl |= CURSOR_MODE_256_ARGB_AX; - break; - default: - MISSING_CASE(plane_state->base.crtc_w); - return; - } - cntl |= pipe << 28; /* Connect to correct pipe */ - - if (HAS_DDI(dev_priv)) - cntl |= CURSOR_PIPE_CSC_ENABLE; - - if (plane_state->base.rotation & DRM_ROTATE_180) - cntl |= CURSOR_ROTATE_180; - } + if (plane_state && plane_state->base.visible) + cntl = plane_state->ctl; if (intel_crtc->cursor_cntl != cntl) { - I915_WRITE(CURCNTR(pipe), cntl); - POSTING_READ(CURCNTR(pipe)); + I915_WRITE_FW(CURCNTR(pipe), cntl); + POSTING_READ_FW(CURCNTR(pipe)); intel_crtc->cursor_cntl = cntl; } /* and commit changes on next vblank */ - I915_WRITE(CURBASE(pipe), base); - POSTING_READ(CURBASE(pipe)); + I915_WRITE_FW(CURBASE(pipe), base); + POSTING_READ_FW(CURBASE(pipe)); intel_crtc->cursor_base = base; } @@ -10883,6 +9279,7 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; u32 base = intel_crtc->cursor_addr; + unsigned long irqflags; u32 pos = 0; if (plane_state) { @@ -10909,12 +9306,16 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc, } } - I915_WRITE(CURPOS(pipe), pos); + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); + + I915_WRITE_FW(CURPOS(pipe), pos); if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) i845_update_cursor(crtc, base, plane_state); else i9xx_update_cursor(crtc, base, plane_state); + + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } static bool cursor_size_ok(struct drm_i915_private *dev_priv, @@ -10961,9 +9362,8 @@ static struct drm_display_mode load_detect_mode = { }; struct drm_framebuffer * -__intel_framebuffer_create(struct drm_device *dev, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj) +intel_framebuffer_create(struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd) { struct intel_framebuffer *intel_fb; int ret; @@ -10972,7 +9372,7 @@ __intel_framebuffer_create(struct drm_device *dev, 
if (!intel_fb) return ERR_PTR(-ENOMEM); - ret = intel_framebuffer_init(dev, intel_fb, mode_cmd, obj); + ret = intel_framebuffer_init(intel_fb, obj, mode_cmd); if (ret) goto err; @@ -10983,23 +9383,6 @@ err: return ERR_PTR(ret); } -static struct drm_framebuffer * -intel_framebuffer_create(struct drm_device *dev, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj) -{ - struct drm_framebuffer *fb; - int ret; - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ERR_PTR(ret); - fb = __intel_framebuffer_create(dev, mode_cmd, obj); - mutex_unlock(&dev->struct_mutex); - - return fb; -} - static u32 intel_framebuffer_pitch_for_width(int width, int bpp) { @@ -11034,7 +9417,7 @@ intel_framebuffer_create_for_mode(struct drm_device *dev, bpp); mode_cmd.pixel_format = drm_mode_legacy_fb_format(bpp, depth); - fb = intel_framebuffer_create(dev, &mode_cmd, obj); + fb = intel_framebuffer_create(obj, &mode_cmd); if (IS_ERR(fb)) i915_gem_object_put(obj); @@ -11109,10 +9492,10 @@ static int intel_modeset_setup_plane_state(struct drm_atomic_state *state, return 0; } -bool intel_get_load_detect_pipe(struct drm_connector *connector, - struct drm_display_mode *mode, - struct intel_load_detect_pipe *old, - struct drm_modeset_acquire_ctx *ctx) +int intel_get_load_detect_pipe(struct drm_connector *connector, + struct drm_display_mode *mode, + struct intel_load_detect_pipe *old, + struct drm_modeset_acquire_ctx *ctx) { struct intel_crtc *intel_crtc; struct intel_encoder *intel_encoder = @@ -11135,10 +9518,7 @@ bool intel_get_load_detect_pipe(struct drm_connector *connector, old->restore_state = NULL; -retry: - ret = drm_modeset_lock(&config->connection_mutex, ctx); - if (ret) - goto fail; + WARN_ON(!drm_modeset_is_locked(&config->connection_mutex)); /* * Algorithm gets a little messy: @@ -11288,10 +9668,8 @@ fail: restore_state = NULL; } - if (ret == -EDEADLK) { - drm_modeset_backoff(ctx); - goto retry; - } + if (ret == -EDEADLK) + return ret; return false; } 
@@ -11313,7 +9691,7 @@ void intel_release_load_detect_pipe(struct drm_connector *connector, if (!state) return; - ret = drm_atomic_commit(state); + ret = drm_atomic_helper_commit_duplicated_state(state, ctx); if (ret) DRM_DEBUG_KMS("Couldn't release load detect pipe: %i\n", ret); drm_atomic_state_put(state); @@ -11720,14 +10098,12 @@ static int intel_gen2_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - u32 flip_mask; - int ret; + u32 flip_mask, *cs; - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* Can't queue multiple flips, so wait for the previous * one to finish before executing the next. @@ -11736,13 +10112,12 @@ static int intel_gen2_queue_flip(struct drm_device *dev, flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; else flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_DISPLAY_FLIP | - MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(ring, fb->pitches[0]); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(ring, 0); /* aux display base address, unused */ + *cs++ = MI_WAIT_FOR_EVENT | flip_mask; + *cs++ = MI_NOOP; + *cs++ = MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane); + *cs++ = fb->pitches[0]; + *cs++ = intel_crtc->flip_work->gtt_offset; + *cs++ = 0; /* aux display base address, unused */ return 0; } @@ -11754,26 +10129,23 @@ static int intel_gen3_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - u32 flip_mask; - int ret; + u32 flip_mask, *cs; - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return 
PTR_ERR(cs); if (intel_crtc->plane) flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; else flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | - MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(ring, fb->pitches[0]); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(ring, MI_NOOP); + *cs++ = MI_WAIT_FOR_EVENT | flip_mask; + *cs++ = MI_NOOP; + *cs++ = MI_DISPLAY_FLIP_I915 | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane); + *cs++ = fb->pitches[0]; + *cs++ = intel_crtc->flip_work->gtt_offset; + *cs++ = MI_NOOP; return 0; } @@ -11785,25 +10157,22 @@ static int intel_gen4_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - uint32_t pf, pipesrc; - int ret; + u32 pf, pipesrc, *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* i965+ uses the linear or tiled offsets from the * Display Registers (which do not change across a page-flip) * so we need only reprogram the base address. */ - intel_ring_emit(ring, MI_DISPLAY_FLIP | - MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(ring, fb->pitches[0]); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset | - intel_fb_modifier_to_tiling(fb->modifier)); + *cs++ = MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane); + *cs++ = fb->pitches[0]; + *cs++ = intel_crtc->flip_work->gtt_offset | + intel_fb_modifier_to_tiling(fb->modifier); /* XXX Enabling the panel-fitter across page-flip is so far * untested on non-native modes, so ignore it for now. 
@@ -11811,7 +10180,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev, */ pf = 0; pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; - intel_ring_emit(ring, pf | pipesrc); + *cs++ = pf | pipesrc; return 0; } @@ -11823,21 +10192,17 @@ static int intel_gen6_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - uint32_t pf, pipesrc; - int ret; + u32 pf, pipesrc, *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_DISPLAY_FLIP | - MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(ring, fb->pitches[0] | - intel_fb_modifier_to_tiling(fb->modifier)); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); + *cs++ = MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane); + *cs++ = fb->pitches[0] | intel_fb_modifier_to_tiling(fb->modifier); + *cs++ = intel_crtc->flip_work->gtt_offset; /* Contrary to the suggestions in the documentation, * "Enable Panel Fitter" does not seem to be required when page @@ -11847,7 +10212,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev, */ pf = 0; pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; - intel_ring_emit(ring, pf | pipesrc); + *cs++ = pf | pipesrc; return 0; } @@ -11860,9 +10225,8 @@ static int intel_gen7_queue_flip(struct drm_device *dev, uint32_t flags) { struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - uint32_t plane_bit = 0; + u32 *cs, plane_bit = 0; int len, ret; switch (intel_crtc->plane) { @@ -11906,9 +10270,9 @@ static int intel_gen7_queue_flip(struct drm_device *dev, if (ret) return ret; - ret = intel_ring_begin(req, len); - if (ret) - return ret; + cs = intel_ring_begin(req, len); + if 
(IS_ERR(cs)) + return PTR_ERR(cs); /* Unmask the flip-done completion message. Note that the bspec says that * we should do this for both the BCS and RCS, and that we must not unmask @@ -11920,31 +10284,28 @@ static int intel_gen7_queue_flip(struct drm_device *dev, * to zero does lead to lockups within MI_DISPLAY_FLIP. */ if (req->engine->id == RCS) { - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, DERRMR); - intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE | - DERRMR_PIPEB_PRI_FLIP_DONE | - DERRMR_PIPEC_PRI_FLIP_DONE)); + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(DERRMR); + *cs++ = ~(DERRMR_PIPEA_PRI_FLIP_DONE | + DERRMR_PIPEB_PRI_FLIP_DONE | + DERRMR_PIPEC_PRI_FLIP_DONE); if (IS_GEN8(dev_priv)) - intel_ring_emit(ring, MI_STORE_REGISTER_MEM_GEN8 | - MI_SRM_LRM_GLOBAL_GTT); + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | + MI_SRM_LRM_GLOBAL_GTT; else - intel_ring_emit(ring, MI_STORE_REGISTER_MEM | - MI_SRM_LRM_GLOBAL_GTT); - intel_ring_emit_reg(ring, DERRMR); - intel_ring_emit(ring, - i915_ggtt_offset(req->engine->scratch) + 256); + *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + *cs++ = i915_mmio_reg_offset(DERRMR); + *cs++ = i915_ggtt_offset(req->engine->scratch) + 256; if (IS_GEN8(dev_priv)) { - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); + *cs++ = 0; + *cs++ = MI_NOOP; } } - intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit); - intel_ring_emit(ring, fb->pitches[0] | - intel_fb_modifier_to_tiling(fb->modifier)); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(ring, (MI_NOOP)); + *cs++ = MI_DISPLAY_FLIP_I915 | plane_bit; + *cs++ = fb->pitches[0] | intel_fb_modifier_to_tiling(fb->modifier); + *cs++ = intel_crtc->flip_work->gtt_offset; + *cs++ = MI_NOOP; return 0; } @@ -11989,7 +10350,7 @@ static void skl_do_mmio_flip(struct intel_crtc *intel_crtc, ctl = I915_READ(PLANE_CTL(pipe, 0)); ctl &= ~PLANE_CTL_TILED_MASK; switch (fb->modifier) { - case 
DRM_FORMAT_MOD_NONE: + case DRM_FORMAT_MOD_LINEAR: break; case I915_FORMAT_MOD_X_TILED: ctl |= PLANE_CTL_TILED_X; @@ -12136,6 +10497,7 @@ void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe) spin_unlock(&dev->event_lock); } +__maybe_unused static int intel_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_pending_vblank_event *event, @@ -12229,7 +10591,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, goto cleanup; intel_crtc->reset_count = i915_reset_count(&dev_priv->gpu_error); - if (i915_reset_in_progress_or_wedged(&dev_priv->gpu_error)) { + if (i915_reset_backoff_or_wedged(&dev_priv->gpu_error)) { ret = -EIO; goto unlock; } @@ -12302,7 +10664,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, intel_mark_page_flip_active(intel_crtc, work); work->flip_queued_req = i915_gem_request_get(request); - i915_add_request_no_flush(request); + i915_add_request(request); } i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY); @@ -12318,7 +10680,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, return 0; cleanup_request: - i915_add_request_no_flush(request); + i915_add_request(request); cleanup_unpin: to_intel_plane_state(primary->state)->vma = work->old_vma; intel_unpin_fb_vma(vma); @@ -12349,7 +10711,7 @@ out_hang: state = drm_atomic_state_alloc(dev); if (!state) return -ENOMEM; - state->acquire_ctx = drm_modeset_legacy_acquire_ctx(crtc); + state->acquire_ctx = dev->mode_config.acquire_ctx; retry: plane_state = drm_atomic_get_plane_state(state, primary); @@ -12430,11 +10792,11 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, struct intel_crtc_state *pipe_config = to_intel_crtc_state(crtc_state); struct drm_crtc *crtc = crtc_state->crtc; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct drm_plane *plane = plane_state->plane; + struct intel_plane *plane = to_intel_plane(plane_state->plane); struct drm_device *dev = crtc->dev; struct drm_i915_private 
*dev_priv = to_i915(dev); struct intel_plane_state *old_plane_state = - to_intel_plane_state(plane->state); + to_intel_plane_state(plane->base.state); bool mode_changed = needs_modeset(crtc_state); bool was_crtc_enabled = crtc->state->active; bool is_crtc_enabled = crtc_state->active; @@ -12442,7 +10804,7 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, struct drm_framebuffer *fb = plane_state->fb; int ret; - if (INTEL_GEN(dev_priv) >= 9 && plane->type != DRM_PLANE_TYPE_CURSOR) { + if (INTEL_GEN(dev_priv) >= 9 && plane->id != PLANE_CURSOR) { ret = skl_update_scaler_plane( to_intel_crtc_state(crtc_state), to_intel_plane_state(plane_state)); @@ -12466,8 +10828,10 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, * per-plane wm computation to the .check_plane() hook, and * only combine the results from all planes in the current place? */ - if (!is_crtc_enabled) + if (!is_crtc_enabled) { plane_state->visible = visible = false; + to_intel_crtc_state(crtc_state)->active_planes &= ~BIT(plane->id); + } if (!was_visible && !visible) return 0; @@ -12479,41 +10843,39 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, turn_on = visible && (!was_visible || mode_changed); DRM_DEBUG_ATOMIC("[CRTC:%d:%s] has [PLANE:%d:%s] with fb %i\n", - intel_crtc->base.base.id, - intel_crtc->base.name, - plane->base.id, plane->name, + intel_crtc->base.base.id, intel_crtc->base.name, + plane->base.base.id, plane->base.name, fb ? 
fb->base.id : -1); DRM_DEBUG_ATOMIC("[PLANE:%d:%s] visible %i -> %i, off %i, on %i, ms %i\n", - plane->base.id, plane->name, + plane->base.base.id, plane->base.name, was_visible, visible, turn_off, turn_on, mode_changed); if (turn_on) { - pipe_config->update_wm_pre = true; + if (INTEL_GEN(dev_priv) < 5) + pipe_config->update_wm_pre = true; /* must disable cxsr around plane enable/disable */ - if (plane->type != DRM_PLANE_TYPE_CURSOR) + if (plane->id != PLANE_CURSOR) pipe_config->disable_cxsr = true; } else if (turn_off) { - pipe_config->update_wm_post = true; + if (INTEL_GEN(dev_priv) < 5) + pipe_config->update_wm_post = true; /* must disable cxsr around plane enable/disable */ - if (plane->type != DRM_PLANE_TYPE_CURSOR) + if (plane->id != PLANE_CURSOR) pipe_config->disable_cxsr = true; - } else if (intel_wm_need_update(plane, plane_state)) { - /* FIXME bollocks */ - pipe_config->update_wm_pre = true; - pipe_config->update_wm_post = true; + } else if (intel_wm_need_update(&plane->base, plane_state)) { + if (INTEL_GEN(dev_priv) < 5) { + /* FIXME bollocks */ + pipe_config->update_wm_pre = true; + pipe_config->update_wm_post = true; + } } - /* Pre-gen9 platforms need two-step watermark updates */ - if ((pipe_config->update_wm_pre || pipe_config->update_wm_post) && - INTEL_GEN(dev_priv) < 9 && dev_priv->display.optimize_watermarks) - to_intel_crtc_state(crtc_state)->wm.need_postvbl_update = true; - if (visible || was_visible) - pipe_config->fb_bits |= to_intel_plane(plane)->frontbuffer_bit; + pipe_config->fb_bits |= plane->frontbuffer_bit; /* * WaCxSRDisabledForSpriteScaling:ivb @@ -12521,7 +10883,7 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, * cstate->update_wm was already set above, so this flag will * take effect when we commit and program watermarks. 
*/ - if (plane->type == DRM_PLANE_TYPE_OVERLAY && IS_IVYBRIDGE(dev_priv) && + if (plane->id == PLANE_SPRITE0 && IS_IVYBRIDGE(dev_priv) && needs_scaling(to_intel_plane_state(plane_state)) && !needs_scaling(old_plane_state)) pipe_config->disable_lp_wm = true; @@ -12546,7 +10908,7 @@ static bool check_single_encoder_cloning(struct drm_atomic_state *state, struct drm_connector_state *connector_state; int i; - for_each_connector_in_state(state, connector, connector_state, i) { + for_each_new_connector_in_state(state, connector, connector_state, i) { if (connector_state->crtc != &crtc->base) continue; @@ -12631,7 +10993,7 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc, ret = skl_update_scaler_crtc(pipe_config); if (!ret) - ret = intel_atomic_setup_scalers(dev, intel_crtc, + ret = intel_atomic_setup_scalers(dev_priv, intel_crtc, pipe_config); } @@ -12639,7 +11001,6 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc, } static const struct drm_crtc_helper_funcs intel_helper_funcs = { - .mode_set_base_atomic = intel_pipe_set_base_atomic, .atomic_begin = intel_begin_crtc_commit, .atomic_flush = intel_finish_crtc_commit, .atomic_check = intel_crtc_atomic_check, @@ -12648,8 +11009,10 @@ static const struct drm_crtc_helper_funcs intel_helper_funcs = { static void intel_modeset_update_connector_atomic_state(struct drm_device *dev) { struct intel_connector *connector; + struct drm_connector_list_iter conn_iter; - for_each_intel_connector(dev, connector) { + drm_connector_list_iter_begin(dev, &conn_iter); + for_each_intel_connector_iter(connector, &conn_iter) { if (connector->base.state->crtc) drm_connector_unreference(&connector->base); @@ -12665,6 +11028,7 @@ static void intel_modeset_update_connector_atomic_state(struct drm_device *dev) connector->base.state->crtc = NULL; } } + drm_connector_list_iter_end(&conn_iter); } static void @@ -12717,7 +11081,7 @@ compute_baseline_pipe_bpp(struct intel_crtc *crtc, state = pipe_config->base.state; /* Clamp display 
bpp to EDID value */ - for_each_connector_in_state(state, connector, connector_state, i) { + for_each_new_connector_in_state(state, connector, connector_state, i) { if (connector_state->crtc != &crtc->base) continue; @@ -12789,9 +11153,10 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc, DRM_DEBUG_KMS("adjusted mode:\n"); drm_mode_debug_printmodeline(&pipe_config->base.adjusted_mode); intel_dump_crtc_timings(&pipe_config->base.adjusted_mode); - DRM_DEBUG_KMS("port clock: %d, pipe src size: %dx%d\n", + DRM_DEBUG_KMS("port clock: %d, pipe src size: %dx%d, pixel rate %d\n", pipe_config->port_clock, - pipe_config->pipe_src_w, pipe_config->pipe_src_h); + pipe_config->pipe_src_w, pipe_config->pipe_src_h, + pipe_config->pixel_rate); if (INTEL_GEN(dev_priv) >= 9) DRM_DEBUG_KMS("num_scalers: %d, scaler_users: 0x%x, scaler_id: %d\n", @@ -12909,10 +11274,12 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state) static void clear_intel_crtc_state(struct intel_crtc_state *crtc_state) { - struct drm_crtc_state tmp_state; + struct drm_i915_private *dev_priv = + to_i915(crtc_state->base.crtc->dev); struct intel_crtc_scaler_state scaler_state; struct intel_dpll_hw_state dpll_hw_state; struct intel_shared_dpll *shared_dpll; + struct intel_crtc_wm_state wm_state; bool force_thru; /* FIXME: before the switch to atomic started, a new pipe_config was @@ -12920,19 +11287,24 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state) * fixed, so that the crtc_state can be safely duplicated. For now, * only fields that are know to not cause problems are preserved. 
*/ - tmp_state = crtc_state->base; scaler_state = crtc_state->scaler_state; shared_dpll = crtc_state->shared_dpll; dpll_hw_state = crtc_state->dpll_hw_state; force_thru = crtc_state->pch_pfit.force_thru; + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + wm_state = crtc_state->wm; - memset(crtc_state, 0, sizeof *crtc_state); + /* Keep base drm_crtc_state intact, only clear our extended struct */ + BUILD_BUG_ON(offsetof(struct intel_crtc_state, base)); + memset(&crtc_state->base + 1, 0, + sizeof(*crtc_state) - sizeof(crtc_state->base)); - crtc_state->base = tmp_state; crtc_state->scaler_state = scaler_state; crtc_state->shared_dpll = shared_dpll; crtc_state->dpll_hw_state = dpll_hw_state; crtc_state->pch_pfit.force_thru = force_thru; + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + crtc_state->wm = wm_state; } static int @@ -12982,7 +11354,7 @@ intel_modeset_pipe_config(struct drm_crtc *crtc, &pipe_config->pipe_src_w, &pipe_config->pipe_src_h); - for_each_connector_in_state(state, connector, connector_state, i) { + for_each_new_connector_in_state(state, connector, connector_state, i) { if (connector_state->crtc != crtc) continue; @@ -13013,7 +11385,7 @@ encoder_retry: * adjust it according to limitations or connector properties, and also * a chance to reject the mode entirely. */ - for_each_connector_in_state(state, connector, connector_state, i) { + for_each_new_connector_in_state(state, connector, connector_state, i) { if (connector_state->crtc != crtc) continue; @@ -13049,8 +11421,11 @@ encoder_retry: } /* Dithering seems to not pass-through bits correctly when it should, so - * only enable it on 6bpc panels. */ - pipe_config->dither = pipe_config->pipe_bpp == 6*3; + * only enable it on 6bpc panels and when its not a compliance + * test requesting 6bpc video pattern. 
+ */ + pipe_config->dither = (pipe_config->pipe_bpp == 6*3) && + !pipe_config->dither_force_disable; DRM_DEBUG_KMS("hw max bpp: %i, pipe bpp: %i, dithering: %i\n", base_bpp, pipe_config->pipe_bpp, pipe_config->dither); @@ -13062,16 +11437,16 @@ static void intel_modeset_update_crtc_state(struct drm_atomic_state *state) { struct drm_crtc *crtc; - struct drm_crtc_state *crtc_state; + struct drm_crtc_state *new_crtc_state; int i; /* Double check state. */ - for_each_crtc_in_state(state, crtc, crtc_state, i) { - to_intel_crtc(crtc)->config = to_intel_crtc_state(crtc->state); + for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { + to_intel_crtc(crtc)->config = to_intel_crtc_state(new_crtc_state); /* Update hwmode for vblank functions */ - if (crtc->state->active) - crtc->hwmode = crtc->state->adjusted_mode; + if (new_crtc_state->active) + crtc->hwmode = new_crtc_state->adjusted_mode; else crtc->hwmode.crtc_clock = 0; @@ -13329,6 +11704,9 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, if ((INTEL_GEN(dev_priv) < 8 && !IS_HASWELL(dev_priv)) || IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) PIPE_CONF_CHECK_I(limited_color_range); + + PIPE_CONF_CHECK_I(hdmi_scrambling); + PIPE_CONF_CHECK_I(hdmi_high_tmds_clock_ratio); PIPE_CONF_CHECK_I(has_infoframe); PIPE_CONF_CHECK_I(has_audio); @@ -13364,6 +11742,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, } PIPE_CONF_CHECK_I(scaler_state.scaler_id); + PIPE_CONF_CHECK_CLOCK_FUZZY(pixel_rate); } /* BDW+ don't expose a synchronous way to read the state */ @@ -13543,47 +11922,55 @@ verify_connector_state(struct drm_device *dev, struct drm_crtc *crtc) { struct drm_connector *connector; - struct drm_connector_state *old_conn_state; + struct drm_connector_state *new_conn_state; int i; - for_each_connector_in_state(state, connector, old_conn_state, i) { + for_each_new_connector_in_state(state, connector, new_conn_state, i) { struct drm_encoder *encoder = connector->encoder; - struct 
drm_connector_state *state = connector->state; - if (state->crtc != crtc) + if (new_conn_state->crtc != crtc) continue; intel_connector_verify_state(to_intel_connector(connector)); - I915_STATE_WARN(state->best_encoder != encoder, + I915_STATE_WARN(new_conn_state->best_encoder != encoder, "connector's atomic encoder doesn't match legacy encoder\n"); } } static void -verify_encoder_state(struct drm_device *dev) +verify_encoder_state(struct drm_device *dev, struct drm_atomic_state *state) { struct intel_encoder *encoder; - struct intel_connector *connector; + struct drm_connector *connector; + struct drm_connector_state *old_conn_state, *new_conn_state; + int i; for_each_intel_encoder(dev, encoder) { - bool enabled = false; + bool enabled = false, found = false; enum pipe pipe; DRM_DEBUG_KMS("[ENCODER:%d:%s]\n", encoder->base.base.id, encoder->base.name); - for_each_intel_connector(dev, connector) { - if (connector->base.state->best_encoder != &encoder->base) + for_each_oldnew_connector_in_state(state, connector, old_conn_state, + new_conn_state, i) { + if (old_conn_state->best_encoder == &encoder->base) + found = true; + + if (new_conn_state->best_encoder != &encoder->base) continue; - enabled = true; + found = enabled = true; - I915_STATE_WARN(connector->base.state->crtc != + I915_STATE_WARN(new_conn_state->crtc != encoder->base.crtc, "connector's crtc doesn't match encoder crtc\n"); } + if (!found) + continue; + I915_STATE_WARN(!!encoder->base.crtc != enabled, "encoder's enabled state mismatch " "(expected %i, found %i)\n", @@ -13655,6 +12042,8 @@ verify_crtc_state(struct drm_crtc *crtc, } } + intel_crtc_compute_pixel_rate(pipe_config); + if (!new_crtc_state->active) return; @@ -13782,7 +12171,7 @@ static void intel_modeset_verify_disabled(struct drm_device *dev, struct drm_atomic_state *state) { - verify_encoder_state(dev); + verify_encoder_state(dev, state); verify_connector_state(dev, state, NULL); verify_disabled_dpll_state(dev); } @@ -13830,21 +12219,21 @@ 
static void intel_modeset_clear_plls(struct drm_atomic_state *state) struct drm_device *dev = state->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; - struct drm_crtc_state *crtc_state; + struct drm_crtc_state *old_crtc_state, *new_crtc_state; int i; if (!dev_priv->display.crtc_compute_clock) return; - for_each_crtc_in_state(state, crtc, crtc_state, i) { + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_shared_dpll *old_dpll = - to_intel_crtc_state(crtc->state)->shared_dpll; + to_intel_crtc_state(old_crtc_state)->shared_dpll; - if (!needs_modeset(crtc_state)) + if (!needs_modeset(new_crtc_state)) continue; - to_intel_crtc_state(crtc_state)->shared_dpll = NULL; + to_intel_crtc_state(new_crtc_state)->shared_dpll = NULL; if (!old_dpll) continue; @@ -13870,7 +12259,7 @@ static int haswell_mode_set_planes_workaround(struct drm_atomic_state *state) int i; /* look at all crtc's that are going to be enabled in during modeset */ - for_each_crtc_in_state(state, crtc, crtc_state, i) { + for_each_new_crtc_in_state(state, crtc, crtc_state, i) { intel_crtc = to_intel_crtc(crtc); if (!crtc_state->active || !needs_modeset(crtc_state)) @@ -13972,7 +12361,7 @@ static int intel_modeset_checks(struct drm_atomic_state *state) struct intel_atomic_state *intel_state = to_intel_atomic_state(state); struct drm_i915_private *dev_priv = to_i915(state->dev); struct drm_crtc *crtc; - struct drm_crtc_state *crtc_state; + struct drm_crtc_state *old_crtc_state, *new_crtc_state; int ret = 0, i; if (!check_digital_port_conflicts(state)) { @@ -13982,14 +12371,16 @@ static int intel_modeset_checks(struct drm_atomic_state *state) intel_state->modeset = true; intel_state->active_crtcs = dev_priv->active_crtcs; + intel_state->cdclk.logical = dev_priv->cdclk.logical; + intel_state->cdclk.actual = dev_priv->cdclk.actual; - for_each_crtc_in_state(state, crtc, crtc_state, i) { - if 
(crtc_state->active) + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + if (new_crtc_state->active) intel_state->active_crtcs |= 1 << i; else intel_state->active_crtcs &= ~(1 << i); - if (crtc_state->active != crtc->state->active) + if (old_crtc_state->active != new_crtc_state->active) intel_state->active_pipe_changes |= drm_crtc_mask(crtc); } @@ -14001,38 +12392,35 @@ static int intel_modeset_checks(struct drm_atomic_state *state) * adjusted_mode bits in the crtc directly. */ if (dev_priv->display.modeset_calc_cdclk) { - if (!intel_state->cdclk_pll_vco) - intel_state->cdclk_pll_vco = dev_priv->cdclk_pll.vco; - if (!intel_state->cdclk_pll_vco) - intel_state->cdclk_pll_vco = dev_priv->skl_preferred_vco_freq; - ret = dev_priv->display.modeset_calc_cdclk(state); if (ret < 0) return ret; /* - * Writes to dev_priv->atomic_cdclk_freq must protected by + * Writes to dev_priv->cdclk.logical must protected by * holding all the crtc locks, even if we don't end up * touching the hardware */ - if (intel_state->cdclk != dev_priv->atomic_cdclk_freq) { + if (!intel_cdclk_state_compare(&dev_priv->cdclk.logical, + &intel_state->cdclk.logical)) { ret = intel_lock_all_pipes(state); if (ret < 0) return ret; } /* All pipes must be switched off while we change the cdclk. 
*/ - if (intel_state->dev_cdclk != dev_priv->cdclk_freq || - intel_state->cdclk_pll_vco != dev_priv->cdclk_pll.vco) { + if (!intel_cdclk_state_compare(&dev_priv->cdclk.actual, + &intel_state->cdclk.actual)) { ret = intel_modeset_all_pipes(state); if (ret < 0) return ret; } - DRM_DEBUG_KMS("New cdclk calculated to be atomic %u, actual %u\n", - intel_state->cdclk, intel_state->dev_cdclk); + DRM_DEBUG_KMS("New cdclk calculated to be logical %u kHz, actual %u kHz\n", + intel_state->cdclk.logical.cdclk, + intel_state->cdclk.actual.cdclk); } else { - to_intel_atomic_state(state)->cdclk = dev_priv->atomic_cdclk_freq; + to_intel_atomic_state(state)->cdclk.logical = dev_priv->cdclk.logical; } intel_modeset_clear_plls(state); @@ -14071,7 +12459,7 @@ static int intel_atomic_check(struct drm_device *dev, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_atomic_state *intel_state = to_intel_atomic_state(state); struct drm_crtc *crtc; - struct drm_crtc_state *crtc_state; + struct drm_crtc_state *old_crtc_state, *crtc_state; int ret, i; bool any_ms = false; @@ -14079,12 +12467,12 @@ static int intel_atomic_check(struct drm_device *dev, if (ret) return ret; - for_each_crtc_in_state(state, crtc, crtc_state, i) { + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, crtc_state, i) { struct intel_crtc_state *pipe_config = to_intel_crtc_state(crtc_state); /* Catch I915_MODE_FLAG_INHERITED */ - if (crtc_state->mode.private_flags != crtc->state->mode.private_flags) + if (crtc_state->mode.private_flags != old_crtc_state->mode.private_flags) crtc_state->mode_changed = true; if (!needs_modeset(crtc_state)) @@ -14111,10 +12499,10 @@ static int intel_atomic_check(struct drm_device *dev, if (i915.fastboot && intel_pipe_config_compare(dev_priv, - to_intel_crtc_state(crtc->state), + to_intel_crtc_state(old_crtc_state), pipe_config, true)) { crtc_state->mode_changed = false; - to_intel_crtc_state(crtc_state)->update_pipe = true; + pipe_config->update_pipe = true; } if 
(needs_modeset(crtc_state)) @@ -14135,7 +12523,7 @@ static int intel_atomic_check(struct drm_device *dev, if (ret) return ret; } else { - intel_state->cdclk = dev_priv->atomic_cdclk_freq; + intel_state->cdclk.logical = dev_priv->cdclk.logical; } ret = drm_atomic_helper_check_planes(dev, state); @@ -14154,7 +12542,7 @@ static int intel_atomic_prepare_commit(struct drm_device *dev, struct drm_crtc *crtc; int i, ret; - for_each_crtc_in_state(state, crtc, crtc_state, i) { + for_each_new_crtc_in_state(state, crtc, crtc_state, i) { if (state->legacy_cursor_update) continue; @@ -14242,12 +12630,7 @@ static bool needs_vblank_wait(struct intel_crtc_state *crtc_state) if (crtc_state->update_wm_post) return true; - /* - * cxsr is re-enabled after vblank. - * This is already handled by crtc_state->update_wm_post, - * but added for clarity. - */ - if (crtc_state->disable_cxsr) + if (crtc_state->wm.need_postvbl_update) return true; return false; @@ -14256,19 +12639,21 @@ static bool needs_vblank_wait(struct intel_crtc_state *crtc_state) static void intel_update_crtc(struct drm_crtc *crtc, struct drm_atomic_state *state, struct drm_crtc_state *old_crtc_state, + struct drm_crtc_state *new_crtc_state, unsigned int *crtc_vblank_mask) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_crtc_state *pipe_config = to_intel_crtc_state(crtc->state); - bool modeset = needs_modeset(crtc->state); + struct intel_crtc_state *pipe_config = to_intel_crtc_state(new_crtc_state); + bool modeset = needs_modeset(new_crtc_state); if (modeset) { update_scanline_offset(intel_crtc); dev_priv->display.crtc_enable(pipe_config, state); } else { - intel_pre_plane_update(to_intel_crtc_state(old_crtc_state)); + intel_pre_plane_update(to_intel_crtc_state(old_crtc_state), + pipe_config); } if (drm_atomic_get_existing_plane_state(state, crtc->primary)) { @@ -14287,15 +12672,15 @@ static void 
intel_update_crtcs(struct drm_atomic_state *state, unsigned int *crtc_vblank_mask) { struct drm_crtc *crtc; - struct drm_crtc_state *old_crtc_state; + struct drm_crtc_state *old_crtc_state, *new_crtc_state; int i; - for_each_crtc_in_state(state, crtc, old_crtc_state, i) { - if (!crtc->state->active) + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + if (!new_crtc_state->active) continue; intel_update_crtc(crtc, state, old_crtc_state, - crtc_vblank_mask); + new_crtc_state, crtc_vblank_mask); } } @@ -14306,7 +12691,7 @@ static void skl_update_crtcs(struct drm_atomic_state *state, struct intel_atomic_state *intel_state = to_intel_atomic_state(state); struct drm_crtc *crtc; struct intel_crtc *intel_crtc; - struct drm_crtc_state *old_crtc_state; + struct drm_crtc_state *old_crtc_state, *new_crtc_state; struct intel_crtc_state *cstate; unsigned int updated = 0; bool progress; @@ -14315,9 +12700,9 @@ static void skl_update_crtcs(struct drm_atomic_state *state, const struct skl_ddb_entry *entries[I915_MAX_PIPES] = {}; - for_each_crtc_in_state(state, crtc, old_crtc_state, i) + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) /* ignore allocations for crtc's that have been turned off. 
*/ - if (crtc->state->active) + if (new_crtc_state->active) entries[i] = &to_intel_crtc_state(old_crtc_state)->wm.skl.ddb; /* @@ -14329,7 +12714,7 @@ static void skl_update_crtcs(struct drm_atomic_state *state, do { progress = false; - for_each_crtc_in_state(state, crtc, old_crtc_state, i) { + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { bool vbl_wait = false; unsigned int cmask = drm_crtc_mask(crtc); @@ -14354,12 +12739,12 @@ static void skl_update_crtcs(struct drm_atomic_state *state, */ if (!skl_ddb_entry_equal(&cstate->wm.skl.ddb, &to_intel_crtc_state(old_crtc_state)->wm.skl.ddb) && - !crtc->state->active_changed && + !new_crtc_state->active_changed && intel_state->wm_results.dirty_pipes != updated) vbl_wait = true; intel_update_crtc(crtc, state, old_crtc_state, - crtc_vblank_mask); + new_crtc_state, crtc_vblank_mask); if (vbl_wait) intel_wait_for_vblank(dev_priv, pipe); @@ -14392,11 +12777,11 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) struct drm_device *dev = state->dev; struct intel_atomic_state *intel_state = to_intel_atomic_state(state); struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_crtc_state *old_crtc_state; + struct drm_crtc_state *old_crtc_state, *new_crtc_state; struct drm_crtc *crtc; struct intel_crtc_state *intel_cstate; bool hw_check = intel_state->modeset; - unsigned long put_domains[I915_MAX_PIPES] = {}; + u64 put_domains[I915_MAX_PIPES] = {}; unsigned crtc_vblank_mask = 0; int i; @@ -14405,22 +12790,23 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) if (intel_state->modeset) intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET); - for_each_crtc_in_state(state, crtc, old_crtc_state, i) { + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - if (needs_modeset(crtc->state) || - to_intel_crtc_state(crtc->state)->update_pipe) { + if (needs_modeset(new_crtc_state) 
|| + to_intel_crtc_state(new_crtc_state)->update_pipe) { hw_check = true; put_domains[to_intel_crtc(crtc)->pipe] = modeset_get_crtc_power_domains(crtc, - to_intel_crtc_state(crtc->state)); + to_intel_crtc_state(new_crtc_state)); } - if (!needs_modeset(crtc->state)) + if (!needs_modeset(new_crtc_state)) continue; - intel_pre_plane_update(to_intel_crtc_state(old_crtc_state)); + intel_pre_plane_update(to_intel_crtc_state(old_crtc_state), + to_intel_crtc_state(new_crtc_state)); if (old_crtc_state->active) { intel_crtc_disable_planes(crtc, old_crtc_state->plane_mask); @@ -14440,12 +12826,12 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) /* * Make sure we don't call initial_watermarks * for ILK-style watermark updates. + * + * No clue what this is supposed to achieve. */ - if (dev_priv->display.atomic_update_watermarks) + if (INTEL_GEN(dev_priv) >= 9) dev_priv->display.initial_watermarks(intel_state, to_intel_crtc_state(crtc->state)); - else - intel_update_watermarks(intel_crtc); } } } @@ -14457,10 +12843,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) if (intel_state->modeset) { drm_atomic_helper_update_legacy_modeset_state(state->dev, state); - if (dev_priv->display.modeset_commit_cdclk && - (intel_state->dev_cdclk != dev_priv->cdclk_freq || - intel_state->cdclk_pll_vco != dev_priv->cdclk_pll.vco)) - dev_priv->display.modeset_commit_cdclk(state); + intel_set_cdclk(dev_priv, &dev_priv->cdclk.actual); /* * SKL workaround: bspec recommends we disable the SAGV when we @@ -14473,16 +12856,16 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) } /* Complete the events for pipes that have now been disabled */ - for_each_crtc_in_state(state, crtc, old_crtc_state, i) { - bool modeset = needs_modeset(crtc->state); + for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { + bool modeset = needs_modeset(new_crtc_state); /* Complete events for now disable pipes here. 
*/ - if (modeset && !crtc->state->active && crtc->state->event) { + if (modeset && !new_crtc_state->active && new_crtc_state->event) { spin_lock_irq(&dev->event_lock); - drm_crtc_send_vblank_event(crtc, crtc->state->event); + drm_crtc_send_vblank_event(crtc, new_crtc_state->event); spin_unlock_irq(&dev->event_lock); - crtc->state->event = NULL; + new_crtc_state->event = NULL; } } @@ -14508,21 +12891,21 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) * * TODO: Move this (and other cleanup) to an async worker eventually. */ - for_each_crtc_in_state(state, crtc, old_crtc_state, i) { - intel_cstate = to_intel_crtc_state(crtc->state); + for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { + intel_cstate = to_intel_crtc_state(new_crtc_state); if (dev_priv->display.optimize_watermarks) dev_priv->display.optimize_watermarks(intel_state, intel_cstate); } - for_each_crtc_in_state(state, crtc, old_crtc_state, i) { + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { intel_post_plane_update(to_intel_crtc_state(old_crtc_state)); if (put_domains[i]) modeset_put_power_domains(dev_priv, put_domains[i]); - intel_modeset_verify_crtc(crtc, state, old_crtc_state, crtc->state); + intel_modeset_verify_crtc(crtc, state, old_crtc_state, new_crtc_state); } if (intel_state->modeset && intel_can_enable_sagv(state)) @@ -14594,13 +12977,13 @@ intel_atomic_commit_ready(struct i915_sw_fence *fence, static void intel_atomic_track_fbs(struct drm_atomic_state *state) { - struct drm_plane_state *old_plane_state; + struct drm_plane_state *old_plane_state, *new_plane_state; struct drm_plane *plane; int i; - for_each_plane_in_state(state, plane, old_plane_state, i) + for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) i915_gem_track_fb(intel_fb_obj(old_plane_state->fb), - intel_fb_obj(plane->state->fb), + intel_fb_obj(new_plane_state->fb), to_intel_plane(plane)->frontbuffer_bit); } @@ -14639,6 +13022,26 @@ static 
int intel_atomic_commit(struct drm_device *dev, return ret; } + /* + * The intel_legacy_cursor_update() fast path takes care + * of avoiding the vblank waits for simple cursor + * movement and flips. For cursor on/off and size changes, + * we want to perform the vblank waits so that watermark + * updates happen during the correct frames. Gen9+ have + * double buffered watermarks and so shouldn't need this. + * + * Do this after drm_atomic_helper_setup_commit() and + * intel_atomic_prepare_commit() because we still want + * to skip the flip and fb cleanup waits. Although that + * does risk yanking the mapping from under the display + * engine. + * + * FIXME doing watermarks and fb cleanup from a vblank worker + * (assuming we had any) would solve these problems. + */ + if (INTEL_GEN(dev_priv) < 9) + state->legacy_cursor_update = false; + drm_atomic_helper_swap_state(state, true); dev_priv->wm.distrust_bios_wm = false; intel_shared_dpll_swap_state(state); @@ -14648,7 +13051,8 @@ static int intel_atomic_commit(struct drm_device *dev, memcpy(dev_priv->min_pixclk, intel_state->min_pixclk, sizeof(intel_state->min_pixclk)); dev_priv->active_crtcs = intel_state->active_crtcs; - dev_priv->atomic_cdclk_freq = intel_state->cdclk; + dev_priv->cdclk.logical = intel_state->cdclk.logical; + dev_priv->cdclk.actual = intel_state->cdclk.actual; } drm_atomic_state_get(state); @@ -14678,7 +13082,7 @@ void intel_crtc_restore_mode(struct drm_crtc *crtc) return; } - state->acquire_ctx = drm_modeset_legacy_acquire_ctx(crtc); + state->acquire_ctx = crtc->dev->mode_config.acquire_ctx; retry: crtc_state = drm_atomic_get_crtc_state(state, crtc); @@ -14701,54 +13105,12 @@ out: drm_atomic_state_put(state); } -/* - * FIXME: Remove this once i915 is fully DRIVER_ATOMIC by calling - * drm_atomic_helper_legacy_gamma_set() directly. 
- */ -static int intel_atomic_legacy_gamma_set(struct drm_crtc *crtc, - u16 *red, u16 *green, u16 *blue, - uint32_t size) -{ - struct drm_device *dev = crtc->dev; - struct drm_mode_config *config = &dev->mode_config; - struct drm_crtc_state *state; - int ret; - - ret = drm_atomic_helper_legacy_gamma_set(crtc, red, green, blue, size); - if (ret) - return ret; - - /* - * Make sure we update the legacy properties so this works when - * atomic is not enabled. - */ - - state = crtc->state; - - drm_object_property_set_value(&crtc->base, - config->degamma_lut_property, - (state->degamma_lut) ? - state->degamma_lut->base.id : 0); - - drm_object_property_set_value(&crtc->base, - config->ctm_property, - (state->ctm) ? - state->ctm->base.id : 0); - - drm_object_property_set_value(&crtc->base, - config->gamma_lut_property, - (state->gamma_lut) ? - state->gamma_lut->base.id : 0); - - return 0; -} - static const struct drm_crtc_funcs intel_crtc_funcs = { - .gamma_set = intel_atomic_legacy_gamma_set, + .gamma_set = drm_atomic_helper_legacy_gamma_set, .set_config = drm_atomic_helper_set_config, .set_property = drm_atomic_helper_crtc_set_property, .destroy = intel_crtc_destroy, - .page_flip = intel_crtc_page_flip, + .page_flip = drm_atomic_helper_page_flip, .atomic_duplicate_state = intel_crtc_duplicate_state, .atomic_destroy_state = intel_crtc_destroy_state, .set_crc_source = intel_crtc_set_crc_source, @@ -14780,6 +13142,29 @@ intel_prepare_plane_fb(struct drm_plane *plane, struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb); int ret; + if (obj) { + if (plane->type == DRM_PLANE_TYPE_CURSOR && + INTEL_INFO(dev_priv)->cursor_needs_physical) { + const int align = IS_I830(dev_priv) ? 
16 * 1024 : 256; + + ret = i915_gem_object_attach_phys(obj, align); + if (ret) { + DRM_DEBUG_KMS("failed to attach phys object\n"); + return ret; + } + } else { + struct i915_vma *vma; + + vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation); + if (IS_ERR(vma)) { + DRM_DEBUG_KMS("failed to pin object\n"); + return PTR_ERR(vma); + } + + to_intel_plane_state(new_state)->vma = vma; + } + } + if (!obj && !old_obj) return 0; @@ -14832,26 +13217,6 @@ intel_prepare_plane_fb(struct drm_plane *plane, i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY); } - if (plane->type == DRM_PLANE_TYPE_CURSOR && - INTEL_INFO(dev_priv)->cursor_needs_physical) { - int align = IS_I830(dev_priv) ? 16 * 1024 : 256; - ret = i915_gem_object_attach_phys(obj, align); - if (ret) { - DRM_DEBUG_KMS("failed to attach phys object\n"); - return ret; - } - } else { - struct i915_vma *vma; - - vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation); - if (IS_ERR(vma)) { - DRM_DEBUG_KMS("failed to pin object\n"); - return PTR_ERR(vma); - } - - to_intel_plane_state(new_state)->vma = vma; - } - return 0; } @@ -14879,16 +13244,22 @@ intel_cleanup_plane_fb(struct drm_plane *plane, int skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state) { + struct drm_i915_private *dev_priv; int max_scale; - int crtc_clock, cdclk; + int crtc_clock, max_dotclk; if (!intel_crtc || !crtc_state->base.enable) return DRM_PLANE_HELPER_NO_SCALING; + dev_priv = to_i915(intel_crtc->base.dev); + crtc_clock = crtc_state->base.adjusted_mode.crtc_clock; - cdclk = to_intel_atomic_state(crtc_state->base.state)->cdclk; + max_dotclk = to_intel_atomic_state(crtc_state->base.state)->cdclk.logical.cdclk; - if (WARN_ON_ONCE(!crtc_clock || cdclk < crtc_clock)) + if (IS_GEMINILAKE(dev_priv)) + max_dotclk *= 2; + + if (WARN_ON_ONCE(!crtc_clock || max_dotclk < crtc_clock)) return DRM_PLANE_HELPER_NO_SCALING; /* @@ -14897,7 +13268,8 @@ skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state 
*crtc_state * or * cdclk/crtc_clock */ - max_scale = min((1 << 16) * 3 - 1, (1 << 8) * ((cdclk << 8) / crtc_clock)); + max_scale = min((1 << 16) * 3 - 1, + (1 << 8) * ((max_dotclk << 8) / crtc_clock)); return max_scale; } @@ -14937,6 +13309,14 @@ intel_check_primary_plane(struct drm_plane *plane, ret = skl_check_plane_surface(state); if (ret) return ret; + + state->ctl = skl_plane_ctl(crtc_state, state); + } else { + ret = i9xx_check_plane_surface(state); + if (ret) + return ret; + + state->ctl = i9xx_plane_ctl(crtc_state, state); } return 0; @@ -15019,7 +13399,8 @@ intel_legacy_cursor_update(struct drm_plane *plane, int crtc_x, int crtc_y, unsigned int crtc_w, unsigned int crtc_h, uint32_t src_x, uint32_t src_y, - uint32_t src_w, uint32_t src_h) + uint32_t src_w, uint32_t src_h, + struct drm_modeset_acquire_ctx *ctx) { struct drm_i915_private *dev_priv = to_i915(crtc->dev); int ret; @@ -15049,8 +13430,7 @@ intel_legacy_cursor_update(struct drm_plane *plane, old_plane_state->src_h != src_h || old_plane_state->crtc_w != crtc_w || old_plane_state->crtc_h != crtc_h || - !old_plane_state->visible || - old_plane_state->fb->modifier != fb->modifier) + !old_plane_state->fb != !fb) goto slow; new_plane_state = intel_plane_duplicate_state(plane); @@ -15073,10 +13453,6 @@ intel_legacy_cursor_update(struct drm_plane *plane, if (ret) goto out_free; - /* Visibility changed, must take slowpath. 
*/ - if (!new_plane_state->visible) - goto slow_free; - ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex); if (ret) goto out_free; @@ -15116,9 +13492,15 @@ intel_legacy_cursor_update(struct drm_plane *plane, new_plane_state->fb = old_fb; to_intel_plane_state(new_plane_state)->vma = old_vma; - intel_plane->update_plane(plane, - to_intel_crtc_state(crtc->state), - to_intel_plane_state(plane->state)); + if (plane->state->visible) { + trace_intel_update_plane(plane, to_intel_crtc(crtc)); + intel_plane->update_plane(plane, + to_intel_crtc_state(crtc->state), + to_intel_plane_state(plane->state)); + } else { + trace_intel_disable_plane(plane, to_intel_crtc(crtc)); + intel_plane->disable_plane(plane, crtc); + } intel_cleanup_plane_fb(plane, new_plane_state); @@ -15128,12 +13510,10 @@ out_free: intel_plane_destroy_state(plane, new_plane_state); return ret; -slow_free: - intel_plane_destroy_state(plane, new_plane_state); slow: return drm_atomic_helper_update_plane(plane, crtc, fb, crtc_x, crtc_y, crtc_w, crtc_h, - src_x, src_y, src_w, src_h); + src_x, src_y, src_w, src_h, ctx); } static const struct drm_plane_funcs intel_cursor_plane_funcs = { @@ -15196,12 +13576,6 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->update_plane = skylake_update_primary_plane; primary->disable_plane = skylake_disable_primary_plane; - } else if (HAS_PCH_SPLIT(dev_priv)) { - intel_primary_formats = i965_primary_formats; - num_formats = ARRAY_SIZE(i965_primary_formats); - - primary->update_plane = ironlake_update_primary_plane; - primary->disable_plane = i9xx_disable_primary_plane; } else if (INTEL_GEN(dev_priv) >= 4) { intel_primary_formats = i965_primary_formats; num_formats = ARRAY_SIZE(i965_primary_formats); @@ -15273,6 +13647,7 @@ intel_check_cursor_plane(struct drm_plane *plane, struct intel_crtc_state *crtc_state, struct intel_plane_state *state) { + struct drm_i915_private *dev_priv = to_i915(plane->dev); struct drm_framebuffer *fb = 
state->base.fb; struct drm_i915_gem_object *obj = intel_fb_obj(fb); enum pipe pipe = to_intel_plane(plane)->pipe; @@ -15292,7 +13667,7 @@ intel_check_cursor_plane(struct drm_plane *plane, return 0; /* Check for which cursor types we support */ - if (!cursor_size_ok(to_i915(plane->dev), state->base.crtc_w, + if (!cursor_size_ok(dev_priv, state->base.crtc_w, state->base.crtc_h)) { DRM_DEBUG("Cursor dimension %dx%d not supported\n", state->base.crtc_w, state->base.crtc_h); @@ -15305,7 +13680,7 @@ intel_check_cursor_plane(struct drm_plane *plane, return -ENOMEM; } - if (fb->modifier != DRM_FORMAT_MOD_NONE) { + if (fb->modifier != DRM_FORMAT_MOD_LINEAR) { DRM_DEBUG_KMS("cursor cannot be tiled\n"); return -EINVAL; } @@ -15320,12 +13695,17 @@ intel_check_cursor_plane(struct drm_plane *plane, * display power well must be turned off and on again. * Refuse the put the cursor into that compromised position. */ - if (IS_CHERRYVIEW(to_i915(plane->dev)) && pipe == PIPE_C && + if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_C && state->base.visible && state->base.crtc_x < 0) { DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n"); return -EINVAL; } + if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) + state->ctl = i845_cursor_ctl(crtc_state, state); + else + state->ctl = i9xx_cursor_ctl(crtc_state, state); + return 0; } @@ -15503,8 +13883,6 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe) intel_crtc->cursor_cntl = ~0; intel_crtc->cursor_size = ~0; - intel_crtc->wm.cxsr_allowed = true; - /* initialize shared scalers */ intel_crtc_init_scalers(intel_crtc, crtc_state); @@ -15534,15 +13912,14 @@ fail: enum pipe intel_get_pipe_from_connector(struct intel_connector *connector) { - struct drm_encoder *encoder = connector->base.encoder; struct drm_device *dev = connector->base.dev; WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex)); - if (!encoder || WARN_ON(!encoder->crtc)) + if (!connector->base.state->crtc) return 
INVALID_PIPE; - return to_intel_crtc(encoder->crtc)->pipe; + return to_intel_crtc(connector->base.state->crtc)->pipe; } int intel_get_pipe_from_crtc_id(struct drm_device *dev, void *data, @@ -15692,7 +14069,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) */ found = I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_INIT_DISPLAY_DETECTED; /* WaIgnoreDDIAStrap: skl */ - if (found || IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (found || IS_GEN9_BC(dev_priv)) intel_ddi_init(dev_priv, PORT_A); /* DDI B, C and D detection is indicated by the SFUSE_STRAP @@ -15708,7 +14085,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) /* * On SKL we don't have a way to detect DDI-E so we rely on VBT. */ - if ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) && + if (IS_GEN9_BC(dev_priv) && (dev_priv->vbt.ddi_port_info[PORT_E].supports_dp || dev_priv->vbt.ddi_port_info[PORT_E].supports_dvi || dev_priv->vbt.ddi_port_info[PORT_E].supports_hdmi)) @@ -15841,14 +14218,16 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb) { - struct drm_device *dev = fb->dev; struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); drm_framebuffer_cleanup(fb); - mutex_lock(&dev->struct_mutex); + + i915_gem_object_lock(intel_fb->obj); WARN_ON(!intel_fb->obj->framebuffer_references--); + i915_gem_object_unlock(intel_fb->obj); + i915_gem_object_put(intel_fb->obj); - mutex_unlock(&dev->struct_mutex); + kfree(intel_fb); } @@ -15873,15 +14252,10 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb, struct drm_clip_rect *clips, unsigned num_clips) { - struct drm_device *dev = fb->dev; - struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); - struct drm_i915_gem_object *obj = intel_fb->obj; + struct drm_i915_gem_object *obj = intel_fb_obj(fb); - mutex_lock(&dev->struct_mutex); - if (obj->pin_display && obj->cache_dirty) - i915_gem_clflush_object(obj, true); 
- intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB); - mutex_unlock(&dev->struct_mutex); + i915_gem_object_flush_if_display(obj); + intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); return 0; } @@ -15896,7 +14270,7 @@ static u32 intel_fb_pitch_limit(struct drm_i915_private *dev_priv, uint64_t fb_modifier, uint32_t pixel_format) { - u32 gen = INTEL_INFO(dev_priv)->gen; + u32 gen = INTEL_GEN(dev_priv); if (gen >= 9) { int cpp = drm_format_plane_cpp(pixel_format, 0); @@ -15905,8 +14279,7 @@ u32 intel_fb_pitch_limit(struct drm_i915_private *dev_priv, * pixels and 32K bytes." */ return min(8192 * cpp, 32768); - } else if (gen >= 5 && !IS_VALLEYVIEW(dev_priv) && - !IS_CHERRYVIEW(dev_priv)) { + } else if (gen >= 5 && !HAS_GMCH_DISPLAY(dev_priv)) { return 32*1024; } else if (gen >= 4) { if (fb_modifier == I915_FORMAT_MOD_X_TILED) @@ -15924,18 +14297,21 @@ u32 intel_fb_pitch_limit(struct drm_i915_private *dev_priv, } } -static int intel_framebuffer_init(struct drm_device *dev, - struct intel_framebuffer *intel_fb, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj) +static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, + struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd) { - struct drm_i915_private *dev_priv = to_i915(dev); - unsigned int tiling = i915_gem_object_get_tiling(obj); - int ret; - u32 pitch_limit, stride_alignment; + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct drm_format_name_buf format_name; + u32 pitch_limit, stride_alignment; + unsigned int tiling, stride; + int ret = -EINVAL; - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + i915_gem_object_lock(obj); + obj->framebuffer_references++; + tiling = i915_gem_object_get_tiling(obj); + stride = i915_gem_object_get_stride(obj); + i915_gem_object_unlock(obj); if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) { /* @@ -15944,15 +14320,15 @@ static int intel_framebuffer_init(struct drm_device *dev, */ if (tiling != I915_TILING_NONE && tiling != 
intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) { - DRM_DEBUG("tiling_mode doesn't match fb modifier\n"); - return -EINVAL; + DRM_DEBUG_KMS("tiling_mode doesn't match fb modifier\n"); + goto err; } } else { if (tiling == I915_TILING_X) { mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED; } else if (tiling == I915_TILING_Y) { - DRM_DEBUG("No Y tiling for legacy addfb\n"); - return -EINVAL; + DRM_DEBUG_KMS("No Y tiling for legacy addfb\n"); + goto err; } } @@ -15961,17 +14337,17 @@ static int intel_framebuffer_init(struct drm_device *dev, case I915_FORMAT_MOD_Y_TILED: case I915_FORMAT_MOD_Yf_TILED: if (INTEL_GEN(dev_priv) < 9) { - DRM_DEBUG("Unsupported tiling 0x%llx!\n", - mode_cmd->modifier[0]); - return -EINVAL; + DRM_DEBUG_KMS("Unsupported tiling 0x%llx!\n", + mode_cmd->modifier[0]); + goto err; } - case DRM_FORMAT_MOD_NONE: + case DRM_FORMAT_MOD_LINEAR: case I915_FORMAT_MOD_X_TILED: break; default: - DRM_DEBUG("Unsupported fb modifier 0x%llx!\n", - mode_cmd->modifier[0]); - return -EINVAL; + DRM_DEBUG_KMS("Unsupported fb modifier 0x%llx!\n", + mode_cmd->modifier[0]); + goto err; } /* @@ -15980,39 +14356,28 @@ static int intel_framebuffer_init(struct drm_device *dev, */ if (INTEL_INFO(dev_priv)->gen < 4 && tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) { - DRM_DEBUG("tiling_mode must match fb modifier exactly on gen2/3\n"); - return -EINVAL; - } - - stride_alignment = intel_fb_stride_alignment(dev_priv, - mode_cmd->modifier[0], - mode_cmd->pixel_format); - if (mode_cmd->pitches[0] & (stride_alignment - 1)) { - DRM_DEBUG("pitch (%d) must be at least %u byte aligned\n", - mode_cmd->pitches[0], stride_alignment); - return -EINVAL; + DRM_DEBUG_KMS("tiling_mode must match fb modifier exactly on gen2/3\n"); + goto err; } pitch_limit = intel_fb_pitch_limit(dev_priv, mode_cmd->modifier[0], mode_cmd->pixel_format); if (mode_cmd->pitches[0] > pitch_limit) { - DRM_DEBUG("%s pitch (%u) must be at less than %d\n", - mode_cmd->modifier[0] != 
DRM_FORMAT_MOD_NONE ? - "tiled" : "linear", - mode_cmd->pitches[0], pitch_limit); - return -EINVAL; + DRM_DEBUG_KMS("%s pitch (%u) must be at most %d\n", + mode_cmd->modifier[0] != DRM_FORMAT_MOD_LINEAR ? + "tiled" : "linear", + mode_cmd->pitches[0], pitch_limit); + goto err; } /* * If there's a fence, enforce that * the fb pitch and fence stride match. */ - if (tiling != I915_TILING_NONE && - mode_cmd->pitches[0] != i915_gem_object_get_stride(obj)) { - DRM_DEBUG("pitch (%d) must match tiling stride (%d)\n", - mode_cmd->pitches[0], - i915_gem_object_get_stride(obj)); - return -EINVAL; + if (tiling != I915_TILING_NONE && mode_cmd->pitches[0] != stride) { + DRM_DEBUG_KMS("pitch (%d) must match tiling stride (%d)\n", + mode_cmd->pitches[0], stride); + goto err; } /* Reject formats not supported by any plane early. */ @@ -16024,33 +14389,33 @@ static int intel_framebuffer_init(struct drm_device *dev, break; case DRM_FORMAT_XRGB1555: if (INTEL_GEN(dev_priv) > 3) { - DRM_DEBUG("unsupported pixel format: %s\n", - drm_get_format_name(mode_cmd->pixel_format, &format_name)); - return -EINVAL; + DRM_DEBUG_KMS("unsupported pixel format: %s\n", + drm_get_format_name(mode_cmd->pixel_format, &format_name)); + goto err; } break; case DRM_FORMAT_ABGR8888: if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv) && INTEL_GEN(dev_priv) < 9) { - DRM_DEBUG("unsupported pixel format: %s\n", - drm_get_format_name(mode_cmd->pixel_format, &format_name)); - return -EINVAL; + DRM_DEBUG_KMS("unsupported pixel format: %s\n", + drm_get_format_name(mode_cmd->pixel_format, &format_name)); + goto err; } break; case DRM_FORMAT_XBGR8888: case DRM_FORMAT_XRGB2101010: case DRM_FORMAT_XBGR2101010: if (INTEL_GEN(dev_priv) < 4) { - DRM_DEBUG("unsupported pixel format: %s\n", - drm_get_format_name(mode_cmd->pixel_format, &format_name)); - return -EINVAL; + DRM_DEBUG_KMS("unsupported pixel format: %s\n", + drm_get_format_name(mode_cmd->pixel_format, &format_name)); + goto err; } break; case 
DRM_FORMAT_ABGR2101010: if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) { - DRM_DEBUG("unsupported pixel format: %s\n", - drm_get_format_name(mode_cmd->pixel_format, &format_name)); - return -EINVAL; + DRM_DEBUG_KMS("unsupported pixel format: %s\n", + drm_get_format_name(mode_cmd->pixel_format, &format_name)); + goto err; } break; case DRM_FORMAT_YUYV: @@ -16058,37 +14423,52 @@ static int intel_framebuffer_init(struct drm_device *dev, case DRM_FORMAT_YVYU: case DRM_FORMAT_VYUY: if (INTEL_GEN(dev_priv) < 5) { - DRM_DEBUG("unsupported pixel format: %s\n", - drm_get_format_name(mode_cmd->pixel_format, &format_name)); - return -EINVAL; + DRM_DEBUG_KMS("unsupported pixel format: %s\n", + drm_get_format_name(mode_cmd->pixel_format, &format_name)); + goto err; } break; default: - DRM_DEBUG("unsupported pixel format: %s\n", - drm_get_format_name(mode_cmd->pixel_format, &format_name)); - return -EINVAL; + DRM_DEBUG_KMS("unsupported pixel format: %s\n", + drm_get_format_name(mode_cmd->pixel_format, &format_name)); + goto err; } /* FIXME need to adjust LINOFF/TILEOFF accordingly. 
*/ if (mode_cmd->offsets[0] != 0) - return -EINVAL; + goto err; + + drm_helper_mode_fill_fb_struct(&dev_priv->drm, + &intel_fb->base, mode_cmd); + + stride_alignment = intel_fb_stride_alignment(&intel_fb->base, 0); + if (mode_cmd->pitches[0] & (stride_alignment - 1)) { + DRM_DEBUG_KMS("pitch (%d) must be at least %u byte aligned\n", + mode_cmd->pitches[0], stride_alignment); + goto err; + } - drm_helper_mode_fill_fb_struct(dev, &intel_fb->base, mode_cmd); intel_fb->obj = obj; ret = intel_fill_fb_info(dev_priv, &intel_fb->base); if (ret) - return ret; + goto err; - ret = drm_framebuffer_init(dev, &intel_fb->base, &intel_fb_funcs); + ret = drm_framebuffer_init(obj->base.dev, + &intel_fb->base, + &intel_fb_funcs); if (ret) { DRM_ERROR("framebuffer init failed %d\n", ret); - return ret; + goto err; } - intel_fb->obj->framebuffer_references++; - return 0; + +err: + i915_gem_object_lock(obj); + obj->framebuffer_references--; + i915_gem_object_unlock(obj); + return ret; } static struct drm_framebuffer * @@ -16104,7 +14484,7 @@ intel_user_framebuffer_create(struct drm_device *dev, if (!obj) return ERR_PTR(-ENOENT); - fb = intel_framebuffer_create(dev, &mode_cmd, obj); + fb = intel_framebuffer_create(obj, &mode_cmd); if (IS_ERR(fb)) i915_gem_object_put(obj); @@ -16138,6 +14518,8 @@ static const struct drm_mode_config_funcs intel_mode_funcs = { */ void intel_init_display_hooks(struct drm_i915_private *dev_priv) { + intel_init_cdclk_hooks(dev_priv); + if (INTEL_INFO(dev_priv)->gen >= 9) { dev_priv->display.get_pipe_config = haswell_get_pipe_config; dev_priv->display.get_initial_plane_config = @@ -16206,62 +14588,6 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) dev_priv->display.crtc_disable = i9xx_crtc_disable; } - /* Returns the core display clock speed */ - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) - dev_priv->display.get_display_clock_speed = - skylake_get_display_clock_speed; - else if (IS_GEN9_LP(dev_priv)) - 
dev_priv->display.get_display_clock_speed = - broxton_get_display_clock_speed; - else if (IS_BROADWELL(dev_priv)) - dev_priv->display.get_display_clock_speed = - broadwell_get_display_clock_speed; - else if (IS_HASWELL(dev_priv)) - dev_priv->display.get_display_clock_speed = - haswell_get_display_clock_speed; - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - dev_priv->display.get_display_clock_speed = - valleyview_get_display_clock_speed; - else if (IS_GEN5(dev_priv)) - dev_priv->display.get_display_clock_speed = - ilk_get_display_clock_speed; - else if (IS_I945G(dev_priv) || IS_I965G(dev_priv) || - IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv)) - dev_priv->display.get_display_clock_speed = - i945_get_display_clock_speed; - else if (IS_GM45(dev_priv)) - dev_priv->display.get_display_clock_speed = - gm45_get_display_clock_speed; - else if (IS_I965GM(dev_priv)) - dev_priv->display.get_display_clock_speed = - i965gm_get_display_clock_speed; - else if (IS_PINEVIEW(dev_priv)) - dev_priv->display.get_display_clock_speed = - pnv_get_display_clock_speed; - else if (IS_G33(dev_priv) || IS_G4X(dev_priv)) - dev_priv->display.get_display_clock_speed = - g33_get_display_clock_speed; - else if (IS_I915G(dev_priv)) - dev_priv->display.get_display_clock_speed = - i915_get_display_clock_speed; - else if (IS_I945GM(dev_priv) || IS_I845G(dev_priv)) - dev_priv->display.get_display_clock_speed = - i9xx_misc_get_display_clock_speed; - else if (IS_I915GM(dev_priv)) - dev_priv->display.get_display_clock_speed = - i915gm_get_display_clock_speed; - else if (IS_I865G(dev_priv)) - dev_priv->display.get_display_clock_speed = - i865_get_display_clock_speed; - else if (IS_I85X(dev_priv)) - dev_priv->display.get_display_clock_speed = - i85x_get_display_clock_speed; - else { /* 830 */ - WARN(!IS_I830(dev_priv), "Unknown platform. 
Assuming 133 MHz CDCLK\n"); - dev_priv->display.get_display_clock_speed = - i830_get_display_clock_speed; - } - if (IS_GEN5(dev_priv)) { dev_priv->display.fdi_link_train = ironlake_fdi_link_train; } else if (IS_GEN6(dev_priv)) { @@ -16273,28 +14599,6 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) dev_priv->display.fdi_link_train = hsw_fdi_link_train; } - if (IS_BROADWELL(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - broadwell_modeset_commit_cdclk; - dev_priv->display.modeset_calc_cdclk = - broadwell_modeset_calc_cdclk; - } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - valleyview_modeset_commit_cdclk; - dev_priv->display.modeset_calc_cdclk = - valleyview_modeset_calc_cdclk; - } else if (IS_GEN9_LP(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - bxt_modeset_commit_cdclk; - dev_priv->display.modeset_calc_cdclk = - bxt_modeset_calc_cdclk; - } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - skl_modeset_commit_cdclk; - dev_priv->display.modeset_calc_cdclk = - skl_modeset_calc_cdclk; - } - if (dev_priv->info.gen >= 9) dev_priv->display.update_crtcs = skl_update_crtcs; else @@ -16521,8 +14825,7 @@ void intel_modeset_init_hw(struct drm_device *dev) struct drm_i915_private *dev_priv = to_i915(dev); intel_update_cdclk(dev_priv); - - dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq; + dev_priv->cdclk.logical = dev_priv->cdclk.actual = dev_priv->cdclk.hw; intel_init_clock_gating(dev_priv); } @@ -16577,7 +14880,8 @@ retry: * intermediate watermarks (since we don't trust the current * watermarks). 
*/ - intel_state->skip_intermediate_wm = true; + if (!HAS_GMCH_DISPLAY(dev_priv)) + intel_state->skip_intermediate_wm = true; ret = intel_atomic_check(dev, state); if (ret) { @@ -16597,7 +14901,7 @@ retry: } /* Write calculated watermark values back */ - for_each_crtc_in_state(state, crtc, cstate, i) { + for_each_new_crtc_in_state(state, crtc, cstate, i) { struct intel_crtc_state *cs = to_intel_crtc_state(cstate); cs->wm.need_postvbl_update = true; @@ -16740,7 +15044,8 @@ int intel_modeset_init(struct drm_device *dev) * Note that we need to do this after reconstructing the BIOS fb's * since the watermark calculation done here will use pstate->fb. */ - sanitize_watermarks(dev); + if (!HAS_GMCH_DISPLAY(dev_priv)) + sanitize_watermarks(dev); return 0; } @@ -16748,24 +15053,31 @@ int intel_modeset_init(struct drm_device *dev) static void intel_enable_pipe_a(struct drm_device *dev) { struct intel_connector *connector; + struct drm_connector_list_iter conn_iter; struct drm_connector *crt = NULL; struct intel_load_detect_pipe load_detect_temp; struct drm_modeset_acquire_ctx *ctx = dev->mode_config.acquire_ctx; + int ret; /* We can't just switch on the pipe A, we need to set things up with a * proper mode and output configuration. As a gross hack, enable pipe A * by enabling the load detect pipe once. 
*/ - for_each_intel_connector(dev, connector) { + drm_connector_list_iter_begin(dev, &conn_iter); + for_each_intel_connector_iter(connector, &conn_iter) { if (connector->encoder->type == INTEL_OUTPUT_ANALOG) { crt = &connector->base; break; } } + drm_connector_list_iter_end(&conn_iter); if (!crt) return; - if (intel_get_load_detect_pipe(crt, NULL, &load_detect_temp, ctx)) + ret = intel_get_load_detect_pipe(crt, NULL, &load_detect_temp, ctx); + WARN(ret < 0, "All modeset mutexes are locked, but intel_get_load_detect_pipe failed\n"); + + if (ret > 0) intel_release_load_detect_pipe(crt, &load_detect_temp, ctx); } @@ -16842,6 +15154,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) if (plane->base.type == DRM_PLANE_TYPE_PRIMARY) continue; + trace_intel_disable_plane(&plane->base, crtc); plane->disable_plane(&plane->base, &crtc->base); } } @@ -16988,15 +15301,14 @@ static bool primary_get_hw_state(struct intel_plane *plane) /* FIXME read out full plane state for all planes */ static void readout_plane_state(struct intel_crtc *crtc) { - struct drm_plane *primary = crtc->base.primary; - struct intel_plane_state *plane_state = - to_intel_plane_state(primary->state); + struct intel_plane *primary = to_intel_plane(crtc->base.primary); + bool visible; - plane_state->base.visible = crtc->active && - primary_get_hw_state(to_intel_plane(primary)); + visible = crtc->active && primary_get_hw_state(primary); - if (plane_state->base.visible) - crtc->base.state->plane_mask |= 1 << drm_plane_index(primary); + intel_set_plane_visible(to_intel_crtc_state(crtc->base.state), + to_intel_plane_state(primary->base.state), + visible); } static void intel_modeset_readout_hw_state(struct drm_device *dev) @@ -17006,6 +15318,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) struct intel_crtc *crtc; struct intel_encoder *encoder; struct intel_connector *connector; + struct drm_connector_list_iter conn_iter; int i; dev_priv->active_crtcs = 0; @@ -17076,7 +15389,8 
@@ static void intel_modeset_readout_hw_state(struct drm_device *dev) pipe_name(pipe)); } - for_each_intel_connector(dev, connector) { + drm_connector_list_iter_begin(dev, &conn_iter); + for_each_intel_connector_iter(connector, &conn_iter) { if (connector->get_hw_state(connector)) { connector->base.dpms = DRM_MODE_DPMS_ON; @@ -17104,6 +15418,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) connector->base.base.id, connector->base.name, enableddisabled(connector->base.encoder)); } + drm_connector_list_iter_end(&conn_iter); for_each_intel_crtc(dev, crtc) { struct intel_crtc_state *crtc_state = @@ -17129,10 +15444,11 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) */ crtc_state->base.mode.private_flags = I915_MODE_FLAG_INHERITED; - if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) - pixclk = ilk_pipe_pixel_rate(crtc_state); - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - pixclk = crtc_state->base.adjusted_mode.crtc_clock; + intel_crtc_compute_pixel_rate(crtc_state); + + if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv) || + IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + pixclk = crtc_state->pixel_rate; else WARN_ON(dev_priv->display.modeset_calc_cdclk); @@ -17150,6 +15466,24 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) } } +static void +get_encoder_power_domains(struct drm_i915_private *dev_priv) +{ + struct intel_encoder *encoder; + + for_each_intel_encoder(&dev_priv->drm, encoder) { + u64 get_domains; + enum intel_display_power_domain domain; + + if (!encoder->get_power_domains) + continue; + + get_domains = encoder->get_power_domains(encoder); + for_each_power_domain(domain, get_domains) + intel_display_power_get(dev_priv, domain); + } +} + /* Scan out the current hw modeset state, * and sanitizes it to the current state */ @@ -17165,6 +15499,8 @@ intel_modeset_setup_hw_state(struct drm_device *dev) intel_modeset_readout_hw_state(dev); /* HW state is read out, 
now we need to sanitize this mess. */ + get_encoder_power_domains(dev_priv); + for_each_intel_encoder(dev, encoder) { intel_sanitize_encoder(encoder); } @@ -17191,15 +15527,17 @@ intel_modeset_setup_hw_state(struct drm_device *dev) pll->on = false; } - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { vlv_wm_get_hw_state(dev); - else if (IS_GEN9(dev_priv)) + vlv_wm_sanitize(dev_priv); + } else if (IS_GEN9(dev_priv)) { skl_wm_get_hw_state(dev); - else if (HAS_PCH_SPLIT(dev_priv)) + } else if (HAS_PCH_SPLIT(dev_priv)) { ilk_wm_get_hw_state(dev); + } for_each_intel_crtc(dev, crtc) { - unsigned long put_domains; + u64 put_domains; put_domains = modeset_get_crtc_power_domains(&crtc->base, crtc->config); if (WARN_ON(put_domains)) @@ -17207,6 +15545,8 @@ intel_modeset_setup_hw_state(struct drm_device *dev) } intel_display_set_init_power(dev_priv, false); + intel_power_domains_verify_state(dev_priv); + intel_fbc_init_pipe_state(dev_priv); } @@ -17239,7 +15579,7 @@ void intel_display_resume(struct drm_device *dev) } if (!ret) - ret = __intel_display_resume(dev, state); + ret = __intel_display_resume(dev, state, &ctx); drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); @@ -17488,9 +15828,9 @@ intel_display_capture_error_state(struct drm_i915_private *dev_priv) void intel_display_print_error_state(struct drm_i915_error_state_buf *m, - struct drm_i915_private *dev_priv, struct intel_display_error_state *error) { + struct drm_i915_private *dev_priv = m->i915; int i; if (!error) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index d1670b8afbf5..ee77b519835c 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -28,8 +28,10 @@ #include <linux/i2c.h> #include <linux/slab.h> #include <linux/export.h> +#include <linux/types.h> #include <linux/notifier.h> #include <linux/reboot.h> +#include <asm/byteorder.h> #include <drm/drmP.h> #include 
<drm/drm_atomic_helper.h> #include <drm/drm_crtc.h> @@ -226,7 +228,7 @@ intel_dp_source_rates(struct intel_dp *intel_dp, const int **source_rates) if (IS_GEN9_LP(dev_priv)) { *source_rates = bxt_rates; size = ARRAY_SIZE(bxt_rates); - } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + } else if (IS_GEN9_BC(dev_priv)) { *source_rates = skl_rates; size = ARRAY_SIZE(skl_rates); } else { @@ -394,14 +396,12 @@ static void pps_lock(struct intel_dp *intel_dp) struct intel_encoder *encoder = &intel_dig_port->base; struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; /* * See vlv_power_sequencer_reset() why we need * a power domain reference here. */ - power_domain = intel_display_port_aux_power_domain(encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); mutex_lock(&dev_priv->pps_mutex); } @@ -412,12 +412,10 @@ static void pps_unlock(struct intel_dp *intel_dp) struct intel_encoder *encoder = &intel_dig_port->base; struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; mutex_unlock(&dev_priv->pps_mutex); - power_domain = intel_display_port_aux_power_domain(encoder); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); } static void @@ -916,7 +914,7 @@ static uint32_t ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) * divide by 2000 and use that */ if (intel_dig_port->port == PORT_A) - return DIV_ROUND_CLOSEST(dev_priv->cdclk_freq, 2000); + return DIV_ROUND_CLOSEST(dev_priv->cdclk.hw.cdclk, 2000); else return DIV_ROUND_CLOSEST(dev_priv->rawclk_freq, 2000); } @@ -1593,6 +1591,13 @@ static int intel_dp_compute_bpp(struct intel_dp *intel_dp, if (bpc > 0) bpp = min(bpp, 3*bpc); + /* For DP Compliance we override the computed bpp for the pipe */ + if 
(intel_dp->compliance.test_data.bpc != 0) { + pipe_config->pipe_bpp = 3*intel_dp->compliance.test_data.bpc; + pipe_config->dither_force_disable = pipe_config->pipe_bpp == 6*3; + DRM_DEBUG_KMS("Setting pipe_bpp to %d\n", + pipe_config->pipe_bpp); + } return bpp; } @@ -1613,6 +1618,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, /* Conveniently, the link BW constants become indices with a shift...*/ int min_clock = 0; int max_clock; + int link_rate_index; int bpp, mode_rate; int link_avail, link_clock; int common_rates[DP_MAX_SUPPORTED_RATES] = {}; @@ -1654,6 +1660,15 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) return false; + /* Use values requested by Compliance Test Request */ + if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { + link_rate_index = intel_dp_link_rate_index(intel_dp, + common_rates, + intel_dp->compliance.test_link_rate); + if (link_rate_index >= 0) + min_clock = max_clock = link_rate_index; + min_lane_count = max_lane_count = intel_dp->compliance.test_lane_count; + } DRM_DEBUG_KMS("DP link computation with max lane count %i " "max bw %d pixel clock %iKHz\n", max_lane_count, common_rates[max_clock], @@ -1753,8 +1768,7 @@ found: * DPLL0 VCO may need to be adjusted to get the correct * clock for eDP. This will affect cdclk as well. 
*/ - if (is_edp(intel_dp) && - (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))) { + if (is_edp(intel_dp) && IS_GEN9_BC(dev_priv)) { int vco; switch (pipe_config->port_clock / 2) { @@ -1767,7 +1781,7 @@ found: break; } - to_intel_atomic_state(pipe_config->base.state)->cdclk_pll_vco = vco; + to_intel_atomic_state(pipe_config->base.state)->cdclk.logical.vco = vco; } if (!HAS_DDI(dev_priv)) @@ -1987,9 +2001,7 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) { struct drm_device *dev = intel_dp_to_dev(intel_dp); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; u32 pp; i915_reg_t pp_stat_reg, pp_ctrl_reg; bool need_to_disable = !intel_dp->want_panel_vdd; @@ -2005,8 +2017,7 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) if (edp_have_panel_vdd(intel_dp)) return need_to_disable; - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); DRM_DEBUG_KMS("Turning eDP port %c VDD on\n", port_name(intel_dig_port->port)); @@ -2064,8 +2075,6 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = to_i915(dev); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *intel_encoder = &intel_dig_port->base; - enum intel_display_power_domain power_domain; u32 pp; i915_reg_t pp_stat_reg, pp_ctrl_reg; @@ -2095,8 +2104,7 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) if ((pp & PANEL_POWER_ON) == 0) intel_dp->panel_power_off_time = ktime_get_boottime(); - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); } static void 
edp_panel_vdd_work(struct work_struct *__work) @@ -2209,11 +2217,8 @@ void intel_edp_panel_on(struct intel_dp *intel_dp) static void edp_panel_off(struct intel_dp *intel_dp) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_device *dev = intel_dp_to_dev(intel_dp); struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; u32 pp; i915_reg_t pp_ctrl_reg; @@ -2245,8 +2250,7 @@ static void edp_panel_off(struct intel_dp *intel_dp) wait_panel_off(intel_dp); /* We got a reference when we enabled the VDD. */ - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); } void intel_edp_panel_off(struct intel_dp *intel_dp) @@ -2492,12 +2496,11 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder, enum port port = dp_to_dig_port(intel_dp)->port; struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; u32 tmp; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -2533,7 +2536,7 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder, ret = true; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } @@ -3080,9 +3083,8 @@ intel_dp_voltage_max(struct intel_dp *intel_dp) if (IS_GEN9_LP(dev_priv)) return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; else if (INTEL_GEN(dev_priv) >= 9) { - if (dev_priv->vbt.edp.low_vswing && port == PORT_A) - return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; - return DP_TRAIN_VOLTAGE_SWING_LEVEL_2; + struct intel_encoder *encoder = 
&dp_to_dig_port(intel_dp)->base; + return intel_ddi_dp_voltage_max(encoder); } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; else if (IS_GEN7(dev_priv) && port == PORT_A) @@ -3922,19 +3924,112 @@ intel_dp_get_sink_irq_esi(struct intel_dp *intel_dp, u8 *sink_irq_vector) static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp) { - uint8_t test_result = DP_TEST_ACK; - return test_result; + int status = 0; + int min_lane_count = 1; + int common_rates[DP_MAX_SUPPORTED_RATES] = {}; + int link_rate_index, test_link_rate; + uint8_t test_lane_count, test_link_bw; + /* (DP CTS 1.2) + * 4.3.1.11 + */ + /* Read the TEST_LANE_COUNT and TEST_LINK_RTAE fields (DP CTS 3.1.4) */ + status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_LANE_COUNT, + &test_lane_count); + + if (status <= 0) { + DRM_DEBUG_KMS("Lane count read failed\n"); + return DP_TEST_NAK; + } + test_lane_count &= DP_MAX_LANE_COUNT_MASK; + /* Validate the requested lane count */ + if (test_lane_count < min_lane_count || + test_lane_count > intel_dp->max_sink_lane_count) + return DP_TEST_NAK; + + status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_LINK_RATE, + &test_link_bw); + if (status <= 0) { + DRM_DEBUG_KMS("Link Rate read failed\n"); + return DP_TEST_NAK; + } + /* Validate the requested link rate */ + test_link_rate = drm_dp_bw_code_to_link_rate(test_link_bw); + link_rate_index = intel_dp_link_rate_index(intel_dp, + common_rates, + test_link_rate); + if (link_rate_index < 0) + return DP_TEST_NAK; + + intel_dp->compliance.test_lane_count = test_lane_count; + intel_dp->compliance.test_link_rate = test_link_rate; + + return DP_TEST_ACK; } static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp) { - uint8_t test_result = DP_TEST_NAK; - return test_result; + uint8_t test_pattern; + uint16_t test_misc; + __be16 h_width, v_height; + int status = 0; + + /* Read the TEST_PATTERN (DP CTS 3.1.5) */ + status = 
drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_PATTERN, + &test_pattern, 1); + if (status <= 0) { + DRM_DEBUG_KMS("Test pattern read failed\n"); + return DP_TEST_NAK; + } + if (test_pattern != DP_COLOR_RAMP) + return DP_TEST_NAK; + + status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_H_WIDTH_HI, + &h_width, 2); + if (status <= 0) { + DRM_DEBUG_KMS("H Width read failed\n"); + return DP_TEST_NAK; + } + + status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_V_HEIGHT_HI, + &v_height, 2); + if (status <= 0) { + DRM_DEBUG_KMS("V Height read failed\n"); + return DP_TEST_NAK; + } + + status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_MISC0, + &test_misc, 1); + if (status <= 0) { + DRM_DEBUG_KMS("TEST MISC read failed\n"); + return DP_TEST_NAK; + } + if ((test_misc & DP_TEST_COLOR_FORMAT_MASK) != DP_COLOR_FORMAT_RGB) + return DP_TEST_NAK; + if (test_misc & DP_TEST_DYNAMIC_RANGE_CEA) + return DP_TEST_NAK; + switch (test_misc & DP_TEST_BIT_DEPTH_MASK) { + case DP_TEST_BIT_DEPTH_6: + intel_dp->compliance.test_data.bpc = 6; + break; + case DP_TEST_BIT_DEPTH_8: + intel_dp->compliance.test_data.bpc = 8; + break; + default: + return DP_TEST_NAK; + } + + intel_dp->compliance.test_data.video_pattern = test_pattern; + intel_dp->compliance.test_data.hdisplay = be16_to_cpu(h_width); + intel_dp->compliance.test_data.vdisplay = be16_to_cpu(v_height); + /* Set test active flag here so userspace doesn't interrupt things */ + intel_dp->compliance.test_active = 1; + + return DP_TEST_ACK; } static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp) { - uint8_t test_result = DP_TEST_NAK; + uint8_t test_result = DP_TEST_ACK; struct intel_connector *intel_connector = intel_dp->attached_connector; struct drm_connector *connector = &intel_connector->base; @@ -3969,7 +4064,7 @@ static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp) DRM_DEBUG_KMS("Failed to write EDID checksum\n"); test_result = DP_TEST_ACK | DP_TEST_EDID_CHECKSUM_WRITE; - intel_dp->compliance.test_data.edid = 
INTEL_DP_RESOLUTION_STANDARD; + intel_dp->compliance.test_data.edid = INTEL_DP_RESOLUTION_PREFERRED; } /* Set test active flag here so userspace doesn't interrupt things */ @@ -3987,45 +4082,42 @@ static uint8_t intel_dp_autotest_phy_pattern(struct intel_dp *intel_dp) static void intel_dp_handle_test_request(struct intel_dp *intel_dp) { uint8_t response = DP_TEST_NAK; - uint8_t rxdata = 0; - int status = 0; + uint8_t request = 0; + int status; - status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_REQUEST, &rxdata, 1); + status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_REQUEST, &request); if (status <= 0) { DRM_DEBUG_KMS("Could not read test request from sink\n"); goto update_status; } - switch (rxdata) { + switch (request) { case DP_TEST_LINK_TRAINING: DRM_DEBUG_KMS("LINK_TRAINING test requested\n"); - intel_dp->compliance.test_type = DP_TEST_LINK_TRAINING; response = intel_dp_autotest_link_training(intel_dp); break; case DP_TEST_LINK_VIDEO_PATTERN: DRM_DEBUG_KMS("TEST_PATTERN test requested\n"); - intel_dp->compliance.test_type = DP_TEST_LINK_VIDEO_PATTERN; response = intel_dp_autotest_video_pattern(intel_dp); break; case DP_TEST_LINK_EDID_READ: DRM_DEBUG_KMS("EDID test requested\n"); - intel_dp->compliance.test_type = DP_TEST_LINK_EDID_READ; response = intel_dp_autotest_edid(intel_dp); break; case DP_TEST_LINK_PHY_TEST_PATTERN: DRM_DEBUG_KMS("PHY_PATTERN test requested\n"); - intel_dp->compliance.test_type = DP_TEST_LINK_PHY_TEST_PATTERN; response = intel_dp_autotest_phy_pattern(intel_dp); break; default: - DRM_DEBUG_KMS("Invalid test request '%02x'\n", rxdata); + DRM_DEBUG_KMS("Invalid test request '%02x'\n", request); break; } + if (response & DP_TEST_ACK) + intel_dp->compliance.test_type = request; + update_status: - status = drm_dp_dpcd_write(&intel_dp->aux, - DP_TEST_RESPONSE, - &response, 1); + status = drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_RESPONSE, response); if (status <= 0) DRM_DEBUG_KMS("Could not write test response to sink\n"); } @@ -4137,9 
+4229,8 @@ intel_dp_check_link_status(struct intel_dp *intel_dp) if (!intel_dp->lane_count) return; - /* if link training is requested we should perform it always */ - if ((intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) || - (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count))) { + /* Retrain if Channel EQ or CR not ok */ + if (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) { DRM_DEBUG_KMS("%s: channel EQ not ok, retraining\n", intel_encoder->base.name); @@ -4164,6 +4255,7 @@ static bool intel_dp_short_pulse(struct intel_dp *intel_dp) { struct drm_device *dev = intel_dp_to_dev(intel_dp); + struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base; u8 sink_irq_vector = 0; u8 old_sink_count = intel_dp->sink_count; bool ret; @@ -4197,7 +4289,7 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) sink_irq_vector); if (sink_irq_vector & DP_AUTOMATED_TEST_REQUEST) - DRM_DEBUG_DRIVER("Test request in short pulse not handled\n"); + intel_dp_handle_test_request(intel_dp); if (sink_irq_vector & (DP_CP_IRQ | DP_SINK_SPECIFIC_IRQ)) DRM_DEBUG_DRIVER("CP or sink specific irq unhandled\n"); } @@ -4205,6 +4297,11 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); intel_dp_check_link_status(intel_dp); drm_modeset_unlock(&dev->mode_config.connection_mutex); + if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { + DRM_DEBUG_KMS("Link Training Compliance Test requested\n"); + /* Send a Hotplug Uevent to userspace to start modeset */ + drm_kms_helper_hotplug_event(intel_encoder->base.dev); + } return true; } @@ -4213,9 +4310,13 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) static enum drm_connector_status intel_dp_detect_dpcd(struct intel_dp *intel_dp) { + struct intel_lspcon *lspcon = dp_to_lspcon(intel_dp); uint8_t *dpcd = intel_dp->dpcd; uint8_t type; + if (lspcon->active) + lspcon_resume(lspcon); + if (!intel_dp_get_dpcd(intel_dp)) return connector_status_disconnected; @@ 
-4465,7 +4566,7 @@ intel_dp_unset_edid(struct intel_dp *intel_dp) intel_dp->has_audio = false; } -static enum drm_connector_status +static int intel_dp_long_pulse(struct intel_connector *intel_connector) { struct drm_connector *connector = &intel_connector->base; @@ -4474,11 +4575,11 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_device *dev = connector->dev; enum drm_connector_status status; - enum intel_display_power_domain power_domain; u8 sink_irq_vector = 0; - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_get(to_i915(dev), power_domain); + WARN_ON(!drm_modeset_is_locked(&connector->dev->mode_config.connection_mutex)); + + intel_display_power_get(to_i915(dev), intel_dp->aux_power_domain); /* Can't disconnect eDP, but you can close the lid... */ if (is_edp(intel_dp)) @@ -4511,11 +4612,15 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) yesno(intel_dp_source_supports_hbr2(intel_dp)), yesno(drm_dp_tps3_supported(intel_dp->dpcd))); - /* Set the max lane count for sink */ - intel_dp->max_sink_lane_count = drm_dp_max_lane_count(intel_dp->dpcd); + if (intel_dp->reset_link_params) { + /* Set the max lane count for sink */ + intel_dp->max_sink_lane_count = drm_dp_max_lane_count(intel_dp->dpcd); + + /* Set the max link BW for sink */ + intel_dp->max_sink_link_bw = intel_dp_max_link_bw(intel_dp); - /* Set the max link BW for sink */ - intel_dp->max_sink_link_bw = intel_dp_max_link_bw(intel_dp); + intel_dp->reset_link_params = false; + } intel_dp_print_rates(intel_dp); @@ -4531,16 +4636,20 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) */ status = connector_status_disconnected; goto out; - } else if (connector->status == connector_status_connected) { + } else { /* - * If display was connected already and is still connected - * check links status, there has been known issues of - * link loss triggerring long 
pulse!!!! + * If display is now connected check links status, + * there has been known issues of link loss triggerring + * long pulse. + * + * Some sinks (eg. ASUS PB287Q) seem to perform some + * weird HPD ping pong during modesets. So we can apparently + * end up with HPD going low during a modeset, and then + * going back up soon after. And once that happens we must + * retrain the link to get a picture. That's in case no + * userspace component reacted to intermittent HPD dip. */ - drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); intel_dp_check_link_status(intel_dp); - drm_modeset_unlock(&dev->mode_config.connection_mutex); - goto out; } /* @@ -4575,15 +4684,17 @@ out: if (status != connector_status_connected && !intel_dp->is_mst) intel_dp_unset_edid(intel_dp); - intel_display_power_put(to_i915(dev), power_domain); + intel_display_power_put(to_i915(dev), intel_dp->aux_power_domain); return status; } -static enum drm_connector_status -intel_dp_detect(struct drm_connector *connector, bool force) +static int +intel_dp_detect(struct drm_connector *connector, + struct drm_modeset_acquire_ctx *ctx, + bool force) { struct intel_dp *intel_dp = intel_attached_dp(connector); - enum drm_connector_status status = connector->status; + int status = connector->status; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); @@ -4603,7 +4714,6 @@ intel_dp_force(struct drm_connector *connector) struct intel_dp *intel_dp = intel_attached_dp(connector); struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base; struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); - enum intel_display_power_domain power_domain; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); @@ -4612,12 +4722,11 @@ intel_dp_force(struct drm_connector *connector) if (connector->status != connector_status_connected) return; - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_get(dev_priv, 
power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); intel_dp_set_edid(intel_dp); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); if (intel_encoder->type != INTEL_OUTPUT_EDP) intel_encoder->type = INTEL_OUTPUT_DP; @@ -4852,7 +4961,6 @@ static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp) struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; lockdep_assert_held(&dev_priv->pps_mutex); @@ -4866,8 +4974,7 @@ static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp) * indefinitely. */ DRM_DEBUG_KMS("VDD left on by BIOS, adjusting state tracking\n"); - power_domain = intel_display_port_aux_power_domain(&intel_dig_port->base); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); edp_panel_vdd_schedule_off(intel_dp); } @@ -4897,6 +5004,8 @@ void intel_dp_encoder_reset(struct drm_encoder *encoder) if (lspcon->active) lspcon_resume(lspcon); + intel_dp->reset_link_params = true; + pps_lock(intel_dp); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) @@ -4913,7 +5022,6 @@ void intel_dp_encoder_reset(struct drm_encoder *encoder) static const struct drm_connector_funcs intel_dp_connector_funcs = { .dpms = drm_atomic_helper_connector_dpms, - .detect = intel_dp_detect, .force = intel_dp_force, .fill_modes = drm_helper_probe_single_connector_modes, .set_property = intel_dp_set_property, @@ -4926,6 +5034,7 @@ static const struct drm_connector_funcs intel_dp_connector_funcs = { }; static const struct drm_connector_helper_funcs intel_dp_connector_helper_funcs = { + .detect_ctx = intel_dp_detect, .get_modes = intel_dp_get_modes, .mode_valid = intel_dp_mode_valid, }; @@ -4939,10 +5048,8 @@ enum irqreturn intel_dp_hpd_pulse(struct 
intel_digital_port *intel_dig_port, bool long_hpd) { struct intel_dp *intel_dp = &intel_dig_port->dp; - struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; enum irqreturn ret = IRQ_NONE; if (intel_dig_port->base.type != INTEL_OUTPUT_EDP && @@ -4966,12 +5073,12 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) long_hpd ? "long" : "short"); if (long_hpd) { + intel_dp->reset_link_params = true; intel_dp->detect_done = false; return IRQ_NONE; } - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); if (intel_dp->is_mst) { if (intel_dp_check_mst_status(intel_dp) == -EINVAL) { @@ -4999,7 +5106,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) ret = IRQ_HANDLED; put_power: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); return ret; } @@ -5790,6 +5897,41 @@ out_vdd_off: return false; } +/* Set up the hotplug pin and aux power domain. 
*/ +static void +intel_dp_init_connector_port_info(struct intel_digital_port *intel_dig_port) +{ + struct intel_encoder *encoder = &intel_dig_port->base; + struct intel_dp *intel_dp = &intel_dig_port->dp; + + switch (intel_dig_port->port) { + case PORT_A: + encoder->hpd_pin = HPD_PORT_A; + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_A; + break; + case PORT_B: + encoder->hpd_pin = HPD_PORT_B; + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_B; + break; + case PORT_C: + encoder->hpd_pin = HPD_PORT_C; + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_C; + break; + case PORT_D: + encoder->hpd_pin = HPD_PORT_D; + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_D; + break; + case PORT_E: + encoder->hpd_pin = HPD_PORT_E; + + /* FIXME: Check VBT for actual wiring of PORT E */ + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_D; + break; + default: + MISSING_CASE(intel_dig_port->port); + } +} + bool intel_dp_init_connector(struct intel_digital_port *intel_dig_port, struct intel_connector *intel_connector) @@ -5807,6 +5949,7 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, intel_dig_port->max_lanes, port_name(port))) return false; + intel_dp->reset_link_params = true; intel_dp->pps_pipe = INVALID_PIPE; intel_dp->active_pipe = INVALID_PIPE; @@ -5863,6 +6006,8 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, connector->interlace_allowed = true; connector->doublescan_allowed = 0; + intel_dp_init_connector_port_info(intel_dig_port); + intel_dp_aux_init(intel_dp); INIT_DELAYED_WORK(&intel_dp->panel_vdd_work, @@ -5875,29 +6020,6 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, else intel_connector->get_hw_state = intel_connector_get_hw_state; - /* Set up the hotplug pin. 
*/ - switch (port) { - case PORT_A: - intel_encoder->hpd_pin = HPD_PORT_A; - break; - case PORT_B: - intel_encoder->hpd_pin = HPD_PORT_B; - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - intel_encoder->hpd_pin = HPD_PORT_A; - break; - case PORT_C: - intel_encoder->hpd_pin = HPD_PORT_C; - break; - case PORT_D: - intel_encoder->hpd_pin = HPD_PORT_D; - break; - case PORT_E: - intel_encoder->hpd_pin = HPD_PORT_E; - break; - default: - BUG(); - } - /* init MST on ports that can support it */ if (HAS_DP_MST(dev_priv) && !is_edp(intel_dp) && (port == PORT_B || port == PORT_C || port == PORT_D)) @@ -5982,6 +6104,7 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, intel_dig_port->max_lanes = 4; intel_encoder->type = INTEL_OUTPUT_DP; + intel_encoder->power_domain = intel_port_to_power_domain(port); if (IS_CHERRYVIEW(dev_priv)) { if (port == PORT_D) intel_encoder->crtc_mask = 1 << 2; diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 38e3ca2f6f18..c1f62eb07c07 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -47,6 +47,11 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, pipe_config->has_pch_encoder = false; bpp = 24; + if (intel_dp->compliance.test_data.bpc) { + bpp = intel_dp->compliance.test_data.bpc * 3; + DRM_DEBUG_KMS("Setting pipe bpp to %d\n", + bpp); + } /* * for MST we always configure max link bw - the spec doesn't * seem to suggest we should do otherwise. 
@@ -55,7 +60,7 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, pipe_config->lane_count = lane_count; - pipe_config->pipe_bpp = 24; + pipe_config->pipe_bpp = bpp; pipe_config->port_clock = intel_dp_max_link_rate(intel_dp); state = pipe_config->base.state; @@ -87,7 +92,6 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder, struct intel_dp *intel_dp = &intel_dig_port->dp; struct intel_connector *connector = to_intel_connector(old_conn_state->connector); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); int ret; DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links); @@ -98,10 +102,8 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder, if (ret) { DRM_ERROR("failed to update payload %d\n", ret); } - if (old_crtc_state->has_audio) { + if (old_crtc_state->has_audio) intel_audio_codec_disable(encoder); - intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO); - } } static void intel_mst_post_disable_dp(struct intel_encoder *encoder, @@ -147,7 +149,6 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, to_intel_connector(conn_state->connector); int ret; uint32_t temp; - int slots; /* MST encoders are bound to a crtc, not to a connector, * force the mapping here for get_hw_state. 
@@ -157,27 +158,14 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links); - if (intel_dp->active_mst_links == 0) { - intel_ddi_clk_select(&intel_dig_port->base, - pipe_config->shared_dpll); - - intel_prepare_dp_ddi_buffers(&intel_dig_port->base); - intel_dp_set_link_params(intel_dp, - pipe_config->port_clock, - pipe_config->lane_count, - true); - - intel_ddi_init_dp_buf_reg(&intel_dig_port->base); - - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); - - intel_dp_start_link_train(intel_dp); - intel_dp_stop_link_train(intel_dp); - } + if (intel_dp->active_mst_links == 0) + intel_dig_port->base.pre_enable(&intel_dig_port->base, + pipe_config, NULL); ret = drm_dp_mst_allocate_vcpi(&intel_dp->mst_mgr, connector->port, - pipe_config->pbn, &slots); + pipe_config->pbn, + pipe_config->dp_m_n.tu); if (ret == false) { DRM_ERROR("failed to allocate vcpi\n"); return; @@ -214,10 +202,8 @@ static void intel_mst_enable_dp(struct intel_encoder *encoder, ret = drm_dp_check_act_status(&intel_dp->mst_mgr); ret = drm_dp_update_payload_part2(&intel_dp->mst_mgr); - if (pipe_config->has_audio) { - intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO); + if (pipe_config->has_audio) intel_audio_codec_enable(encoder, pipe_config, conn_state); - } } static bool intel_dp_mst_enc_get_hw_state(struct intel_encoder *encoder, @@ -548,6 +534,7 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum DRM_MODE_ENCODER_DPMST, "DP-MST %c", pipe_name(pipe)); intel_encoder->type = INTEL_OUTPUT_DP_MST; + intel_encoder->power_domain = intel_dig_port->base.power_domain; intel_encoder->port = intel_dig_port->port; intel_encoder->crtc_mask = 0x7; intel_encoder->cloneable = 0; diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index e59e43a9f3a6..b4de632f1158 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -42,44 +42,6 @@ * commit 
phase. */ -struct intel_shared_dpll * -skl_find_link_pll(struct drm_i915_private *dev_priv, int clock) -{ - struct intel_shared_dpll *pll = NULL; - struct intel_dpll_hw_state dpll_hw_state; - enum intel_dpll_id i; - bool found = false; - - if (!skl_ddi_dp_set_dpll_hw_state(clock, &dpll_hw_state)) - return pll; - - for (i = DPLL_ID_SKL_DPLL1; i <= DPLL_ID_SKL_DPLL3; i++) { - pll = &dev_priv->shared_dplls[i]; - - /* Only want to check enabled timings first */ - if (pll->state.crtc_mask == 0) - continue; - - if (memcmp(&dpll_hw_state, &pll->state.hw_state, - sizeof(pll->state.hw_state)) == 0) { - found = true; - break; - } - } - - /* Ok no matching timings, maybe there's a free one? */ - for (i = DPLL_ID_SKL_DPLL1; - ((found == false) && (i <= DPLL_ID_SKL_DPLL3)); i++) { - pll = &dev_priv->shared_dplls[i]; - if (pll->state.crtc_mask == 0) { - pll->state.hw_state = dpll_hw_state; - break; - } - } - - return pll; -} - static void intel_atomic_duplicate_dpll_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll_state *shared_dpll) @@ -811,8 +773,8 @@ static struct intel_shared_dpll *hsw_ddi_hdmi_get_dpll(int clock, return pll; } -struct intel_shared_dpll *hsw_ddi_dp_get_dpll(struct intel_encoder *encoder, - int clock) +static struct intel_shared_dpll * +hsw_ddi_dp_get_dpll(struct intel_encoder *encoder, int clock) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_shared_dpll *pll; @@ -1360,8 +1322,9 @@ static bool skl_ddi_hdmi_pll_dividers(struct intel_crtc *crtc, } -bool skl_ddi_dp_set_dpll_hw_state(int clock, - struct intel_dpll_hw_state *dpll_hw_state) +static bool +skl_ddi_dp_set_dpll_hw_state(int clock, + struct intel_dpll_hw_state *dpll_hw_state) { uint32_t ctrl1; @@ -1816,8 +1779,9 @@ static bool bxt_ddi_set_dpll_hw_state(int clock, return true; } -bool bxt_ddi_dp_set_dpll_hw_state(int clock, - struct intel_dpll_hw_state *dpll_hw_state) +static bool +bxt_ddi_dp_set_dpll_hw_state(int clock, + struct intel_dpll_hw_state 
*dpll_hw_state) { struct bxt_clk_div clk_div = {0}; @@ -2016,7 +1980,7 @@ void intel_shared_dpll_init(struct drm_device *dev) const struct dpll_info *dpll_info; int i; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) dpll_mgr = &skl_pll_mgr; else if (IS_GEN9_LP(dev_priv)) dpll_mgr = &bxt_pll_mgr; diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h b/drivers/gpu/drm/i915/intel_dpll_mgr.h index af1497eb4f9c..f8d13a947c13 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.h @@ -282,20 +282,4 @@ void intel_shared_dpll_init(struct drm_device *dev); void intel_dpll_dump_hw_state(struct drm_i915_private *dev_priv, struct intel_dpll_hw_state *hw_state); -/* BXT dpll related functions */ -bool bxt_ddi_dp_set_dpll_hw_state(int clock, - struct intel_dpll_hw_state *dpll_hw_state); - - -/* SKL dpll related functions */ -bool skl_ddi_dp_set_dpll_hw_state(int clock, - struct intel_dpll_hw_state *dpll_hw_state); -struct intel_shared_dpll *skl_find_link_pll(struct drm_i915_private *dev_priv, - int clock); - - -/* HSW dpll related functions */ -struct intel_shared_dpll *hsw_ddi_dp_get_dpll(struct intel_encoder *encoder, - int clock); - #endif /* _INTEL_DPLL_MGR_H_ */ diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 344f238b283f..aaee3949a422 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -242,6 +242,9 @@ struct intel_encoder { * be set correctly before calling this function. */ void (*get_config)(struct intel_encoder *, struct intel_crtc_state *pipe_config); + /* Returns a mask of power domains that need to be referenced as part + * of the hardware state readout code. 
*/ + u64 (*get_power_domains)(struct intel_encoder *encoder); /* * Called during system suspend after all pending requests for the * encoder are flushed (for example for DP AUX transactions) and @@ -250,6 +253,7 @@ struct intel_encoder { void (*suspend)(struct intel_encoder *); int crtc_mask; enum hpd_pin hpd_pin; + enum intel_display_power_domain power_domain; /* for communication with audio component; protected by av_mutex */ const struct drm_connector *audio_connector; }; @@ -334,13 +338,20 @@ struct dpll { struct intel_atomic_state { struct drm_atomic_state base; - unsigned int cdclk; - - /* - * Calculated device cdclk, can be different from cdclk - * only when all crtc's are DPMS off. - */ - unsigned int dev_cdclk; + struct { + /* + * Logical state of cdclk (used for all scaling, watermark, + * etc. calculations and checks). This is computed as if all + * enabled crtcs were active. + */ + struct intel_cdclk_state logical; + + /* + * Actual state of cdclk, can be different from the logical + * state only when all crtc's are DPMS off. 
+ */ + struct intel_cdclk_state actual; + } cdclk; bool dpll_set, modeset; @@ -357,9 +368,6 @@ struct intel_atomic_state { unsigned int active_crtcs; unsigned int min_pixclk[I915_MAX_PIPES]; - /* SKL/KBL Only */ - unsigned int cdclk_pll_vco; - struct intel_shared_dpll_state shared_dpll[I915_NUM_PLLS]; /* @@ -390,6 +398,9 @@ struct intel_plane_state { int x, y; } aux; + /* plane control register */ + u32 ctl; + /* * scaler_id * = -1 : not using a scaler @@ -485,6 +496,24 @@ struct skl_pipe_wm { uint32_t linetime; }; +enum vlv_wm_level { + VLV_WM_LEVEL_PM2, + VLV_WM_LEVEL_PM5, + VLV_WM_LEVEL_DDR_DVFS, + NUM_VLV_WM_LEVELS, +}; + +struct vlv_wm_state { + struct vlv_pipe_wm wm[NUM_VLV_WM_LEVELS]; + struct vlv_sr_wm sr[NUM_VLV_WM_LEVELS]; + uint8_t num_levels; + bool cxsr; +}; + +struct vlv_fifo_state { + u16 plane[I915_MAX_PLANES]; +}; + struct intel_crtc_wm_state { union { struct { @@ -509,6 +538,17 @@ struct intel_crtc_wm_state { struct skl_pipe_wm optimal; struct skl_ddb_entry ddb; } skl; + + struct { + /* "raw" watermarks (not inverted) */ + struct vlv_pipe_wm raw[NUM_VLV_WM_LEVELS]; + /* intermediate watermarks (inverted) */ + struct vlv_wm_state intermediate; + /* optimal watermarks (inverted) */ + struct vlv_wm_state optimal; + /* display FIFO split */ + struct vlv_fifo_state fifo_state; + } vlv; }; /* @@ -539,12 +579,19 @@ struct intel_crtc_state { bool disable_cxsr; bool update_wm_pre, update_wm_post; /* watermarks are updated */ bool fb_changed; /* fb on any of the planes is changed */ + bool fifo_changed; /* FIFO split is changed */ /* Pipe source size (ie. panel fitter input size) * All planes will be positioned inside this space, * and get clipped at the edges. */ int pipe_src_w, pipe_src_h; + /* + * Pipe pixel rate, adjusted for + * panel fitter/pipe scaler downscaling. + */ + unsigned int pixel_rate; + /* Whether to set up the PCH/FDI. Note that we never allow sharing * between pch encoders and cpu encoders. 
*/ bool has_pch_encoder; @@ -581,6 +628,14 @@ struct intel_crtc_state { */ bool dither; + /* + * Dither gets enabled for 18bpp which causes CRC mismatch errors for + * compliance video pattern tests. + * Disable dither only if it is a compliance test request for + * 18bpp. + */ + bool dither_force_disable; + /* Controls for the clock computation, to override various stages. */ bool clock_set; @@ -674,15 +729,15 @@ struct intel_crtc_state { /* Gamma mode programmed on the pipe */ uint32_t gamma_mode; -}; -struct vlv_wm_state { - struct vlv_pipe_wm wm[3]; - struct vlv_sr_wm sr[3]; - uint8_t num_active_planes; - uint8_t num_levels; - uint8_t level; - bool cxsr; + /* bitmask of visible planes (enum plane_id) */ + u8 active_planes; + + /* HDMI scrambling status */ + bool hdmi_scrambling; + + /* HDMI High TMDS char rate ratio */ + bool hdmi_high_tmds_clock_ratio; }; struct intel_crtc { @@ -698,7 +753,7 @@ struct intel_crtc { bool active; bool lowfreq_avail; u8 plane_ids_mask; - unsigned long enabled_power_domains; + unsigned long long enabled_power_domains; struct intel_overlay *overlay; struct intel_flip_work *flip_work; @@ -730,10 +785,8 @@ struct intel_crtc { /* watermarks currently being used */ union { struct intel_pipe_wm ilk; + struct vlv_wm_state vlv; } active; - - /* allow CxSR on this pipe */ - bool cxsr_allowed; } wm; int scanline_offset; @@ -747,27 +800,6 @@ struct intel_crtc { /* scalers available on this crtc */ int num_scalers; - - struct vlv_wm_state wm_state; -}; - -struct intel_plane_wm_parameters { - uint32_t horiz_pixels; - uint32_t vert_pixels; - /* - * For packed pixel formats: - * bytes_per_pixel - holds bytes per pixel - * For planar pixel formats: - * bytes_per_pixel - holds bytes per pixel for uv-plane - * y_bytes_per_pixel - holds bytes per pixel for y-plane - */ - uint8_t bytes_per_pixel; - uint8_t y_bytes_per_pixel; - bool enabled; - bool scaled; - u64 tiling; - unsigned int rotation; - uint16_t fifo_size; }; struct intel_plane { @@ -779,13 
+811,6 @@ struct intel_plane { int max_downscale; uint32_t frontbuffer_bit; - /* Since we need to change the watermarks before/after - * enabling/disabling the planes, we need to store the parameters here - * as the other pieces of the struct may not reflect the values we want - * for the watermark calculations. Currently only Haswell uses this. - */ - struct intel_plane_wm_parameters wm; - /* * NOTE: Do not place new plane state fields here (e.g., when adding * new plane properties). New runtime state should now be placed in @@ -891,12 +916,17 @@ struct intel_dp_desc { struct intel_dp_compliance_data { unsigned long edid; + uint8_t video_pattern; + uint16_t hdisplay, vdisplay; + uint8_t bpc; }; struct intel_dp_compliance { unsigned long test_type; struct intel_dp_compliance_data test_data; bool test_active; + int test_link_rate; + u8 test_lane_count; }; struct intel_dp { @@ -911,6 +941,7 @@ struct intel_dp { bool has_audio; bool detect_done; bool channel_eq_status; + bool reset_link_params; enum hdmi_force_audio force_audio; bool limited_color_range; bool color_range_auto; @@ -928,6 +959,7 @@ struct intel_dp { /* sink or branch descriptor */ struct intel_dp_desc desc; struct drm_dp_aux aux; + enum intel_display_power_domain aux_power_domain; uint8_t train_set[4]; int panel_power_up_delay; int panel_power_down_delay; @@ -990,7 +1022,6 @@ struct intel_dp { struct intel_lspcon { bool active; enum drm_lspcon_mode mode; - bool desc_valid; }; struct intel_digital_port { @@ -1003,6 +1034,7 @@ struct intel_digital_port { enum irqreturn (*hpd_pulse)(struct intel_digital_port *, bool); bool release_cl2_override; uint8_t max_lanes; + enum intel_display_power_domain ddi_io_power_domain; }; struct intel_dp_mst_encoder { @@ -1097,7 +1129,19 @@ intel_attached_encoder(struct drm_connector *connector) static inline struct intel_digital_port * enc_to_dig_port(struct drm_encoder *encoder) { - return container_of(encoder, struct intel_digital_port, base.base); + struct intel_encoder 
*intel_encoder = to_intel_encoder(encoder); + + switch (intel_encoder->type) { + case INTEL_OUTPUT_UNKNOWN: + WARN_ON(!HAS_DDI(to_i915(encoder->dev))); + case INTEL_OUTPUT_DP: + case INTEL_OUTPUT_EDP: + case INTEL_OUTPUT_HDMI: + return container_of(encoder, struct intel_digital_port, + base.base); + default: + return NULL; + } } static inline struct intel_dp_mst_encoder * @@ -1153,7 +1197,13 @@ void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask); void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv); void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv); void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv); -u32 gen6_sanitize_rps_pm_mask(struct drm_i915_private *dev_priv, u32 mask); + +static inline u32 gen6_sanitize_rps_pm_mask(const struct drm_i915_private *i915, + u32 mask) +{ + return mask & ~i915->rps.pm_intrmsk_mbz; +} + void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv); void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv); static inline bool intel_irqs_enabled(struct drm_i915_private *dev_priv) @@ -1179,46 +1229,38 @@ void intel_crt_init(struct drm_i915_private *dev_priv); void intel_crt_reset(struct drm_encoder *encoder); /* intel_ddi.c */ -void intel_ddi_clk_select(struct intel_encoder *encoder, - struct intel_shared_dpll *pll); void intel_ddi_fdi_post_disable(struct intel_encoder *intel_encoder, struct intel_crtc_state *old_crtc_state, struct drm_connector_state *old_conn_state); -void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder); -void hsw_fdi_link_train(struct drm_crtc *crtc); +void hsw_fdi_link_train(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state); void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port); enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder); bool intel_ddi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe); -void 
intel_ddi_enable_transcoder_func(struct drm_crtc *crtc); +void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state); void intel_ddi_disable_transcoder_func(struct drm_i915_private *dev_priv, enum transcoder cpu_transcoder); -void intel_ddi_enable_pipe_clock(struct intel_crtc *intel_crtc); -void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc); -bool intel_ddi_pll_select(struct intel_crtc *crtc, - struct intel_crtc_state *crtc_state); -void intel_ddi_set_pipe_settings(struct drm_crtc *crtc); +void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state); +void intel_ddi_disable_pipe_clock(const struct intel_crtc_state *crtc_state); +struct intel_encoder * +intel_ddi_get_crtc_new_encoder(struct intel_crtc_state *crtc_state); +void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state); void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp); bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector); bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv, struct intel_crtc *intel_crtc); void intel_ddi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); -struct intel_encoder * -intel_ddi_get_crtc_new_encoder(struct intel_crtc_state *crtc_state); -void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder); void intel_ddi_clock_get(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); -void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state); +void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, + bool state); uint32_t ddi_signal_levels(struct intel_dp *intel_dp); -struct intel_shared_dpll *intel_ddi_get_link_dpll(struct intel_dp *intel_dp, - int clock); -unsigned int intel_fb_align_height(struct drm_device *dev, - unsigned int height, - uint32_t pixel_format, - uint64_t fb_format_modifier); -u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv, - uint64_t 
fb_modifier, uint32_t pixel_format); +u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder); + +unsigned int intel_fb_align_height(const struct drm_framebuffer *fb, + int plane, unsigned int height); /* intel_audio.c */ void intel_init_audio_hooks(struct drm_i915_private *dev_priv); @@ -1231,12 +1273,28 @@ void i915_audio_component_cleanup(struct drm_i915_private *dev_priv); void intel_audio_init(struct drm_i915_private *dev_priv); void intel_audio_deinit(struct drm_i915_private *dev_priv); +/* intel_cdclk.c */ +void skl_init_cdclk(struct drm_i915_private *dev_priv); +void skl_uninit_cdclk(struct drm_i915_private *dev_priv); +void bxt_init_cdclk(struct drm_i915_private *dev_priv); +void bxt_uninit_cdclk(struct drm_i915_private *dev_priv); +void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv); +void intel_update_max_cdclk(struct drm_i915_private *dev_priv); +void intel_update_cdclk(struct drm_i915_private *dev_priv); +void intel_update_rawclk(struct drm_i915_private *dev_priv); +bool intel_cdclk_state_compare(const struct intel_cdclk_state *a, + const struct intel_cdclk_state *b); +void intel_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state); + /* intel_display.c */ enum transcoder intel_crtc_pch_transcoder(struct intel_crtc *crtc); -void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv, int vco); void intel_update_rawclk(struct drm_i915_private *dev_priv); +int vlv_get_hpll_vco(struct drm_i915_private *dev_priv); int vlv_get_cck_clock(struct drm_i915_private *dev_priv, const char *name, u32 reg, int ref_freq); +int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, + const char *name, u32 reg); void lpt_disable_pch_transcoder(struct drm_i915_private *dev_priv); void lpt_disable_iclkip(struct drm_i915_private *dev_priv); extern const struct drm_plane_funcs intel_plane_funcs; @@ -1300,10 +1358,10 @@ int ironlake_get_lanes_required(int target_clock, int link_bw, int bpp); void 
vlv_wait_port_ready(struct drm_i915_private *dev_priv, struct intel_digital_port *dport, unsigned int expected_mask); -bool intel_get_load_detect_pipe(struct drm_connector *connector, - struct drm_display_mode *mode, - struct intel_load_detect_pipe *old, - struct drm_modeset_acquire_ctx *ctx); +int intel_get_load_detect_pipe(struct drm_connector *connector, + struct drm_display_mode *mode, + struct intel_load_detect_pipe *old, + struct drm_modeset_acquire_ctx *ctx); void intel_release_load_detect_pipe(struct drm_connector *connector, struct intel_load_detect_pipe *old, struct drm_modeset_acquire_ctx *ctx); @@ -1311,9 +1369,8 @@ struct i915_vma * intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation); void intel_unpin_fb_vma(struct i915_vma *vma); struct drm_framebuffer * -__intel_framebuffer_create(struct drm_device *dev, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj); +intel_framebuffer_create(struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd); void intel_finish_page_flip_cs(struct drm_i915_private *dev_priv, int pipe); void intel_finish_page_flip_mmio(struct drm_i915_private *dev_priv, int pipe); void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe); @@ -1332,9 +1389,6 @@ int intel_plane_atomic_set_property(struct drm_plane *plane, int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, struct drm_plane_state *plane_state); -unsigned int intel_tile_height(const struct drm_i915_private *dev_priv, - uint64_t fb_modifier, unsigned int cpp); - void assert_pch_transcoder_disabled(struct drm_i915_private *dev_priv, enum pipe pipe); @@ -1366,14 +1420,10 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv); void intel_finish_reset(struct drm_i915_private *dev_priv); void hsw_enable_pc8(struct drm_i915_private *dev_priv); void hsw_disable_pc8(struct drm_i915_private *dev_priv); -void bxt_init_cdclk(struct drm_i915_private *dev_priv); -void bxt_uninit_cdclk(struct 
drm_i915_private *dev_priv); void gen9_sanitize_dc_state(struct drm_i915_private *dev_priv); void bxt_enable_dc9(struct drm_i915_private *dev_priv); void bxt_disable_dc9(struct drm_i915_private *dev_priv); void gen9_enable_dc5(struct drm_i915_private *dev_priv); -void skl_init_cdclk(struct drm_i915_private *dev_priv); -void skl_uninit_cdclk(struct drm_i915_private *dev_priv); unsigned int skl_cdclk_get_vco(unsigned int freq); void skl_enable_dc6(struct drm_i915_private *dev_priv); void skl_disable_dc6(struct drm_i915_private *dev_priv); @@ -1388,10 +1438,7 @@ int chv_calc_dpll_params(int refclk, struct dpll *pll_clock); bool intel_crtc_active(struct intel_crtc *crtc); void hsw_enable_ips(struct intel_crtc *crtc); void hsw_disable_ips(struct intel_crtc *crtc); -enum intel_display_power_domain -intel_display_port_power_domain(struct intel_encoder *intel_encoder); -enum intel_display_power_domain -intel_display_port_aux_power_domain(struct intel_encoder *intel_encoder); +enum intel_display_power_domain intel_port_to_power_domain(enum port port); void intel_mode_from_pipe_config(struct drm_display_mode *mode, struct intel_crtc_state *pipe_config); @@ -1403,12 +1450,12 @@ static inline u32 intel_plane_ggtt_offset(const struct intel_plane_state *state) return i915_ggtt_offset(state->vma); } -u32 skl_plane_ctl_format(uint32_t pixel_format); -u32 skl_plane_ctl_tiling(uint64_t fb_modifier); -u32 skl_plane_ctl_rotation(unsigned int rotation); +u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state); u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane, unsigned int rotation); int skl_check_plane_surface(struct intel_plane_state *plane_state); +int i9xx_check_plane_surface(struct intel_plane_state *plane_state); /* intel_csr.c */ void intel_csr_ucode_init(struct drm_i915_private *); @@ -1578,6 +1625,10 @@ struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder); bool intel_hdmi_compute_config(struct 
intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state); +void intel_hdmi_handle_sink_scrambling(struct intel_encoder *intel_encoder, + struct drm_connector *connector, + bool high_tmds_clock_ratio, + bool scrambling); void intel_dp_dual_mode_set_tmds_output(struct intel_hdmi *hdmi, bool enable); @@ -1664,6 +1715,7 @@ int intel_power_domains_init(struct drm_i915_private *); void intel_power_domains_fini(struct drm_i915_private *); void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume); void intel_power_domains_suspend(struct drm_i915_private *dev_priv); +void intel_power_domains_verify_state(struct drm_i915_private *dev_priv); void bxt_display_core_init(struct drm_i915_private *dev_priv, bool resume); void bxt_display_core_uninit(struct drm_i915_private *dev_priv); void intel_runtime_pm_enable(struct drm_i915_private *dev_priv); @@ -1692,10 +1744,8 @@ static inline void assert_rpm_wakelock_held(struct drm_i915_private *dev_priv) { assert_rpm_device_not_suspended(dev_priv); - /* FIXME: Needs to be converted back to WARN_ONCE, but currently causes - * too much noise. 
*/ - if (!atomic_read(&dev_priv->pm.wakeref_count)) - DRM_DEBUG_DRIVER("RPM wakelock ref not held during HW access"); + WARN_ONCE(!atomic_read(&dev_priv->pm.wakeref_count), + "RPM wakelock ref not held during HW access"); } /** @@ -1783,6 +1833,7 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, struct skl_ddb_allocation *ddb /* out */); void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc, struct skl_pipe_wm *out); +void vlv_wm_sanitize(struct drm_i915_private *dev_priv); bool intel_can_enable_sagv(struct drm_atomic_state *state); int intel_enable_sagv(struct drm_i915_private *dev_priv); int intel_disable_sagv(struct drm_i915_private *dev_priv); @@ -1791,7 +1842,6 @@ bool skl_wm_level_equals(const struct skl_wm_level *l1, bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries, const struct skl_ddb_entry *ddb, int ignore); -uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config); bool ilk_disable_lp_wm(struct drm_device *dev); int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6); static inline int intel_enable_rc6(void) @@ -1865,9 +1915,9 @@ intel_atomic_get_existing_plane_state(struct drm_atomic_state *state, return to_intel_plane_state(plane_state); } -int intel_atomic_setup_scalers(struct drm_device *dev, - struct intel_crtc *intel_crtc, - struct intel_crtc_state *crtc_state); +int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, + struct intel_crtc *intel_crtc, + struct intel_crtc_state *crtc_state); /* intel_atomic_plane.c */ struct intel_plane_state *intel_create_plane_state(struct drm_plane *plane); @@ -1891,7 +1941,6 @@ void lspcon_wait_pcon_mode(struct intel_lspcon *lspcon); /* intel_pipe_crc.c */ int intel_pipe_crc_create(struct drm_minor *minor); -void intel_pipe_crc_cleanup(struct drm_minor *minor); #ifdef CONFIG_DEBUG_FS int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name, size_t *values_cnt); diff --git a/drivers/gpu/drm/i915/intel_dsi.c 
b/drivers/gpu/drm/i915/intel_dsi.c index 16732e7bc08e..3ffe8b1f1d48 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -28,7 +28,6 @@ #include <drm/drm_crtc.h> #include <drm/drm_edid.h> #include <drm/i915_drm.h> -#include <drm/drm_panel.h> #include <drm/drm_mipi_dsi.h> #include <linux/slab.h> #include <linux/gpio/consumer.h> @@ -36,16 +35,6 @@ #include "intel_drv.h" #include "intel_dsi.h" -static const struct { - u16 panel_id; - struct drm_panel * (*init)(struct intel_dsi *intel_dsi, u16 panel_id); -} intel_dsi_drivers[] = { - { - .panel_id = MIPI_DSI_GENERIC_PANEL_ID, - .init = vbt_panel_init, - }, -}; - /* return pixels in terms of txbyteclkhs */ static u16 txbyteclkhs(u16 pixels, int bpp, int lane_count, u16 burst_mode_ratio) @@ -80,7 +69,7 @@ enum mipi_dsi_pixel_format pixel_format_from_register_bits(u32 fmt) } } -static void wait_for_dsi_fifo_empty(struct intel_dsi *intel_dsi, enum port port) +void wait_for_dsi_fifo_empty(struct intel_dsi *intel_dsi, enum port port) { struct drm_encoder *encoder = &intel_dsi->base.base; struct drm_device *dev = encoder->dev; @@ -357,41 +346,132 @@ static bool intel_dsi_compute_config(struct intel_encoder *encoder, return true; } -static void bxt_dsi_device_ready(struct intel_encoder *encoder) +static void glk_dsi_device_ready(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); enum port port; - u32 val; + u32 tmp, val; - DRM_DEBUG_KMS("\n"); + /* Set the MIPI mode + * If MIPI_Mode is off, then writing to LP_Wake bit is not reflecting. 
+ * Power ON MIPI IO first and then write into IO reset and LP wake bits + */ + for_each_dsi_port(port, intel_dsi->ports) { + tmp = I915_READ(MIPI_CTRL(port)); + I915_WRITE(MIPI_CTRL(port), tmp | GLK_MIPIIO_ENABLE); + } + + /* Put the IO into reset */ + tmp = I915_READ(MIPI_CTRL(PORT_A)); + tmp &= ~GLK_MIPIIO_RESET_RELEASED; + I915_WRITE(MIPI_CTRL(PORT_A), tmp); + + /* Program LP Wake */ + for_each_dsi_port(port, intel_dsi->ports) { + tmp = I915_READ(MIPI_CTRL(port)); + tmp |= GLK_LP_WAKE; + I915_WRITE(MIPI_CTRL(port), tmp); + } - /* Exit Low power state in 4 steps*/ + /* Wait for Pwr ACK */ for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), GLK_MIPIIO_PORT_POWERED, + GLK_MIPIIO_PORT_POWERED, 20)) + DRM_ERROR("MIPIO port is powergated\n"); + } - /* 1. Enable MIPI PHY transparent latch */ - val = I915_READ(BXT_MIPI_PORT_CTRL(port)); - I915_WRITE(BXT_MIPI_PORT_CTRL(port), val | LP_OUTPUT_HOLD); - usleep_range(2000, 2500); + /* Wait for MIPI PHY status bit to set */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), GLK_PHY_STATUS_PORT_READY, + GLK_PHY_STATUS_PORT_READY, 20)) + DRM_ERROR("PHY is not ON\n"); + } - /* 2. Enter ULPS */ + /* Get IO out of reset */ + tmp = I915_READ(MIPI_CTRL(PORT_A)); + I915_WRITE(MIPI_CTRL(PORT_A), tmp | GLK_MIPIIO_RESET_RELEASED); + + /* Get IO out of Low power state*/ + for_each_dsi_port(port, intel_dsi->ports) { + if (!(I915_READ(MIPI_DEVICE_READY(port)) & DEVICE_READY)) { + val = I915_READ(MIPI_DEVICE_READY(port)); + val &= ~ULPS_STATE_MASK; + val |= DEVICE_READY; + I915_WRITE(MIPI_DEVICE_READY(port), val); + usleep_range(10, 15); + } + + /* Enter ULPS */ val = I915_READ(MIPI_DEVICE_READY(port)); val &= ~ULPS_STATE_MASK; val |= (ULPS_STATE_ENTER | DEVICE_READY); I915_WRITE(MIPI_DEVICE_READY(port), val); - /* at least 2us - relaxed for hrtimer subsystem optimization */ - usleep_range(10, 50); - /* 3. 
Exit ULPS */ + /* Wait for ULPS Not active */ + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), GLK_ULPS_NOT_ACTIVE, + GLK_ULPS_NOT_ACTIVE, 20)) + DRM_ERROR("ULPS is still active\n"); + + /* Exit ULPS */ val = I915_READ(MIPI_DEVICE_READY(port)); val &= ~ULPS_STATE_MASK; val |= (ULPS_STATE_EXIT | DEVICE_READY); I915_WRITE(MIPI_DEVICE_READY(port), val); - usleep_range(1000, 1500); - /* Clear ULPS and set device ready */ + /* Enter Normal Mode */ + val = I915_READ(MIPI_DEVICE_READY(port)); + val &= ~ULPS_STATE_MASK; + val |= (ULPS_STATE_NORMAL_OPERATION | DEVICE_READY); + I915_WRITE(MIPI_DEVICE_READY(port), val); + + tmp = I915_READ(MIPI_CTRL(port)); + tmp &= ~GLK_LP_WAKE; + I915_WRITE(MIPI_CTRL(port), tmp); + } + + /* Wait for Stop state */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), GLK_DATA_LANE_STOP_STATE, + GLK_DATA_LANE_STOP_STATE, 20)) + DRM_ERROR("Date lane not in STOP state\n"); + } + + /* Wait for AFE LATCH */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + BXT_MIPI_PORT_CTRL(port), AFE_LATCHOUT, + AFE_LATCHOUT, 20)) + DRM_ERROR("D-PHY not entering LP-11 state\n"); + } +} + +static void bxt_dsi_device_ready(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; + u32 val; + + DRM_DEBUG_KMS("\n"); + + /* Enable MIPI PHY transparent latch */ + for_each_dsi_port(port, intel_dsi->ports) { + val = I915_READ(BXT_MIPI_PORT_CTRL(port)); + I915_WRITE(BXT_MIPI_PORT_CTRL(port), val | LP_OUTPUT_HOLD); + usleep_range(2000, 2500); + } + + /* Clear ULPS and set device ready */ + for_each_dsi_port(port, intel_dsi->ports) { val = I915_READ(MIPI_DEVICE_READY(port)); val &= ~ULPS_STATE_MASK; + I915_WRITE(MIPI_DEVICE_READY(port), val); + usleep_range(2000, 2500); val |= DEVICE_READY; I915_WRITE(MIPI_DEVICE_READY(port), val); } @@ -442,8 
+522,121 @@ static void intel_dsi_device_ready(struct intel_encoder *encoder) if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) vlv_dsi_device_ready(encoder); - else if (IS_GEN9_LP(dev_priv)) + else if (IS_BROXTON(dev_priv)) bxt_dsi_device_ready(encoder); + else if (IS_GEMINILAKE(dev_priv)) + glk_dsi_device_ready(encoder); +} + +static void glk_dsi_enter_low_power_mode(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; + u32 val; + + /* Enter ULPS */ + for_each_dsi_port(port, intel_dsi->ports) { + val = I915_READ(MIPI_DEVICE_READY(port)); + val &= ~ULPS_STATE_MASK; + val |= (ULPS_STATE_ENTER | DEVICE_READY); + I915_WRITE(MIPI_DEVICE_READY(port), val); + } + + /* Wait for MIPI PHY status bit to unset */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), + GLK_PHY_STATUS_PORT_READY, 0, 20)) + DRM_ERROR("PHY is not turning OFF\n"); + } + + /* Wait for Pwr ACK bit to unset */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), + GLK_MIPIIO_PORT_POWERED, 0, 20)) + DRM_ERROR("MIPI IO Port is not powergated\n"); + } +} + +static void glk_dsi_disable_mipi_io(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; + u32 tmp; + + /* Put the IO into reset */ + tmp = I915_READ(MIPI_CTRL(PORT_A)); + tmp &= ~GLK_MIPIIO_RESET_RELEASED; + I915_WRITE(MIPI_CTRL(PORT_A), tmp); + + /* Wait for MIPI PHY status bit to unset */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), + GLK_PHY_STATUS_PORT_READY, 0, 20)) + DRM_ERROR("PHY is not turning OFF\n"); + } + + /* Clear MIPI mode */ + for_each_dsi_port(port, intel_dsi->ports) { + tmp = I915_READ(MIPI_CTRL(port)); + 
tmp &= ~GLK_MIPIIO_ENABLE; + I915_WRITE(MIPI_CTRL(port), tmp); + } +} + +static void glk_dsi_clear_device_ready(struct intel_encoder *encoder) +{ + glk_dsi_enter_low_power_mode(encoder); + glk_dsi_disable_mipi_io(encoder); +} + +static void vlv_dsi_clear_device_ready(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; + + DRM_DEBUG_KMS("\n"); + for_each_dsi_port(port, intel_dsi->ports) { + /* Common bit for both MIPI Port A & MIPI Port C on VLV/CHV */ + i915_reg_t port_ctrl = IS_GEN9_LP(dev_priv) ? + BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(PORT_A); + u32 val; + + I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY | + ULPS_STATE_ENTER); + usleep_range(2000, 2500); + + I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY | + ULPS_STATE_EXIT); + usleep_range(2000, 2500); + + I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY | + ULPS_STATE_ENTER); + usleep_range(2000, 2500); + + /* + * On VLV/CHV, wait till Clock lanes are in LP-00 state for MIPI + * Port A only. MIPI Port C has no similar bit for checking. 
+ */ + if ((IS_GEN9_LP(dev_priv) || port == PORT_A) && + intel_wait_for_register(dev_priv, + port_ctrl, AFE_LATCHOUT, 0, + 30)) + DRM_ERROR("DSI LP not going Low\n"); + + /* Disable MIPI PHY transparent latch */ + val = I915_READ(port_ctrl); + I915_WRITE(port_ctrl, val & ~LP_OUTPUT_HOLD); + usleep_range(1000, 1500); + + I915_WRITE(MIPI_DEVICE_READY(port), 0x00); + usleep_range(2000, 2500); + } } static void intel_dsi_port_enable(struct intel_encoder *encoder) @@ -456,12 +649,21 @@ static void intel_dsi_port_enable(struct intel_encoder *encoder) if (intel_dsi->dual_link == DSI_DUAL_LINK_FRONT_BACK) { u32 temp; - - temp = I915_READ(VLV_CHICKEN_3); - temp &= ~PIXEL_OVERLAP_CNT_MASK | + if (IS_GEN9_LP(dev_priv)) { + for_each_dsi_port(port, intel_dsi->ports) { + temp = I915_READ(MIPI_CTRL(port)); + temp &= ~BXT_PIXEL_OVERLAP_CNT_MASK | + intel_dsi->pixel_overlap << + BXT_PIXEL_OVERLAP_CNT_SHIFT; + I915_WRITE(MIPI_CTRL(port), temp); + } + } else { + temp = I915_READ(VLV_CHICKEN_3); + temp &= ~PIXEL_OVERLAP_CNT_MASK | intel_dsi->pixel_overlap << PIXEL_OVERLAP_CNT_SHIFT; - I915_WRITE(VLV_CHICKEN_3, temp); + I915_WRITE(VLV_CHICKEN_3, temp); + } } for_each_dsi_port(port, intel_dsi->ports) { @@ -509,37 +711,57 @@ static void intel_dsi_port_disable(struct intel_encoder *encoder) } } -static void intel_dsi_enable(struct intel_encoder *encoder) -{ - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - enum port port; - - DRM_DEBUG_KMS("\n"); - - if (is_cmd_mode(intel_dsi)) { - for_each_dsi_port(port, intel_dsi->ports) - I915_WRITE(MIPI_MAX_RETURN_PKT_SIZE(port), 8 * 4); - } else { - msleep(20); /* XXX */ - for_each_dsi_port(port, intel_dsi->ports) - dpi_send_cmd(intel_dsi, TURN_ON, false, port); - msleep(100); - - drm_panel_enable(intel_dsi->panel); +static void intel_dsi_prepare(struct intel_encoder *intel_encoder, + struct intel_crtc_state *pipe_config); +static void 
intel_dsi_unprepare(struct intel_encoder *encoder); - for_each_dsi_port(port, intel_dsi->ports) - wait_for_dsi_fifo_empty(intel_dsi, port); +static void intel_dsi_msleep(struct intel_dsi *intel_dsi, int msec) +{ + struct drm_i915_private *dev_priv = to_i915(intel_dsi->base.base.dev); - intel_dsi_port_enable(encoder); - } + /* For v3 VBTs in vid-mode the delays are part of the VBT sequences */ + if (is_vid_mode(intel_dsi) && dev_priv->vbt.dsi.seq_version >= 3) + return; - intel_panel_enable_backlight(intel_dsi->attached_connector); + msleep(msec); } -static void intel_dsi_prepare(struct intel_encoder *intel_encoder, - struct intel_crtc_state *pipe_config); +/* + * Panel enable/disable sequences from the VBT spec. + * + * Note the spec has AssertReset / DeassertReset swapped from their + * usual naming. We use the normal names to avoid confusion (so below + * they are swapped compared to the spec). + * + * Steps starting with MIPI refer to VBT sequences, note that for v2 + * VBTs several steps which have a VBT in v2 are expected to be handled + * directly by the driver, by directly driving gpios for example. + * + * v2 video mode seq v3 video mode seq command mode seq + * - power on - MIPIPanelPowerOn - power on + * - wait t1+t2 - wait t1+t2 + * - MIPIDeassertResetPin - MIPIDeassertResetPin - MIPIDeassertResetPin + * - io lines to lp-11 - io lines to lp-11 - io lines to lp-11 + * - MIPISendInitialDcsCmds - MIPISendInitialDcsCmds - MIPISendInitialDcsCmds + * - MIPITearOn + * - MIPIDisplayOn + * - turn on DPI - turn on DPI - set pipe to dsr mode + * - MIPIDisplayOn - MIPIDisplayOn + * - wait t5 - wait t5 + * - backlight on - MIPIBacklightOn - backlight on + * ... ... ... issue mem cmds ... 
+ * - backlight off - MIPIBacklightOff - backlight off + * - wait t6 - wait t6 + * - MIPIDisplayOff + * - turn off DPI - turn off DPI - disable pipe dsr mode + * - MIPITearOff + * - MIPIDisplayOff - MIPIDisplayOff + * - io lines to lp-00 - io lines to lp-00 - io lines to lp-00 + * - MIPIAssertResetPin - MIPIAssertResetPin - MIPIAssertResetPin + * - wait t3 - wait t3 + * - power off - MIPIPanelPowerOff - power off + * - wait t4 - wait t4 + */ static void intel_dsi_pre_enable(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, @@ -548,6 +770,7 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); enum port port; + u32 val; DRM_DEBUG_KMS("\n"); @@ -558,13 +781,16 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, intel_disable_dsi_pll(encoder); intel_enable_dsi_pll(encoder, pipe_config); - intel_dsi_prepare(encoder, pipe_config); - - /* Panel Enable over CRC PMIC */ - if (intel_dsi->gpio_panel) - gpiod_set_value_cansleep(intel_dsi->gpio_panel, 1); + if (IS_BROXTON(dev_priv)) { + /* Add MIPI IO reset programming for modeset */ + val = I915_READ(BXT_P_CR_GT_DISP_PWRON); + I915_WRITE(BXT_P_CR_GT_DISP_PWRON, + val | MIPIO_RST_CTRL); - msleep(intel_dsi->panel_on_delay); + /* Power up DSI regulator */ + I915_WRITE(BXT_P_DSI_REGULATOR_CFG, STAP_SELECT); + I915_WRITE(BXT_P_DSI_REGULATOR_TX_CTRL, 0); + } if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { u32 val; @@ -575,42 +801,88 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, I915_WRITE(DSPCLK_GATE_D, val); } - /* put device in ready state */ - intel_dsi_device_ready(encoder); + intel_dsi_prepare(encoder, pipe_config); - drm_panel_prepare(intel_dsi->panel); + /* Power on, try both CRC pmic gpio and VBT */ + if (intel_dsi->gpio_panel) + gpiod_set_value_cansleep(intel_dsi->gpio_panel, 1); + intel_dsi_vbt_exec_sequence(intel_dsi, 
MIPI_SEQ_POWER_ON); + intel_dsi_msleep(intel_dsi, intel_dsi->panel_on_delay); + + /* Deassert reset */ + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); - for_each_dsi_port(port, intel_dsi->ports) - wait_for_dsi_fifo_empty(intel_dsi, port); + /* Put device in ready state (LP-11) */ + intel_dsi_device_ready(encoder); + + /* Send initialization commands in LP mode */ + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_INIT_OTP); /* Enable port in pre-enable phase itself because as per hw team * recommendation, port should be enabled befor plane & pipe */ - intel_dsi_enable(encoder); + if (is_cmd_mode(intel_dsi)) { + for_each_dsi_port(port, intel_dsi->ports) + I915_WRITE(MIPI_MAX_RETURN_PKT_SIZE(port), 8 * 4); + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_TEAR_ON); + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); + } else { + msleep(20); /* XXX */ + for_each_dsi_port(port, intel_dsi->ports) + dpi_send_cmd(intel_dsi, TURN_ON, false, port); + intel_dsi_msleep(intel_dsi, 100); + + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); + + intel_dsi_port_enable(encoder); + } + + intel_panel_enable_backlight(intel_dsi->attached_connector); + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON); } +/* + * DSI port enable has to be done before pipe and plane enable, so we do it in + * the pre_enable hook. + */ static void intel_dsi_enable_nop(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { DRM_DEBUG_KMS("\n"); - - /* for DSI port enable has to be done before pipe - * and plane enable, so port enable is done in - * pre_enable phase itself unlike other encoders - */ } -static void intel_dsi_pre_disable(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) +/* + * DSI port disable has to be done after pipe and plane disable, so we do it in + * the post_disable hook. 
+ */ +static void intel_dsi_disable(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { + struct drm_device *dev = encoder->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); enum port port; DRM_DEBUG_KMS("\n"); + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_OFF); intel_panel_disable_backlight(intel_dsi->attached_connector); + /* + * Disable Device ready before the port shutdown in order + * to avoid split screen + */ + if (IS_BROXTON(dev_priv)) { + for_each_dsi_port(port, intel_dsi->ports) + I915_WRITE(MIPI_DEVICE_READY(port), 0); + } + + /* + * According to the spec we should send SHUTDOWN before + * MIPI_SEQ_DISPLAY_OFF only for v3+ VBTs, but field testing + * has shown that the v3 sequence works for v2 VBTs too + */ if (is_vid_mode(intel_dsi)) { /* Send Shutdown command to the panel in LP mode */ for_each_dsi_port(port, intel_dsi->ports) @@ -619,13 +891,25 @@ static void intel_dsi_pre_disable(struct intel_encoder *encoder, } } -static void intel_dsi_disable(struct intel_encoder *encoder) +static void intel_dsi_clear_device_ready(struct intel_encoder *encoder) { - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv) || + IS_BROXTON(dev_priv)) + vlv_dsi_clear_device_ready(encoder); + else if (IS_GEMINILAKE(dev_priv)) + glk_dsi_clear_device_ready(encoder); +} + +static void intel_dsi_post_disable(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); enum port port; - u32 temp; + u32 val; DRM_DEBUG_KMS("\n"); @@ -634,85 +918,32 @@ static void 
intel_dsi_disable(struct intel_encoder *encoder) wait_for_dsi_fifo_empty(intel_dsi, port); intel_dsi_port_disable(encoder); - msleep(2); + usleep_range(2000, 5000); } - for_each_dsi_port(port, intel_dsi->ports) { - /* Panel commands can be sent when clock is in LP11 */ - I915_WRITE(MIPI_DEVICE_READY(port), 0x0); - - intel_dsi_reset_clocks(encoder, port); - I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); + intel_dsi_unprepare(encoder); - temp = I915_READ(MIPI_DSI_FUNC_PRG(port)); - temp &= ~VID_MODE_FORMAT_MASK; - I915_WRITE(MIPI_DSI_FUNC_PRG(port), temp); - - I915_WRITE(MIPI_DEVICE_READY(port), 0x1); - } - /* if disable packets are sent before sending shutdown packet then in - * some next enable sequence send turn on packet error is observed */ - drm_panel_disable(intel_dsi->panel); - - for_each_dsi_port(port, intel_dsi->ports) - wait_for_dsi_fifo_empty(intel_dsi, port); -} - -static void intel_dsi_clear_device_ready(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - enum port port; - - DRM_DEBUG_KMS("\n"); - for_each_dsi_port(port, intel_dsi->ports) { - /* Common bit for both MIPI Port A & MIPI Port C on VLV/CHV */ - i915_reg_t port_ctrl = IS_GEN9_LP(dev_priv) ? 
- BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(PORT_A); - u32 val; - - I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY | - ULPS_STATE_ENTER); - usleep_range(2000, 2500); - - I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY | - ULPS_STATE_EXIT); - usleep_range(2000, 2500); - - I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY | - ULPS_STATE_ENTER); - usleep_range(2000, 2500); + /* + * if disable packets are sent before sending shutdown packet then in + * some next enable sequence send turn on packet error is observed + */ + if (is_cmd_mode(intel_dsi)) + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_TEAR_OFF); + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_OFF); - /* Wait till Clock lanes are in LP-00 state for MIPI Port A - * only. MIPI Port C has no similar bit for checking - */ - if (intel_wait_for_register(dev_priv, - port_ctrl, AFE_LATCHOUT, 0, - 30)) - DRM_ERROR("DSI LP not going Low\n"); + /* Transition to LP-00 */ + intel_dsi_clear_device_ready(encoder); - /* Disable MIPI PHY transparent latch */ - val = I915_READ(port_ctrl); - I915_WRITE(port_ctrl, val & ~LP_OUTPUT_HOLD); - usleep_range(1000, 1500); + if (IS_BROXTON(dev_priv)) { + /* Power down DSI regulator to save power */ + I915_WRITE(BXT_P_DSI_REGULATOR_CFG, STAP_SELECT); + I915_WRITE(BXT_P_DSI_REGULATOR_TX_CTRL, HS_IO_CTRL_SELECT); - I915_WRITE(MIPI_DEVICE_READY(port), 0x00); - usleep_range(2000, 2500); + /* Add MIPI IO reset programming for modeset */ + val = I915_READ(BXT_P_CR_GT_DISP_PWRON); + I915_WRITE(BXT_P_CR_GT_DISP_PWRON, + val & ~MIPIO_RST_CTRL); } -} - -static void intel_dsi_post_disable(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - - DRM_DEBUG_KMS("\n"); - - intel_dsi_disable(encoder); - - intel_dsi_clear_device_ready(encoder); intel_disable_dsi_pll(encoder); @@ -724,11 +955,12 @@ 
static void intel_dsi_post_disable(struct intel_encoder *encoder, I915_WRITE(DSPCLK_GATE_D, val); } - drm_panel_unprepare(intel_dsi->panel); + /* Assert reset */ + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_ASSERT_RESET); - msleep(intel_dsi->panel_off_delay); - - /* Panel Disable over CRC PMIC */ + /* Power off, try both CRC pmic gpio and VBT */ + intel_dsi_msleep(intel_dsi, intel_dsi->panel_off_delay); + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_POWER_OFF); if (intel_dsi->gpio_panel) gpiod_set_value_cansleep(intel_dsi->gpio_panel, 0); @@ -736,7 +968,7 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, * FIXME As we do with eDP, just make a note of the time here * and perform the wait before the next panel power on. */ - msleep(intel_dsi->panel_pwr_cycle_delay); + intel_dsi_msleep(intel_dsi, intel_dsi->panel_pwr_cycle_delay); } static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, @@ -744,14 +976,13 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - enum intel_display_power_domain power_domain; enum port port; bool active = false; DRM_DEBUG_KMS("\n"); - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; /* @@ -807,7 +1038,7 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, } out_put_power: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return active; } @@ -1279,6 +1510,14 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, */ I915_WRITE(MIPI_LP_BYTECLK(port), intel_dsi->lp_byte_clk); + if (IS_GEMINILAKE(dev_priv)) { + I915_WRITE(MIPI_TLPX_TIME_COUNT(port), + intel_dsi->lp_byte_clk); + /* Shadow of DPHY reg */ + 
I915_WRITE(MIPI_CLK_LANE_TIMING(port), + intel_dsi->dphy_reg); + } + /* the bw essential for transmitting 16 long packets containing * 252 bytes meant for dcs write memory command is programmed in * this register in terms of byte clocks. based on dsi transfer @@ -1302,6 +1541,30 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, } } +static void intel_dsi_unprepare(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; + u32 val; + + if (!IS_GEMINILAKE(dev_priv)) { + for_each_dsi_port(port, intel_dsi->ports) { + /* Panel commands can be sent when clock is in LP11 */ + I915_WRITE(MIPI_DEVICE_READY(port), 0x0); + + intel_dsi_reset_clocks(encoder, port); + I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); + + val = I915_READ(MIPI_DSI_FUNC_PRG(port)); + val &= ~VID_MODE_FORMAT_MASK; + I915_WRITE(MIPI_DSI_FUNC_PRG(port), val); + + I915_WRITE(MIPI_DEVICE_READY(port), 0x1); + } + } +} + static int intel_dsi_get_modes(struct drm_connector *connector) { struct intel_connector *intel_connector = to_intel_connector(connector); @@ -1381,12 +1644,6 @@ static void intel_dsi_encoder_destroy(struct drm_encoder *encoder) { struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); - if (intel_dsi->panel) { - drm_panel_detach(intel_dsi->panel); - /* XXX: Logically this call belongs in the panel driver. 
*/ - drm_panel_remove(intel_dsi->panel); - } - /* dispose of the gpios */ if (intel_dsi->gpio_panel) gpiod_put(intel_dsi->gpio_panel); @@ -1438,7 +1695,6 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) struct drm_connector *connector; struct drm_display_mode *scan, *fixed_mode = NULL; enum port port; - unsigned int i; DRM_DEBUG_KMS("\n"); @@ -1477,7 +1733,7 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) intel_encoder->compute_config = intel_dsi_compute_config; intel_encoder->pre_enable = intel_dsi_pre_enable; intel_encoder->enable = intel_dsi_enable_nop; - intel_encoder->disable = intel_dsi_pre_disable; + intel_encoder->disable = intel_dsi_disable; intel_encoder->post_disable = intel_dsi_post_disable; intel_encoder->get_hw_state = intel_dsi_get_hw_state; intel_encoder->get_config = intel_dsi_get_config; @@ -1485,6 +1741,7 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) intel_connector->get_hw_state = intel_connector_get_hw_state; intel_encoder->port = port; + /* * On BYT/CHV, pipe A maps to MIPI DSI port A, pipe B maps to MIPI DSI * port C. BXT isn't limited like this. @@ -1544,14 +1801,7 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) intel_dsi->dsi_hosts[port] = host; } - for (i = 0; i < ARRAY_SIZE(intel_dsi_drivers); i++) { - intel_dsi->panel = intel_dsi_drivers[i].init(intel_dsi, - intel_dsi_drivers[i].panel_id); - if (intel_dsi->panel) - break; - } - - if (!intel_dsi->panel) { + if (!intel_dsi_vbt_init(intel_dsi, MIPI_DSI_GENERIC_PANEL_ID)) { DRM_DEBUG_KMS("no device found\n"); goto err; } @@ -1560,7 +1810,8 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) * In case of BYT with CRC PMIC, we need to use GPIO for * Panel control. 
*/ - if (dev_priv->vbt.dsi.config->pwm_blc == PPS_BLC_PMIC) { + if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && + (dev_priv->vbt.dsi.config->pwm_blc == PPS_BLC_PMIC)) { intel_dsi->gpio_panel = gpiod_get(dev->dev, "panel", GPIOD_OUT_HIGH); @@ -1571,6 +1822,7 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) } intel_encoder->type = INTEL_OUTPUT_DSI; + intel_encoder->power_domain = POWER_DOMAIN_PORT_DSI; intel_encoder->cloneable = 0; drm_connector_init(dev, connector, &intel_dsi_connector_funcs, DRM_MODE_CONNECTOR_DSI); @@ -1583,10 +1835,8 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) intel_connector_attach_encoder(intel_connector, intel_encoder); - drm_panel_attach(intel_dsi->panel, connector); - mutex_lock(&dev->mode_config.mutex); - drm_panel_get_modes(intel_dsi->panel); + intel_dsi_vbt_get_modes(intel_dsi); list_for_each_entry(scan, &connector->probed_modes, head) { if ((scan->type & DRM_MODE_TYPE_PREFERRED)) { fixed_mode = drm_mode_duplicate(dev, scan); diff --git a/drivers/gpu/drm/i915/intel_dsi.h b/drivers/gpu/drm/i915/intel_dsi.h index 5967ea6d6045..7afeb9580f41 100644 --- a/drivers/gpu/drm/i915/intel_dsi.h +++ b/drivers/gpu/drm/i915/intel_dsi.h @@ -39,7 +39,6 @@ struct intel_dsi_host; struct intel_dsi { struct intel_encoder base; - struct drm_panel *panel; struct intel_dsi_host *dsi_hosts[I915_MAX_PORTS]; /* GPIO Desc for CRC based Panel control */ @@ -130,6 +129,11 @@ static inline struct intel_dsi *enc_to_intel_dsi(struct drm_encoder *encoder) return container_of(encoder, struct intel_dsi, base.base); } +/* intel_dsi.c */ +void wait_for_dsi_fifo_empty(struct intel_dsi *intel_dsi, enum port port); +enum mipi_dsi_pixel_format pixel_format_from_register_bits(u32 fmt); + +/* intel_dsi_pll.c */ bool intel_dsi_pll_is_enabled(struct drm_i915_private *dev_priv); int intel_compute_dsi_pll(struct intel_encoder *encoder, struct intel_crtc_state *config); @@ -141,7 +145,10 @@ u32 intel_dsi_get_pclk(struct intel_encoder *encoder, int 
pipe_bpp, void intel_dsi_reset_clocks(struct intel_encoder *encoder, enum port port); -struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id); -enum mipi_dsi_pixel_format pixel_format_from_register_bits(u32 fmt); +/* intel_dsi_vbt.c */ +bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id); +int intel_dsi_vbt_get_modes(struct intel_dsi *intel_dsi); +void intel_dsi_vbt_exec_sequence(struct intel_dsi *intel_dsi, + enum mipi_seq seq_id); #endif /* _INTEL_DSI_H */ diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c index 61440e5c2563..2ff2ee7f3b78 100644 --- a/drivers/gpu/drm/i915/intel_dsi_pll.c +++ b/drivers/gpu/drm/i915/intel_dsi_pll.c @@ -206,17 +206,24 @@ static bool bxt_dsi_pll_is_enabled(struct drm_i915_private *dev_priv) return false; /* - * Both dividers must be programmed with valid values even if only one - * of the PLL is used, see BSpec/Broxton Clocks. Check this here for + * Dividers must be programmed with valid values. As per BSEPC, for + * GEMINLAKE only PORT A divider values are checked while for BXT + * both divider values are validated. Check this here for * paranoia, since BIOS is known to misconfigure PLLs in this way at * times, and since accessing DSI registers with invalid dividers * causes a system hang. 
*/ val = I915_READ(BXT_DSI_PLL_CTL); - if (!(val & BXT_DSIA_16X_MASK) || !(val & BXT_DSIC_16X_MASK)) { - DRM_DEBUG_DRIVER("PLL is enabled with invalid divider settings (%08x)\n", - val); - enabled = false; + if (IS_GEMINILAKE(dev_priv)) { + if (!(val & BXT_DSIA_16X_MASK)) { + DRM_DEBUG_DRIVER("Invalid PLL divider (%08x)\n", val); + enabled = false; + } + } else { + if (!(val & BXT_DSIA_16X_MASK) || !(val & BXT_DSIC_16X_MASK)) { + DRM_DEBUG_DRIVER("Invalid PLL divider (%08x)\n", val); + enabled = false; + } } return enabled; @@ -372,6 +379,53 @@ static void vlv_dsi_reset_clocks(struct intel_encoder *encoder, enum port port) ESCAPE_CLOCK_DIVIDER_SHIFT); } +static void glk_dsi_program_esc_clock(struct drm_device *dev, + const struct intel_crtc_state *config) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + u32 dsi_rate = 0; + u32 pll_ratio = 0; + u32 ddr_clk = 0; + u32 div1_value = 0; + u32 div2_value = 0; + u32 txesc1_div = 0; + u32 txesc2_div = 0; + + pll_ratio = config->dsi_pll.ctrl & BXT_DSI_PLL_RATIO_MASK; + + dsi_rate = (BXT_REF_CLOCK_KHZ * pll_ratio) / 2; + + ddr_clk = dsi_rate / 2; + + /* Variable divider value */ + div1_value = DIV_ROUND_CLOSEST(ddr_clk, 20000); + + /* Calculate TXESC1 divider */ + if (div1_value <= 10) + txesc1_div = div1_value; + else if ((div1_value > 10) && (div1_value <= 20)) + txesc1_div = DIV_ROUND_UP(div1_value, 2); + else if ((div1_value > 20) && (div1_value <= 30)) + txesc1_div = DIV_ROUND_UP(div1_value, 4); + else if ((div1_value > 30) && (div1_value <= 40)) + txesc1_div = DIV_ROUND_UP(div1_value, 6); + else if ((div1_value > 40) && (div1_value <= 50)) + txesc1_div = DIV_ROUND_UP(div1_value, 8); + else + txesc1_div = 10; + + /* Calculate TXESC2 divider */ + div2_value = DIV_ROUND_UP(div1_value, txesc1_div); + + if (div2_value < 10) + txesc2_div = div2_value; + else + txesc2_div = 10; + + I915_WRITE(MIPIO_TXESC_CLK_DIV1, txesc1_div & GLK_TX_ESC_CLK_DIV1_MASK); + I915_WRITE(MIPIO_TXESC_CLK_DIV2, txesc2_div & 
GLK_TX_ESC_CLK_DIV2_MASK); +} + /* Program BXT Mipi clocks and dividers */ static void bxt_dsi_program_clocks(struct drm_device *dev, enum port port, const struct intel_crtc_state *config) @@ -416,11 +470,7 @@ static void bxt_dsi_program_clocks(struct drm_device *dev, enum port port, rx_div_lower = rx_div & RX_DIVIDER_BIT_1_2; rx_div_upper = (rx_div & RX_DIVIDER_BIT_3_4) >> 2; - /* As per bpsec program the 8/3X clock divider to the below value */ - if (dev_priv->vbt.dsi.config->is_cmd_mode) - mipi_8by3_divider = 0x2; - else - mipi_8by3_divider = 0x3; + mipi_8by3_divider = 0x2; tmp |= BXT_MIPI_8X_BY3_DIVIDER(port, mipi_8by3_divider); tmp |= BXT_MIPI_TX_ESCLK_DIVIDER(port, tx_div); @@ -430,11 +480,12 @@ static void bxt_dsi_program_clocks(struct drm_device *dev, enum port port, I915_WRITE(BXT_MIPI_CLOCK_CTL, tmp); } -static int bxt_compute_dsi_pll(struct intel_encoder *encoder, +static int gen9lp_compute_dsi_pll(struct intel_encoder *encoder, struct intel_crtc_state *config) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - u8 dsi_ratio; + u8 dsi_ratio, dsi_ratio_min, dsi_ratio_max; u32 dsi_clk; dsi_clk = dsi_clk_from_pclk(intel_dsi->pclk, intel_dsi->pixel_format, @@ -446,11 +497,20 @@ static int bxt_compute_dsi_pll(struct intel_encoder *encoder, * round 'up' the result */ dsi_ratio = DIV_ROUND_UP(dsi_clk * 2, BXT_REF_CLOCK_KHZ); - if (dsi_ratio < BXT_DSI_PLL_RATIO_MIN || - dsi_ratio > BXT_DSI_PLL_RATIO_MAX) { + + if (IS_BROXTON(dev_priv)) { + dsi_ratio_min = BXT_DSI_PLL_RATIO_MIN; + dsi_ratio_max = BXT_DSI_PLL_RATIO_MAX; + } else { + dsi_ratio_min = GLK_DSI_PLL_RATIO_MIN; + dsi_ratio_max = GLK_DSI_PLL_RATIO_MAX; + } + + if (dsi_ratio < dsi_ratio_min || dsi_ratio > dsi_ratio_max) { DRM_ERROR("Cant get a suitable ratio from DSI PLL ratios\n"); return -ECHRNG; - } + } else + DRM_DEBUG_KMS("DSI PLL calculation is Done!!\n"); /* * Program DSI ratio and Select MIPIC and MIPIA PLL output as 
8x @@ -462,13 +522,13 @@ static int bxt_compute_dsi_pll(struct intel_encoder *encoder, /* As per recommendation from hardware team, * Prog PVD ratio =1 if dsi ratio <= 50 */ - if (dsi_ratio <= 50) + if (IS_BROXTON(dev_priv) && dsi_ratio <= 50) config->dsi_pll.ctrl |= BXT_DSI_PLL_PVD_RATIO_1; return 0; } -static void bxt_enable_dsi_pll(struct intel_encoder *encoder, +static void gen9lp_enable_dsi_pll(struct intel_encoder *encoder, const struct intel_crtc_state *config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -483,8 +543,12 @@ static void bxt_enable_dsi_pll(struct intel_encoder *encoder, POSTING_READ(BXT_DSI_PLL_CTL); /* Program TX, RX, Dphy clocks */ - for_each_dsi_port(port, intel_dsi->ports) - bxt_dsi_program_clocks(encoder->base.dev, port, config); + if (IS_BROXTON(dev_priv)) { + for_each_dsi_port(port, intel_dsi->ports) + bxt_dsi_program_clocks(encoder->base.dev, port, config); + } else { + glk_dsi_program_esc_clock(encoder->base.dev, config); + } /* Enable DSI PLL */ val = I915_READ(BXT_DSI_PLL_ENABLE); @@ -522,7 +586,7 @@ int intel_compute_dsi_pll(struct intel_encoder *encoder, if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) return vlv_compute_dsi_pll(encoder, config); else if (IS_GEN9_LP(dev_priv)) - return bxt_compute_dsi_pll(encoder, config); + return gen9lp_compute_dsi_pll(encoder, config); return -ENODEV; } @@ -535,7 +599,7 @@ void intel_enable_dsi_pll(struct intel_encoder *encoder, if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) vlv_enable_dsi_pll(encoder, config); else if (IS_GEN9_LP(dev_priv)) - bxt_enable_dsi_pll(encoder, config); + gen9lp_enable_dsi_pll(encoder, config); } void intel_disable_dsi_pll(struct intel_encoder *encoder) @@ -548,19 +612,30 @@ void intel_disable_dsi_pll(struct intel_encoder *encoder) bxt_disable_dsi_pll(encoder); } -static void bxt_dsi_reset_clocks(struct intel_encoder *encoder, enum port port) +static void gen9lp_dsi_reset_clocks(struct intel_encoder *encoder, + enum port port) { 
u32 tmp; struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); /* Clear old configurations */ - tmp = I915_READ(BXT_MIPI_CLOCK_CTL); - tmp &= ~(BXT_MIPI_TX_ESCLK_FIXDIV_MASK(port)); - tmp &= ~(BXT_MIPI_RX_ESCLK_UPPER_FIXDIV_MASK(port)); - tmp &= ~(BXT_MIPI_8X_BY3_DIVIDER_MASK(port)); - tmp &= ~(BXT_MIPI_RX_ESCLK_LOWER_FIXDIV_MASK(port)); - I915_WRITE(BXT_MIPI_CLOCK_CTL, tmp); + if (IS_BROXTON(dev_priv)) { + tmp = I915_READ(BXT_MIPI_CLOCK_CTL); + tmp &= ~(BXT_MIPI_TX_ESCLK_FIXDIV_MASK(port)); + tmp &= ~(BXT_MIPI_RX_ESCLK_UPPER_FIXDIV_MASK(port)); + tmp &= ~(BXT_MIPI_8X_BY3_DIVIDER_MASK(port)); + tmp &= ~(BXT_MIPI_RX_ESCLK_LOWER_FIXDIV_MASK(port)); + I915_WRITE(BXT_MIPI_CLOCK_CTL, tmp); + } else { + tmp = I915_READ(MIPIO_TXESC_CLK_DIV1); + tmp &= ~GLK_TX_ESC_CLK_DIV1_MASK; + I915_WRITE(MIPIO_TXESC_CLK_DIV1, tmp); + + tmp = I915_READ(MIPIO_TXESC_CLK_DIV2); + tmp &= ~GLK_TX_ESC_CLK_DIV2_MASK; + I915_WRITE(MIPIO_TXESC_CLK_DIV2, tmp); + } I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); } @@ -569,7 +644,7 @@ void intel_dsi_reset_clocks(struct intel_encoder *encoder, enum port port) struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); if (IS_GEN9_LP(dev_priv)) - bxt_dsi_reset_clocks(encoder, port); + gen9lp_dsi_reset_clocks(encoder, port); else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) vlv_dsi_reset_clocks(encoder, port); } diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_vbt.c index 8f683b8b1816..0dce7792643a 100644 --- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c @@ -28,7 +28,6 @@ #include <drm/drm_crtc.h> #include <drm/drm_edid.h> #include <drm/i915_drm.h> -#include <drm/drm_panel.h> #include <linux/gpio/consumer.h> #include <linux/slab.h> #include <video/mipi_display.h> @@ -38,16 +37,6 @@ #include "intel_drv.h" #include "intel_dsi.h" -struct vbt_panel { - struct drm_panel panel; - struct intel_dsi *intel_dsi; -}; - -static 
inline struct vbt_panel *to_vbt_panel(struct drm_panel *panel) -{ - return container_of(panel, struct vbt_panel, panel); -} - #define MIPI_TRANSFER_MODE_SHIFT 0 #define MIPI_VIRTUAL_CHANNEL_SHIFT 1 #define MIPI_PORT_SHIFT 3 @@ -192,6 +181,8 @@ static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi, break; } + wait_for_dsi_fifo_empty(intel_dsi, port); + out: data += len; @@ -424,10 +415,9 @@ static const char *sequence_name(enum mipi_seq seq_id) return "(unknown)"; } -static void generic_exec_sequence(struct drm_panel *panel, enum mipi_seq seq_id) +void intel_dsi_vbt_exec_sequence(struct intel_dsi *intel_dsi, + enum mipi_seq seq_id) { - struct vbt_panel *vbt_panel = to_vbt_panel(panel); - struct intel_dsi *intel_dsi = vbt_panel->intel_dsi; struct drm_i915_private *dev_priv = to_i915(intel_dsi->base.base.dev); const u8 *data; fn_mipi_elem_exec mipi_elem_exec; @@ -491,78 +481,31 @@ static void generic_exec_sequence(struct drm_panel *panel, enum mipi_seq seq_id) } } -static int vbt_panel_prepare(struct drm_panel *panel) +int intel_dsi_vbt_get_modes(struct intel_dsi *intel_dsi) { - generic_exec_sequence(panel, MIPI_SEQ_ASSERT_RESET); - generic_exec_sequence(panel, MIPI_SEQ_POWER_ON); - generic_exec_sequence(panel, MIPI_SEQ_DEASSERT_RESET); - generic_exec_sequence(panel, MIPI_SEQ_INIT_OTP); - - return 0; -} - -static int vbt_panel_unprepare(struct drm_panel *panel) -{ - generic_exec_sequence(panel, MIPI_SEQ_ASSERT_RESET); - generic_exec_sequence(panel, MIPI_SEQ_POWER_OFF); - - return 0; -} - -static int vbt_panel_enable(struct drm_panel *panel) -{ - generic_exec_sequence(panel, MIPI_SEQ_DISPLAY_ON); - generic_exec_sequence(panel, MIPI_SEQ_BACKLIGHT_ON); - - return 0; -} - -static int vbt_panel_disable(struct drm_panel *panel) -{ - generic_exec_sequence(panel, MIPI_SEQ_BACKLIGHT_OFF); - generic_exec_sequence(panel, MIPI_SEQ_DISPLAY_OFF); - - return 0; -} - -static int vbt_panel_get_modes(struct drm_panel *panel) -{ - struct vbt_panel *vbt_panel = 
to_vbt_panel(panel); - struct intel_dsi *intel_dsi = vbt_panel->intel_dsi; + struct intel_connector *connector = intel_dsi->attached_connector; struct drm_device *dev = intel_dsi->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_display_mode *mode; - if (!panel->connector) - return 0; - mode = drm_mode_duplicate(dev, dev_priv->vbt.lfp_lvds_vbt_mode); if (!mode) return 0; mode->type |= DRM_MODE_TYPE_PREFERRED; - drm_mode_probed_add(panel->connector, mode); + drm_mode_probed_add(&connector->base, mode); return 1; } -static const struct drm_panel_funcs vbt_panel_funcs = { - .disable = vbt_panel_disable, - .unprepare = vbt_panel_unprepare, - .prepare = vbt_panel_prepare, - .enable = vbt_panel_enable, - .get_modes = vbt_panel_get_modes, -}; - -struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) +bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) { struct drm_device *dev = intel_dsi->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct mipi_config *mipi_config = dev_priv->vbt.dsi.config; struct mipi_pps_data *pps = dev_priv->vbt.dsi.pps; struct drm_display_mode *mode = dev_priv->vbt.lfp_lvds_vbt_mode; - struct vbt_panel *vbt_panel; u32 bpp; u32 tlpx_ns, extra_byte_count, bitrate, tlpx_ui; u32 ui_num, ui_den; @@ -571,6 +514,7 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) u32 tclk_prepare_clkzero, ths_prepare_hszero; u32 lp_to_hs_switch, hs_to_lp_switch; u32 pclk, computed_ddr; + u32 mul; u16 burst_mode_ratio; enum port port; @@ -624,7 +568,7 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) if (mipi_config->target_burst_mode_freq < computed_ddr) { DRM_ERROR("Burst mode freq is less than computed\n"); - return NULL; + return false; } burst_mode_ratio = DIV_ROUND_UP( @@ -634,7 +578,7 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) pclk = DIV_ROUND_UP(pclk * burst_mode_ratio, 100); } else { 
DRM_ERROR("Burst mode target is not set\n"); - return NULL; + return false; } } else burst_mode_ratio = 100; @@ -674,11 +618,6 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) break; } - /* - * ui(s) = 1/f [f in hz] - * ui(ns) = 10^9 / (f*10^6) [f in Mhz] -> 10^3/f(Mhz) - */ - /* in Kbps */ ui_num = NS_KHZ_RATIO; ui_den = bitrate; @@ -692,21 +631,26 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) */ intel_dsi->lp_byte_clk = DIV_ROUND_UP(tlpx_ns * ui_den, 8 * ui_num); - /* count values in UI = (ns value) * (bitrate / (2 * 10^6)) + /* DDR clock period = 2 * UI + * UI(sec) = 1/(bitrate * 10^3) (bitrate is in KHZ) + * UI(nsec) = 10^6 / bitrate + * DDR clock period (nsec) = 2 * UI = (2 * 10^6)/ bitrate + * DDR clock count = ns_value / DDR clock period * - * Since txddrclkhs_i is 2xUI, all the count values programmed in - * DPHY param register are divided by 2 - * - * prepare count + * For GEMINILAKE dphy_param_reg will be programmed in terms of + * HS byte clock count for other platform in HS ddr clock count */ + mul = IS_GEMINILAKE(dev_priv) ? 
8 : 2; ths_prepare_ns = max(mipi_config->ths_prepare, mipi_config->tclk_prepare); - prepare_cnt = DIV_ROUND_UP(ths_prepare_ns * ui_den, ui_num * 2); + + /* prepare count */ + prepare_cnt = DIV_ROUND_UP(ths_prepare_ns * ui_den, ui_num * mul); /* exit zero count */ exit_zero_cnt = DIV_ROUND_UP( (ths_prepare_hszero - ths_prepare_ns) * ui_den, - ui_num * 2 + ui_num * mul ); /* @@ -720,12 +664,12 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) /* clk zero count */ clk_zero_cnt = DIV_ROUND_UP( - (tclk_prepare_clkzero - ths_prepare_ns) - * ui_den, 2 * ui_num); + (tclk_prepare_clkzero - ths_prepare_ns) + * ui_den, ui_num * mul); /* trail count */ tclk_trail_ns = max(mipi_config->tclk_trail, mipi_config->ths_trail); - trail_cnt = DIV_ROUND_UP(tclk_trail_ns * ui_den, 2 * ui_num); + trail_cnt = DIV_ROUND_UP(tclk_trail_ns * ui_den, ui_num * mul); if (prepare_cnt > PREPARE_CNT_MAX || exit_zero_cnt > EXIT_ZERO_CNT_MAX || @@ -801,6 +745,19 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) 8); intel_dsi->clk_hs_to_lp_count += extra_byte_count; + DRM_DEBUG_KMS("Pclk %d\n", intel_dsi->pclk); + DRM_DEBUG_KMS("Pixel overlap %d\n", intel_dsi->pixel_overlap); + DRM_DEBUG_KMS("Lane count %d\n", intel_dsi->lane_count); + DRM_DEBUG_KMS("DPHY param reg 0x%x\n", intel_dsi->dphy_reg); + DRM_DEBUG_KMS("Video mode format %s\n", + intel_dsi->video_mode_format == VIDEO_MODE_NON_BURST_WITH_SYNC_PULSE ? + "non-burst with sync pulse" : + intel_dsi->video_mode_format == VIDEO_MODE_NON_BURST_WITH_SYNC_EVENTS ? + "non-burst with sync events" : + intel_dsi->video_mode_format == VIDEO_MODE_BURST ? + "burst" : "<unknown>"); + DRM_DEBUG_KMS("Burst mode ratio %d\n", intel_dsi->burst_mode_ratio); + DRM_DEBUG_KMS("Reset timer %d\n", intel_dsi->rst_timer_val); DRM_DEBUG_KMS("Eot %s\n", enableddisabled(intel_dsi->eotp_pkt)); DRM_DEBUG_KMS("Clockstop %s\n", enableddisabled(!intel_dsi->clock_stop)); DRM_DEBUG_KMS("Mode %s\n", intel_dsi->operation_mode ? 
"command" : "video"); @@ -832,20 +789,10 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) intel_dsi->panel_off_delay = pps->panel_off_delay / 10; intel_dsi->panel_pwr_cycle_delay = pps->panel_power_cycle_delay / 10; - /* This is cheating a bit with the cleanup. */ - vbt_panel = devm_kzalloc(dev->dev, sizeof(*vbt_panel), GFP_KERNEL); - if (!vbt_panel) - return NULL; - - vbt_panel->intel_dsi = intel_dsi; - drm_panel_init(&vbt_panel->panel); - vbt_panel->panel.funcs = &vbt_panel_funcs; - drm_panel_add(&vbt_panel->panel); - /* a regular driver would get the device in probe */ for_each_dsi_port(port, intel_dsi->ports) { mipi_dsi_attach(intel_dsi->dsi_hosts[port]->device); } - return &vbt_panel->panel; + return true; } diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index 50da89dcb92b..6025839ed3b7 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -515,6 +515,7 @@ void intel_dvo_init(struct drm_i915_private *dev_priv) "DVO %c", port_name(port)); intel_encoder->type = INTEL_OUTPUT_DVO; + intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = port; intel_encoder->crtc_mask = (1 << 0) | (1 << 1); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ab1be5c80ea5..854e8e0c836b 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -28,53 +28,53 @@ static const struct engine_info { const char *name; - unsigned exec_id; - enum intel_engine_hw_id hw_id; + unsigned int exec_id; + unsigned int hw_id; u32 mmio_base; unsigned irq_shift; int (*init_legacy)(struct intel_engine_cs *engine); int (*init_execlists)(struct intel_engine_cs *engine); } intel_engines[] = { [RCS] = { - .name = "render ring", - .exec_id = I915_EXEC_RENDER, + .name = "rcs", .hw_id = RCS_HW, + .exec_id = I915_EXEC_RENDER, .mmio_base = RENDER_RING_BASE, .irq_shift = GEN8_RCS_IRQ_SHIFT, .init_execlists = 
logical_render_ring_init, .init_legacy = intel_init_render_ring_buffer, }, [BCS] = { - .name = "blitter ring", - .exec_id = I915_EXEC_BLT, + .name = "bcs", .hw_id = BCS_HW, + .exec_id = I915_EXEC_BLT, .mmio_base = BLT_RING_BASE, .irq_shift = GEN8_BCS_IRQ_SHIFT, .init_execlists = logical_xcs_ring_init, .init_legacy = intel_init_blt_ring_buffer, }, [VCS] = { - .name = "bsd ring", - .exec_id = I915_EXEC_BSD, + .name = "vcs", .hw_id = VCS_HW, + .exec_id = I915_EXEC_BSD, .mmio_base = GEN6_BSD_RING_BASE, .irq_shift = GEN8_VCS1_IRQ_SHIFT, .init_execlists = logical_xcs_ring_init, .init_legacy = intel_init_bsd_ring_buffer, }, [VCS2] = { - .name = "bsd2 ring", - .exec_id = I915_EXEC_BSD, + .name = "vcs2", .hw_id = VCS2_HW, + .exec_id = I915_EXEC_BSD, .mmio_base = GEN8_BSD2_RING_BASE, .irq_shift = GEN8_VCS2_IRQ_SHIFT, .init_execlists = logical_xcs_ring_init, .init_legacy = intel_init_bsd2_ring_buffer, }, [VECS] = { - .name = "video enhancement ring", - .exec_id = I915_EXEC_VEBOX, + .name = "vecs", .hw_id = VECS_HW, + .exec_id = I915_EXEC_VEBOX, .mmio_base = VEBOX_RING_BASE, .irq_shift = GEN8_VECS_IRQ_SHIFT, .init_execlists = logical_xcs_ring_init, @@ -112,21 +112,20 @@ intel_engine_setup(struct drm_i915_private *dev_priv, } /** - * intel_engines_init() - allocate, populate and init the Engine Command Streamers + * intel_engines_init_early() - allocate the Engine Command Streamers * @dev_priv: i915 device private * * Return: non-zero if the initialization failed. 
*/ -int intel_engines_init(struct drm_i915_private *dev_priv) +int intel_engines_init_early(struct drm_i915_private *dev_priv) { struct intel_device_info *device_info = mkwrite_device_info(dev_priv); unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask; unsigned int mask = 0; - int (*init)(struct intel_engine_cs *engine); struct intel_engine_cs *engine; enum intel_engine_id id; unsigned int i; - int ret; + int err; WARN_ON(ring_mask == 0); WARN_ON(ring_mask & @@ -136,20 +135,8 @@ int intel_engines_init(struct drm_i915_private *dev_priv) if (!HAS_ENGINE(dev_priv, i)) continue; - if (i915.enable_execlists) - init = intel_engines[i].init_execlists; - else - init = intel_engines[i].init_legacy; - - if (!init) - continue; - - ret = intel_engine_setup(dev_priv, i); - if (ret) - goto cleanup; - - ret = init(dev_priv->engine[i]); - if (ret) + err = intel_engine_setup(dev_priv, i); + if (err) goto cleanup; mask |= ENGINE_MASK(i); @@ -168,14 +155,68 @@ int intel_engines_init(struct drm_i915_private *dev_priv) return 0; cleanup: + for_each_engine(engine, dev_priv, id) + kfree(engine); + return err; +} + +/** + * intel_engines_init() - allocate, populate and init the Engine Command Streamers + * @dev_priv: i915 device private + * + * Return: non-zero if the initialization failed. 
+ */ +int intel_engines_init(struct drm_i915_private *dev_priv) +{ + struct intel_device_info *device_info = mkwrite_device_info(dev_priv); + struct intel_engine_cs *engine; + enum intel_engine_id id, err_id; + unsigned int mask = 0; + int err = 0; + for_each_engine(engine, dev_priv, id) { + int (*init)(struct intel_engine_cs *engine); + if (i915.enable_execlists) - intel_logical_ring_cleanup(engine); + init = intel_engines[id].init_execlists; else - intel_engine_cleanup(engine); + init = intel_engines[id].init_legacy; + if (!init) { + kfree(engine); + dev_priv->engine[id] = NULL; + continue; + } + + err = init(engine); + if (err) { + err_id = id; + goto cleanup; + } + + GEM_BUG_ON(!engine->submit_request); + mask |= ENGINE_MASK(id); } - return ret; + /* + * Catch failures to update intel_engines table when the new engines + * are added to the driver by a warning and disabling the forgotten + * engines. + */ + if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask)) + device_info->ring_mask = mask; + + device_info->num_rings = hweight32(mask); + + return 0; + +cleanup: + for_each_engine(engine, dev_priv, id) { + if (id >= err_id) + kfree(engine); + else + dev_priv->gt.cleanup_engine(engine); + } + return err; } void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) @@ -201,21 +242,18 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) void *semaphores; /* Semaphores are in noncoherent memory, flush to be safe */ - semaphores = kmap(page); + semaphores = kmap_atomic(page); memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0), 0, I915_NUM_ENGINES * gen8_semaphore_seqno_size); drm_clflush_virt_range(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0), I915_NUM_ENGINES * gen8_semaphore_seqno_size); - kunmap(page); + kunmap_atomic(semaphores); } intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); - if (engine->irq_seqno_barrier) - engine->irq_seqno_barrier(engine); + clear_bit(ENGINE_IRQ_BREADCRUMB, 
&engine->irq_posted); GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request)); - engine->timeline->last_submitted_seqno = seqno; - engine->hangcheck.seqno = seqno; /* After manually advancing the seqno, fake the interrupt in case @@ -306,6 +344,8 @@ int intel_engine_init_common(struct intel_engine_cs *engine) { int ret; + engine->set_default_submission(engine); + /* We may need to do things with the shrinker which * require us to immediately switch back to the default * context. This can cause a problem as pinning the @@ -484,3 +524,619 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, break; } } + +static int wa_add(struct drm_i915_private *dev_priv, + i915_reg_t addr, + const u32 mask, const u32 val) +{ + const u32 idx = dev_priv->workarounds.count; + + if (WARN_ON(idx >= I915_MAX_WA_REGS)) + return -ENOSPC; + + dev_priv->workarounds.reg[idx].addr = addr; + dev_priv->workarounds.reg[idx].value = val; + dev_priv->workarounds.reg[idx].mask = mask; + + dev_priv->workarounds.count++; + + return 0; +} + +#define WA_REG(addr, mask, val) do { \ + const int r = wa_add(dev_priv, (addr), (mask), (val)); \ + if (r) \ + return r; \ + } while (0) + +#define WA_SET_BIT_MASKED(addr, mask) \ + WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) + +#define WA_CLR_BIT_MASKED(addr, mask) \ + WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) + +#define WA_SET_FIELD_MASKED(addr, mask, value) \ + WA_REG(addr, mask, _MASKED_FIELD(mask, value)) + +#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask)) +#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask)) + +#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val) + +static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, + i915_reg_t reg) +{ + struct drm_i915_private *dev_priv = engine->i915; + struct i915_workarounds *wa = &dev_priv->workarounds; + const uint32_t index = wa->hw_whitelist_count[engine->id]; + + if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS)) + 
return -EINVAL; + + WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), + i915_mmio_reg_offset(reg)); + wa->hw_whitelist_count[engine->id]++; + + return 0; +} + +static int gen8_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); + + /* WaDisableAsyncFlipPerfMode:bdw,chv */ + WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); + + /* WaDisablePartialInstShootdown:bdw,chv */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + + /* Use Force Non-Coherent whenever executing a 3D context. This is a + * workaround for a possible hang in the unlikely event a TLB + * invalidation occurs during a PSD flush. + */ + /* WaForceEnableNonCoherent:bdw,chv */ + /* WaHdcDisableFetchWhenMasked:bdw,chv */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_DONOT_FETCH_MEM_WHEN_MASKED | + HDC_FORCE_NON_COHERENT); + + /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: + * "The Hierarchical Z RAW Stall Optimization allows non-overlapping + * polygons in the same 8x4 pixel/sample area to be processed without + * stalling waiting for the earlier ones to write to Hierarchical Z + * buffer." + * + * This optimization is off by default for BDW and CHV; turn it on. + */ + WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); + + /* Wa4x4STCOptimizationDisable:bdw,chv */ + WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
+ */ + WA_SET_FIELD_MASKED(GEN7_GT_MODE, + GEN6_WIZ_HASHING_MASK, + GEN6_WIZ_HASHING_16x4); + + return 0; +} + +static int bdw_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen8_init_workarounds(engine); + if (ret) + return ret; + + /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + + /* WaDisableDopClockGating:bdw + * + * Also see the related UCGTCL1 write in broadwell_init_clock_gating() + * to disable EUTC clock gating. + */ + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, + DOP_CLOCK_GATING_DISABLE); + + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); + + WA_SET_BIT_MASKED(HDC_CHICKEN0, + /* WaForceContextSaveRestoreNonCoherent:bdw */ + HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | + /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ + (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); + + return 0; +} + +static int chv_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen8_init_workarounds(engine); + if (ret) + return ret; + + /* WaDisableThreadStallDopClockGating:chv */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + + /* Improve HiZ throughput on CHV. 
*/ + WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); + + return 0; +} + +static int gen9_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk */ + I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE)); + + /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk */ + I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | + GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); + + /* WaDisableKillLogic:bxt,skl,kbl */ + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | + ECOCHK_DIS_TLB); + + /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk */ + /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + FLOW_CONTROL_ENABLE | + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + + /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); + + /* WaDisableDgMirrorFixInHalfSliceChicken5:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) + WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, + GEN9_DG_MIRROR_FIX_ENABLE); + + /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { + WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1, + GEN9_RHWO_OPTIMIZATION_DISABLE); + /* + * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set + * but we do that in per ctx batchbuffer as there is an issue + * with this register not getting restored on ctx restore + */ + } + + /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */ + WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, + GEN9_ENABLE_GPGPU_PREEMPTION); + + /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk */ + /* WaDisablePartialResolveInVc:skl,bxt,kbl */ + WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE | + GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE)); + + /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk */ 
+ WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, + GEN9_CCS_TLB_PREFETCH_ENABLE); + + /* WaDisableMaskBasedCammingInRCC:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) + WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0, + PIXEL_MASK_CAMMING_DISABLE); + + /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | + HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); + + /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are + * both tied to WaForceContextSaveRestoreNonCoherent + * in some hsds for skl. We keep the tie for all gen9. The + * documentation is a bit hazy and so we want to get common behaviour, + * even though there is no clear evidence we would need both on kbl/bxt. + * This area has been source of system hangs so we play it safe + * and mimic the skl regardless of what bspec says. + * + * Use Force Non-Coherent whenever executing a 3D context. This + * is a workaround for a possible hang in the unlikely event + * a TLB invalidation occurs during a PSD flush. 
+ */ + + /* WaForceEnableNonCoherent:skl,bxt,kbl */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FORCE_NON_COHERENT); + + /* WaDisableHDCInvalidation:skl,bxt,kbl */ + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | + BDW_DISABLE_HDC_INVALIDATION); + + /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl */ + if (IS_SKYLAKE(dev_priv) || + IS_KABYLAKE(dev_priv) || + IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); + + /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk */ + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); + + /* WaOCLCoherentLineFlush:skl,bxt,kbl */ + I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | + GEN8_LQSC_FLUSH_COHERENT_LINES)); + + /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk */ + ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); + if (ret) + return ret; + + /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl */ + ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); + if (ret) + return ret; + + /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk */ + ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1); + if (ret) + return ret; + + return 0; +} + +static int skl_tune_iz_hashing(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + u8 vals[3] = { 0, 0, 0 }; + unsigned int i; + + for (i = 0; i < 3; i++) { + u8 ss; + + /* + * Only consider slices where one, and only one, subslice has 7 + * EUs + */ + if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i])) + continue; + + /* + * subslice_7eu[i] != 0 (because of the check above) and + * ss_max == 4 (maximum number of subslices possible per slice) + * + * -> 0 <= ss <= 3; + */ + ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1; + vals[i] = 3 - ss; + } + + if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) + return 0; + + /* Tune IZ hashing. 
See intel_device_info_runtime_init() */ + WA_SET_FIELD_MASKED(GEN7_GT_MODE, + GEN9_IZ_HASHING_MASK(2) | + GEN9_IZ_HASHING_MASK(1) | + GEN9_IZ_HASHING_MASK(0), + GEN9_IZ_HASHING(2, vals[2]) | + GEN9_IZ_HASHING(1, vals[1]) | + GEN9_IZ_HASHING(0, vals[0])); + + return 0; +} + +static int skl_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen9_init_workarounds(engine); + if (ret) + return ret; + + /* + * Actual WA is to disable percontext preemption granularity control + * until D0 which is the default case so this is equivalent to + * !WaDisablePerCtxtPreemptionGranularityControl:skl + */ + I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, + _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); + + /* WaEnableGapsTsvCreditFix:skl */ + I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | + GEN9_GAPS_TSV_CREDIT_DISABLE)); + + /* WaDisableGafsUnitClkGating:skl */ + WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + + /* WaInPlaceDecompressionHang:skl */ + if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER)) + WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + + /* WaDisableLSQCROPERFforOCL:skl */ + ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); + if (ret) + return ret; + + return skl_tune_iz_hashing(engine); +} + +static int bxt_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen9_init_workarounds(engine); + if (ret) + return ret; + + /* WaStoreMultiplePTEenable:bxt */ + /* This is a requirement according to Hardware specification */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) + I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF); + + /* WaSetClckGatingDisableMedia:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { + I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & + ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE)); + } + + /* WaDisableThreadStallDopClockGating:bxt */ + 
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + STALL_DOP_GATING_DISABLE); + + /* WaDisablePooledEuLoadBalancingFix:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) { + WA_SET_BIT_MASKED(FF_SLICE_CS_CHICKEN2, + GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE); + } + + /* WaDisableSbeCacheDispatchPortSharing:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) { + WA_SET_BIT_MASKED( + GEN7_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + } + + /* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */ + /* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */ + /* WaDisableObjectLevelPreemtionForInstanceId:bxt */ + /* WaDisableLSQCROPERFforOCL:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { + ret = wa_ring_whitelist_reg(engine, GEN9_CS_DEBUG_MODE1); + if (ret) + return ret; + + ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); + if (ret) + return ret; + } + + /* WaProgramL3SqcReg1DefaultForPerf:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) + I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) | + L3_HIGH_PRIO_CREDITS(2)); + + /* WaToEnableHwFixForPushConstHWBug:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + /* WaInPlaceDecompressionHang:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) + WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + + return 0; +} + +static int kbl_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen9_init_workarounds(engine); + if (ret) + return ret; + + /* WaEnableGapsTsvCreditFix:kbl */ + I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | + GEN9_GAPS_TSV_CREDIT_DISABLE)); + + /* WaDisableDynamicCreditSharing:kbl */ + if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) + WA_SET_BIT(GAMT_CHKN_BIT_REG, + GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); + + /* 
WaDisableFenceDestinationToSLM:kbl (pre-prod) */ + if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0)) + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FENCE_DEST_SLM_DISABLE); + + /* WaToEnableHwFixForPushConstHWBug:kbl */ + if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + /* WaDisableGafsUnitClkGating:kbl */ + WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + + /* WaDisableSbeCacheDispatchPortSharing:kbl */ + WA_SET_BIT_MASKED( + GEN7_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + + /* WaInPlaceDecompressionHang:kbl */ + WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + + /* WaDisableLSQCROPERFforOCL:kbl */ + ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); + if (ret) + return ret; + + return 0; +} + +static int glk_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen9_init_workarounds(engine); + if (ret) + return ret; + + /* WaToEnableHwFixForPushConstHWBug:glk */ + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + return 0; +} + +int init_workarounds_ring(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int err; + + WARN_ON(engine->id != RCS); + + dev_priv->workarounds.count = 0; + dev_priv->workarounds.hw_whitelist_count[engine->id] = 0; + + if (IS_BROADWELL(dev_priv)) + err = bdw_init_workarounds(engine); + else if (IS_CHERRYVIEW(dev_priv)) + err = chv_init_workarounds(engine); + else if (IS_SKYLAKE(dev_priv)) + err = skl_init_workarounds(engine); + else if (IS_BROXTON(dev_priv)) + err = bxt_init_workarounds(engine); + else if (IS_KABYLAKE(dev_priv)) + err = kbl_init_workarounds(engine); + else if (IS_GEMINILAKE(dev_priv)) + err = glk_init_workarounds(engine); + else + err = 0; + if (err) + return err; + + DRM_DEBUG_DRIVER("%s: Number of 
context specific w/a: %d\n", + engine->name, dev_priv->workarounds.count); + return 0; +} + +int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) +{ + struct i915_workarounds *w = &req->i915->workarounds; + u32 *cs; + int ret, i; + + if (w->count == 0) + return 0; + + ret = req->engine->emit_flush(req, EMIT_BARRIER); + if (ret) + return ret; + + cs = intel_ring_begin(req, (w->count * 2 + 2)); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(w->count); + for (i = 0; i < w->count; i++) { + *cs++ = i915_mmio_reg_offset(w->reg[i].addr); + *cs++ = w->reg[i].value; + } + *cs++ = MI_NOOP; + + intel_ring_advance(req, cs); + + ret = req->engine->emit_flush(req, EMIT_BARRIER); + if (ret) + return ret; + + return 0; +} + +/** + * intel_engine_is_idle() - Report if the engine has finished processing all work + * @engine: the intel_engine_cs + * + * Return true if there are no requests pending, nothing left to be submitted + * to hardware, and that the engine is idle. + */ +bool intel_engine_is_idle(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + /* Any inflight/incomplete requests? */ + if (!i915_seqno_passed(intel_engine_get_seqno(engine), + intel_engine_last_submit(engine))) + return false; + + /* Interrupt/tasklet pending? */ + if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) + return false; + + /* Both ports drained, no more ELSP submission? */ + if (engine->execlist_port[0].request) + return false; + + /* Ring stopped? */ + if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE)) + return false; + + return true; +} + +bool intel_engines_are_idle(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + if (READ_ONCE(dev_priv->gt.active_requests)) + return false; + + /* If the driver is wedged, HW state may be very inconsistent and + * report that it is still busy, even though we have stopped using it. 
+ */ + if (i915_terminally_wedged(&dev_priv->gpu_error)) + return true; + + for_each_engine(engine, dev_priv, id) { + if (!intel_engine_is_idle(engine)) + return false; + } + + return true; +} + +void intel_engines_reset_default_submission(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) + engine->set_default_submission(engine); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_engine.c" +#endif diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 89fe5c8464df..ded2add18b26 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -537,8 +537,7 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv, * reserved range size, so it always assumes the maximum (8mb) is used. * If we enable FBC using a CFB on that memory range we'll get FIFO * underruns, even if that range is not reserved by the BIOS. */ - if (IS_BROADWELL(dev_priv) || - IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv)) end = ggtt->stolen_size - 8 * 1024 * 1024; else end = U64_MAX; @@ -628,7 +627,8 @@ err_fb: kfree(compressed_llb); i915_gem_stolen_remove_node(dev_priv, &fbc->compressed_fb); err_llb: - pr_info_once("drm: not enough stolen space for compressed buffer (need %d more bytes), disabling. Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size); + if (drm_mm_initialized(&dev_priv->mm.stolen)) + pr_info_once("drm: not enough stolen space for compressed buffer (need %d more bytes), disabling. 
Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size); return -ENOSPC; } @@ -743,8 +743,7 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, cache->crtc.mode_flags = crtc_state->base.adjusted_mode.flags; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - cache->crtc.hsw_bdw_pixel_rate = - ilk_pipe_pixel_rate(crtc_state); + cache->crtc.hsw_bdw_pixel_rate = crtc_state->pixel_rate; cache->plane.rotation = plane_state->base.rotation; cache->plane.src_w = drm_rect_width(&plane_state->base.src) >> 16; @@ -819,7 +818,7 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) /* WaFbcExceedCdClockThreshold:hsw,bdw */ if ((IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) && - cache->crtc.hsw_bdw_pixel_rate >= dev_priv->cdclk_freq * 95 / 100) { + cache->crtc.hsw_bdw_pixel_rate >= dev_priv->cdclk.hw.cdclk * 95 / 100) { fbc->no_fbc_reason = "pixel rate is too big"; return false; } @@ -1062,7 +1061,7 @@ void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv, * plane. We could go for fancier schemes such as checking the plane * size, but this would just affect the few platforms that don't tie FBC * to pipe or plane A. */ - for_each_plane_in_state(state, plane, plane_state, i) { + for_each_new_plane_in_state(state, plane, plane_state, i) { struct intel_plane_state *intel_plane_state = to_intel_plane_state(plane_state); struct intel_crtc_state *intel_crtc_state; diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 2d449fb5d1d2..332254a8eebe 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -45,6 +45,14 @@ #include <drm/i915_drm.h> #include "i915_drv.h" +static void intel_fbdev_invalidate(struct intel_fbdev *ifbdev) +{ + struct drm_i915_gem_object *obj = ifbdev->fb->obj; + unsigned int origin = ifbdev->vma->fence ? 
ORIGIN_GTT : ORIGIN_CPU; + + intel_fb_obj_invalidate(obj, origin); +} + static int intel_fbdev_set_par(struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; @@ -53,12 +61,8 @@ static int intel_fbdev_set_par(struct fb_info *info) int ret; ret = drm_fb_helper_set_par(info); - - if (ret == 0) { - mutex_lock(&fb_helper->dev->struct_mutex); - intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT); - mutex_unlock(&fb_helper->dev->struct_mutex); - } + if (ret == 0) + intel_fbdev_invalidate(ifbdev); return ret; } @@ -71,12 +75,8 @@ static int intel_fbdev_blank(int blank, struct fb_info *info) int ret; ret = drm_fb_helper_blank(blank, info); - - if (ret == 0) { - mutex_lock(&fb_helper->dev->struct_mutex); - intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT); - mutex_unlock(&fb_helper->dev->struct_mutex); - } + if (ret == 0) + intel_fbdev_invalidate(ifbdev); return ret; } @@ -87,15 +87,11 @@ static int intel_fbdev_pan_display(struct fb_var_screeninfo *var, struct drm_fb_helper *fb_helper = info->par; struct intel_fbdev *ifbdev = container_of(fb_helper, struct intel_fbdev, helper); - int ret; - ret = drm_fb_helper_pan_display(var, info); - if (ret == 0) { - mutex_lock(&fb_helper->dev->struct_mutex); - intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT); - mutex_unlock(&fb_helper->dev->struct_mutex); - } + ret = drm_fb_helper_pan_display(var, info); + if (ret == 0) + intel_fbdev_invalidate(ifbdev); return ret; } @@ -121,7 +117,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper, struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; struct drm_mode_fb_cmd2 mode_cmd = {}; - struct drm_i915_gem_object *obj = NULL; + struct drm_i915_gem_object *obj; int size, ret; /* we don't do packed 24bpp */ @@ -136,14 +132,13 @@ static int intelfb_alloc(struct drm_fb_helper *helper, mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, sizes->surface_depth); - mutex_lock(&dev->struct_mutex); - size = 
mode_cmd.pitches[0] * mode_cmd.height; size = PAGE_ALIGN(size); /* If the FB is too big, just don't use it since fbdev is not very * important and we should probably use that space with FBC or other * features. */ + obj = NULL; if (size * 2 < ggtt->stolen_usable_size) obj = i915_gem_object_create_stolen(dev_priv, size); if (obj == NULL) @@ -151,24 +146,22 @@ static int intelfb_alloc(struct drm_fb_helper *helper, if (IS_ERR(obj)) { DRM_ERROR("failed to allocate framebuffer\n"); ret = PTR_ERR(obj); - goto out; + goto err; } - fb = __intel_framebuffer_create(dev, &mode_cmd, obj); + fb = intel_framebuffer_create(obj, &mode_cmd); if (IS_ERR(fb)) { - i915_gem_object_put(obj); ret = PTR_ERR(fb); - goto out; + goto err_obj; } - mutex_unlock(&dev->struct_mutex); - ifbdev->fb = to_intel_framebuffer(fb); return 0; -out: - mutex_unlock(&dev->struct_mutex); +err_obj: + i915_gem_object_put(obj); +err: return ret; } @@ -253,7 +246,7 @@ static int intelfb_create(struct drm_fb_helper *helper, if (IS_ERR(vaddr)) { DRM_ERROR("Failed to remap framebuffer into virtual memory\n"); ret = PTR_ERR(vaddr); - goto out_destroy_fbi; + goto out_unpin; } info->screen_base = vaddr; info->screen_size = vma->node.size; @@ -281,8 +274,6 @@ static int intelfb_create(struct drm_fb_helper *helper, vga_switcheroo_client_fb_set(pdev, info); return 0; -out_destroy_fbi: - drm_fb_helper_release_fbi(helper); out_unpin: intel_unpin_fb_vma(vma); out_unlock: @@ -370,7 +361,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, return false; memcpy(save_enabled, enabled, count); - mask = BIT(count) - 1; + mask = GENMASK(count - 1, 0); conn_configured = 0; retry: conn_seq = conn_configured; @@ -541,7 +532,6 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev) */ drm_fb_helper_unregister_fbi(&ifbdev->helper); - drm_fb_helper_release_fbi(&ifbdev->helper); drm_fb_helper_fini(&ifbdev->helper); @@ -629,9 +619,7 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, } cur_size = 
intel_crtc->config->base.adjusted_mode.crtc_vdisplay; - cur_size = intel_fb_align_height(dev, cur_size, - fb->base.format->format, - fb->base.modifier); + cur_size = intel_fb_align_height(&fb->base, 0, cur_size); cur_size *= fb->base.pitches[0]; DRM_DEBUG_KMS("pipe %c area: %dx%d, bpp: %d, size: %d\n", pipe_name(intel_crtc->pipe), @@ -839,11 +827,6 @@ void intel_fbdev_restore_mode(struct drm_device *dev) if (!ifbdev->fb) return; - if (drm_fb_helper_restore_fbdev_mode_unlocked(&ifbdev->helper)) { - DRM_DEBUG("failed to restore crtc mode\n"); - } else { - mutex_lock(&dev->struct_mutex); - intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT); - mutex_unlock(&dev->struct_mutex); - } + if (drm_fb_helper_restore_fbdev_mode_unlocked(&ifbdev->helper) == 0) + intel_fbdev_invalidate(ifbdev); } diff --git a/drivers/gpu/drm/i915/intel_fifo_underrun.c b/drivers/gpu/drm/i915/intel_fifo_underrun.c index e660d8b4bbc3..966e255ca053 100644 --- a/drivers/gpu/drm/i915/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/intel_fifo_underrun.c @@ -54,7 +54,7 @@ static bool ivb_can_enable_err_int(struct drm_device *dev) struct intel_crtc *crtc; enum pipe pipe; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); for_each_pipe(dev_priv, pipe) { crtc = intel_get_crtc_for_pipe(dev_priv, pipe); @@ -72,7 +72,7 @@ static bool cpt_can_enable_serr_int(struct drm_device *dev) enum pipe pipe; struct intel_crtc *crtc; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); for_each_pipe(dev_priv, pipe) { crtc = intel_get_crtc_for_pipe(dev_priv, pipe); @@ -90,7 +90,7 @@ static void i9xx_check_fifo_underruns(struct intel_crtc *crtc) i915_reg_t reg = PIPESTAT(crtc->pipe); u32 pipestat = I915_READ(reg) & 0xffff0000; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if ((pipestat & PIPE_FIFO_UNDERRUN_STATUS) == 0) return; @@ -98,6 +98,7 @@ static void i9xx_check_fifo_underruns(struct intel_crtc *crtc) 
I915_WRITE(reg, pipestat | PIPE_FIFO_UNDERRUN_STATUS); POSTING_READ(reg); + trace_intel_cpu_fifo_underrun(dev_priv, crtc->pipe); DRM_ERROR("pipe %c underrun\n", pipe_name(crtc->pipe)); } @@ -109,7 +110,7 @@ static void i9xx_set_fifo_underrun_reporting(struct drm_device *dev, i915_reg_t reg = PIPESTAT(pipe); u32 pipestat = I915_READ(reg) & 0xffff0000; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if (enable) { I915_WRITE(reg, pipestat | PIPE_FIFO_UNDERRUN_STATUS); @@ -139,7 +140,7 @@ static void ivybridge_check_fifo_underruns(struct intel_crtc *crtc) enum pipe pipe = crtc->pipe; uint32_t err_int = I915_READ(GEN7_ERR_INT); - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if ((err_int & ERR_INT_FIFO_UNDERRUN(pipe)) == 0) return; @@ -147,6 +148,7 @@ static void ivybridge_check_fifo_underruns(struct intel_crtc *crtc) I915_WRITE(GEN7_ERR_INT, ERR_INT_FIFO_UNDERRUN(pipe)); POSTING_READ(GEN7_ERR_INT); + trace_intel_cpu_fifo_underrun(dev_priv, pipe); DRM_ERROR("fifo underrun on pipe %c\n", pipe_name(pipe)); } @@ -204,7 +206,7 @@ static void cpt_check_pch_fifo_underruns(struct intel_crtc *crtc) enum transcoder pch_transcoder = (enum transcoder) crtc->pipe; uint32_t serr_int = I915_READ(SERR_INT); - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if ((serr_int & SERR_INT_TRANS_FIFO_UNDERRUN(pch_transcoder)) == 0) return; @@ -212,6 +214,7 @@ static void cpt_check_pch_fifo_underruns(struct intel_crtc *crtc) I915_WRITE(SERR_INT, SERR_INT_TRANS_FIFO_UNDERRUN(pch_transcoder)); POSTING_READ(SERR_INT); + trace_intel_pch_fifo_underrun(dev_priv, pch_transcoder); DRM_ERROR("pch fifo underrun on pch transcoder %s\n", transcoder_name(pch_transcoder)); } @@ -248,7 +251,7 @@ static bool __intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev, struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); bool old; - assert_spin_locked(&dev_priv->irq_lock); + 
lockdep_assert_held(&dev_priv->irq_lock); old = !crtc->cpu_fifo_underrun_disabled; crtc->cpu_fifo_underrun_disabled = !enable; @@ -368,9 +371,11 @@ void intel_cpu_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv, crtc->cpu_fifo_underrun_disabled) return; - if (intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false)) + if (intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false)) { + trace_intel_cpu_fifo_underrun(dev_priv, pipe); DRM_ERROR("CPU pipe %c FIFO underrun\n", pipe_name(pipe)); + } intel_fbc_handle_fifo_underrun_irq(dev_priv); } @@ -388,9 +393,11 @@ void intel_pch_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv, enum transcoder pch_transcoder) { if (intel_set_pch_fifo_underrun_reporting(dev_priv, pch_transcoder, - false)) + false)) { + trace_intel_pch_fifo_underrun(dev_priv, pch_transcoder); DRM_ERROR("PCH transcoder %s FIFO underrun\n", transcoder_name(pch_transcoder)); + } } /** diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c index 966de4c7c7a2..fcfc217e754e 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c @@ -114,13 +114,12 @@ static void intel_frontbuffer_flush(struct drm_i915_private *dev_priv, } void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, - bool retire, enum fb_op_origin origin, unsigned int frontbuffer_bits) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - if (retire) { + if (origin == ORIGIN_CS) { spin_lock(&dev_priv->fb_tracking.lock); /* Filter out new bits since rendering started. 
*/ frontbuffer_bits &= dev_priv->fb_tracking.busy_bits; diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.h b/drivers/gpu/drm/i915/intel_frontbuffer.h index 7bab41218cf7..63cd9a753a72 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.h +++ b/drivers/gpu/drm/i915/intel_frontbuffer.h @@ -38,7 +38,6 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, enum fb_op_origin origin, unsigned int frontbuffer_bits); void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, - bool retire, enum fb_op_origin origin, unsigned int frontbuffer_bits); @@ -69,15 +68,12 @@ static inline bool intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, /** * intel_fb_obj_flush - flush frontbuffer object * @obj: GEM object to flush - * @retire: set when retiring asynchronous rendering * @origin: which operation caused the flush * * This function gets called every time rendering on the given object has - * completed and frontbuffer caching can be started again. If @retire is true - * then any delayed flushes will be unblocked. + * completed and frontbuffer caching can be started again. 
*/ static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj, - bool retire, enum fb_op_origin origin) { unsigned int frontbuffer_bits; @@ -86,7 +82,7 @@ static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj, if (!frontbuffer_bits) return; - __intel_fb_obj_flush(obj, retire, origin, frontbuffer_bits); + __intel_fb_obj_flush(obj, origin, frontbuffer_bits); } #endif /* __INTEL_FRONTBUFFER_H__ */ diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index 25691f0e4c50..cb36cbf3818f 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -26,14 +26,14 @@ #define GFXCORE_FAMILY_GEN9 12 #define GFXCORE_FAMILY_UNKNOWN 0x7fffffff -#define GUC_CTX_PRIORITY_KMD_HIGH 0 -#define GUC_CTX_PRIORITY_HIGH 1 -#define GUC_CTX_PRIORITY_KMD_NORMAL 2 -#define GUC_CTX_PRIORITY_NORMAL 3 -#define GUC_CTX_PRIORITY_NUM 4 +#define GUC_CLIENT_PRIORITY_KMD_HIGH 0 +#define GUC_CLIENT_PRIORITY_HIGH 1 +#define GUC_CLIENT_PRIORITY_KMD_NORMAL 2 +#define GUC_CLIENT_PRIORITY_NORMAL 3 +#define GUC_CLIENT_PRIORITY_NUM 4 -#define GUC_MAX_GPU_CONTEXTS 1024 -#define GUC_INVALID_CTX_ID GUC_MAX_GPU_CONTEXTS +#define GUC_MAX_STAGE_DESCRIPTORS 1024 +#define GUC_INVALID_STAGE_ID GUC_MAX_STAGE_DESCRIPTORS #define GUC_RENDER_ENGINE 0 #define GUC_VIDEO_ENGINE 1 @@ -68,14 +68,14 @@ #define GUC_DOORBELL_ENABLED 1 #define GUC_DOORBELL_DISABLED 0 -#define GUC_CTX_DESC_ATTR_ACTIVE (1 << 0) -#define GUC_CTX_DESC_ATTR_PENDING_DB (1 << 1) -#define GUC_CTX_DESC_ATTR_KERNEL (1 << 2) -#define GUC_CTX_DESC_ATTR_PREEMPT (1 << 3) -#define GUC_CTX_DESC_ATTR_RESET (1 << 4) -#define GUC_CTX_DESC_ATTR_WQLOCKED (1 << 5) -#define GUC_CTX_DESC_ATTR_PCH (1 << 6) -#define GUC_CTX_DESC_ATTR_TERMINATED (1 << 7) +#define GUC_STAGE_DESC_ATTR_ACTIVE BIT(0) +#define GUC_STAGE_DESC_ATTR_PENDING_DB BIT(1) +#define GUC_STAGE_DESC_ATTR_KERNEL BIT(2) +#define GUC_STAGE_DESC_ATTR_PREEMPT BIT(3) +#define GUC_STAGE_DESC_ATTR_RESET BIT(4) 
+#define GUC_STAGE_DESC_ATTR_WQLOCKED BIT(5) +#define GUC_STAGE_DESC_ATTR_PCH BIT(6) +#define GUC_STAGE_DESC_ATTR_TERMINATED BIT(7) /* The guc control data is 10 DWORDs */ #define GUC_CTL_CTXINFO 0 @@ -241,8 +241,8 @@ union guc_doorbell_qw { u64 value_qw; } __packed; -#define GUC_MAX_DOORBELLS 256 -#define GUC_INVALID_DOORBELL_ID (GUC_MAX_DOORBELLS) +#define GUC_NUM_DOORBELLS 256 +#define GUC_DOORBELL_INVALID (GUC_NUM_DOORBELLS) #define GUC_DB_SIZE (PAGE_SIZE) #define GUC_WQ_SIZE (PAGE_SIZE * 2) @@ -251,12 +251,12 @@ union guc_doorbell_qw { struct guc_wq_item { u32 header; u32 context_desc; - u32 ring_tail; + u32 submit_element_info; u32 fence_id; } __packed; struct guc_process_desc { - u32 context_id; + u32 stage_id; u64 db_base_addr; u32 head; u32 tail; @@ -278,7 +278,7 @@ struct guc_execlist_context { u32 context_desc; u32 context_id; u32 ring_status; - u32 ring_lcra; + u32 ring_lrca; u32 ring_begin; u32 ring_end; u32 ring_next_free_location; @@ -289,10 +289,18 @@ struct guc_execlist_context { u16 engine_submit_queue_count; } __packed; -/*Context descriptor for communicating between uKernel and Driver*/ -struct guc_context_desc { +/* + * This structure describes a stage set arranged for a particular communication + * between uKernel (GuC) and Driver (KMD). Technically, this is known as a + * "GuC Context descriptor" in the specs, but we use the term "stage descriptor" + * to avoid confusion with all the other things already named "context" in the + * driver. A static pool of these descriptors are stored inside a GEM object + * (stage_desc_pool) which is held for the entire lifetime of our interaction + * with the GuC, being allocated before the GuC is loaded with its firmware. 
+ */ +struct guc_stage_desc { u32 sched_common_area; - u32 context_id; + u32 stage_id; u32 pas_id; u8 engines_used; u64 db_trigger_cpu; @@ -359,7 +367,7 @@ struct guc_policy { } __packed; struct guc_policies { - struct guc_policy policy[GUC_CTX_PRIORITY_NUM][GUC_MAX_ENGINES_NUM]; + struct guc_policy policy[GUC_CLIENT_PRIORITY_NUM][GUC_MAX_ENGINES_NUM]; /* In micro seconds. How much time to allow before DPC processing is * called back via interrupt (to prevent DPC queue drain starving). @@ -401,16 +409,17 @@ struct guc_mmio_regset { u32 number_of_registers; } __packed; +/* MMIO registers that are set as non privileged */ +struct mmio_white_list { + u32 mmio_start; + u32 offsets[GUC_MMIO_WHITE_LIST_MAX]; + u32 count; +} __packed; + struct guc_mmio_reg_state { struct guc_mmio_regset global_reg; struct guc_mmio_regset engine_reg[GUC_MAX_ENGINES_NUM]; - - /* MMIO registers that are set as non privileged */ - struct __packed { - u32 mmio_start; - u32 offsets[GUC_MMIO_WHITE_LIST_MAX]; - u32 count; - } mmio_white_list[GUC_MAX_ENGINES_NUM]; + struct mmio_white_list white_list[GUC_MAX_ENGINES_NUM]; } __packed; /* GuC Additional Data Struct */ diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 2f1cf9aea04e..8a1a023e48b2 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -26,7 +26,6 @@ * Dave Gordon <david.s.gordon@intel.com> * Alex Dai <yu.dai@intel.com> */ -#include <linux/firmware.h> #include "i915_drv.h" #include "intel_uc.h" @@ -74,86 +73,6 @@ MODULE_FIRMWARE(I915_BXT_GUC_UCODE); #define I915_KBL_GUC_UCODE GUC_FW_PATH(kbl, KBL_FW_MAJOR, KBL_FW_MINOR) MODULE_FIRMWARE(I915_KBL_GUC_UCODE); -/* User-friendly representation of an enum */ -const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) -{ - switch (status) { - case INTEL_UC_FIRMWARE_FAIL: - return "FAIL"; - case INTEL_UC_FIRMWARE_NONE: - return "NONE"; - case INTEL_UC_FIRMWARE_PENDING: - return "PENDING"; - 
case INTEL_UC_FIRMWARE_SUCCESS: - return "SUCCESS"; - default: - return "UNKNOWN!"; - } -}; - -static void guc_interrupts_release(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - int irqs; - - /* tell all command streamers NOT to forward interrupts or vblank to GuC */ - irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER); - irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING); - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MODE_GEN7(engine), irqs); - - /* route all GT interrupts to the host */ - I915_WRITE(GUC_BCS_RCS_IER, 0); - I915_WRITE(GUC_VCS2_VCS1_IER, 0); - I915_WRITE(GUC_WD_VECS_IER, 0); -} - -static void guc_interrupts_capture(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - int irqs; - u32 tmp; - - /* tell all command streamers to forward interrupts (but not vblank) to GuC */ - irqs = _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING); - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MODE_GEN7(engine), irqs); - - /* route USER_INTERRUPT to Host, all others are sent to GuC. */ - irqs = GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | - GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; - /* These three registers have the same bit definitions */ - I915_WRITE(GUC_BCS_RCS_IER, ~irqs); - I915_WRITE(GUC_VCS2_VCS1_IER, ~irqs); - I915_WRITE(GUC_WD_VECS_IER, ~irqs); - - /* - * The REDIRECT_TO_GUC bit of the PMINTRMSK register directs all - * (unmasked) PM interrupts to the GuC. All other bits of this - * register *disable* generation of a specific interrupt. - * - * 'pm_intr_keep' indicates bits that are NOT to be set when - * writing to the PM interrupt mask register, i.e. interrupts - * that must not be disabled. - * - * If the GuC is handling these interrupts, then we must not let - * the PM code disable ANY interrupt that the GuC is expecting. 
- * So for each ENABLED (0) bit in this register, we must SET the - * bit in pm_intr_keep so that it's left enabled for the GuC. - * - * OTOH the REDIRECT_TO_GUC bit is initially SET in pm_intr_keep - * (so interrupts go to the DISPLAY unit at first); but here we - * need to CLEAR that bit, which will result in the register bit - * being left SET! - */ - tmp = I915_READ(GEN6_PMINTRMSK); - if (tmp & GEN8_PMINTR_REDIRECT_TO_GUC) { - dev_priv->rps.pm_intr_keep |= ~tmp; - dev_priv->rps.pm_intr_keep &= ~GEN8_PMINTR_REDIRECT_TO_GUC; - } -} static u32 get_gttype(struct drm_i915_private *dev_priv) { @@ -213,16 +132,14 @@ static void guc_params_init(struct drm_i915_private *dev_priv) } else params[GUC_CTL_DEBUG] = GUC_LOG_DISABLED; - if (guc->ads_vma) { + /* If GuC submission is enabled, set up additional parameters here */ + if (i915.enable_guc_submission) { u32 ads = guc_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT; + u32 pgs = guc_ggtt_offset(dev_priv->guc.stage_desc_pool); + u32 ctx_in_16 = GUC_MAX_STAGE_DESCRIPTORS / 16; + params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT; params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED; - } - - /* If GuC submission is enabled, set up additional parameters here */ - if (i915.enable_guc_submission) { - u32 pgs = guc_ggtt_offset(dev_priv->guc.ctx_pool_vma); - u32 ctx_in_16 = GUC_MAX_GPU_CONTEXTS / 16; pgs >>= PAGE_SHIFT; params[GUC_CTL_CTXINFO] = (pgs << GUC_CTL_BASE_ADDR_SHIFT) | @@ -409,399 +326,89 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) return ret; } -static int guc_hw_reset(struct drm_i915_private *dev_priv) -{ - int ret; - u32 guc_status; - - ret = intel_guc_reset(dev_priv); - if (ret) { - DRM_ERROR("GuC reset failed, ret = %d\n", ret); - return ret; - } - - guc_status = I915_READ(GUC_STATUS); - WARN(!(guc_status & GS_MIA_IN_RESET), - "GuC status: 0x%x, MIA core expected to be in reset\n", guc_status); - - return ret; -} - /** - * intel_guc_setup() - finish preparing the GuC for activity - * @dev_priv: i915 device 
private + * intel_guc_init_hw() - finish preparing the GuC for activity + * @guc: intel_guc structure * - * Called from gem_init_hw() during driver loading and also after a GPU reset. + * Called during driver loading and also after a GPU reset. * * The main action required here it to load the GuC uCode into the device. * The firmware image should have already been fetched into memory by the - * earlier call to intel_guc_init(), so here we need only check that worked, - * and then transfer the image to the h/w. + * earlier call to intel_guc_init(), so here we need only check that + * worked, and then transfer the image to the h/w. * * Return: non-zero code on error */ -int intel_guc_setup(struct drm_i915_private *dev_priv) +int intel_guc_init_hw(struct intel_guc *guc) { - struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; - const char *fw_path = guc_fw->path; - int retries, ret, err; + struct drm_i915_private *dev_priv = guc_to_i915(guc); + const char *fw_path = guc->fw.path; + int ret; DRM_DEBUG_DRIVER("GuC fw status: path %s, fetch %s, load %s\n", fw_path, - intel_uc_fw_status_repr(guc_fw->fetch_status), - intel_uc_fw_status_repr(guc_fw->load_status)); - - /* Loading forbidden, or no firmware to load? */ - if (!i915.enable_guc_loading) { - err = 0; - goto fail; - } else if (fw_path == NULL) { - /* Device is known to have no uCode (e.g. no GuC) */ - err = -ENXIO; - goto fail; - } else if (*fw_path == '\0') { - /* Device has a GuC but we don't know what f/w to load? */ - WARN(1, "No GuC firmware known for this platform!\n"); - err = -ENODEV; - goto fail; - } - - /* Fetch failed, or already fetched but failed to load? 
*/ - if (guc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS) { - err = -EIO; - goto fail; - } else if (guc_fw->load_status == INTEL_UC_FIRMWARE_FAIL) { - err = -ENOEXEC; - goto fail; - } + intel_uc_fw_status_repr(guc->fw.fetch_status), + intel_uc_fw_status_repr(guc->fw.load_status)); - guc_interrupts_release(dev_priv); - gen9_reset_guc_interrupts(dev_priv); + if (guc->fw.fetch_status != INTEL_UC_FIRMWARE_SUCCESS) + return -EIO; - /* We need to notify the guc whenever we change the GGTT */ - i915_ggtt_enable_guc(dev_priv); - - guc_fw->load_status = INTEL_UC_FIRMWARE_PENDING; + guc->fw.load_status = INTEL_UC_FIRMWARE_PENDING; DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n", - intel_uc_fw_status_repr(guc_fw->fetch_status), - intel_uc_fw_status_repr(guc_fw->load_status)); - - err = i915_guc_submission_init(dev_priv); - if (err) - goto fail; - - /* - * WaEnableuKernelHeaderValidFix:skl,bxt - * For BXT, this is only upto B0 but below WA is required for later - * steppings also so this is extended as well. 
- */ - /* WaEnableGuCBootHashCheckNotSet:skl,bxt */ - for (retries = 3; ; ) { - /* - * Always reset the GuC just before (re)loading, so - * that the state and timing are fairly predictable - */ - err = guc_hw_reset(dev_priv); - if (err) - goto fail; - - intel_huc_load(dev_priv); - err = guc_ucode_xfer(dev_priv); - if (!err) - break; - - if (--retries == 0) - goto fail; - - DRM_INFO("GuC fw load failed: %d; will reset and " - "retry %d more time(s)\n", err, retries); - } + intel_uc_fw_status_repr(guc->fw.fetch_status), + intel_uc_fw_status_repr(guc->fw.load_status)); - guc_fw->load_status = INTEL_UC_FIRMWARE_SUCCESS; + ret = guc_ucode_xfer(dev_priv); - DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n", - intel_uc_fw_status_repr(guc_fw->fetch_status), - intel_uc_fw_status_repr(guc_fw->load_status)); + if (ret) + return -EAGAIN; - intel_guc_auth_huc(dev_priv); + guc->fw.load_status = INTEL_UC_FIRMWARE_SUCCESS; - if (i915.enable_guc_submission) { - if (i915.guc_log_level >= 0) - gen9_enable_guc_interrupts(dev_priv); - - err = i915_guc_submission_enable(dev_priv); - if (err) - goto fail; - guc_interrupts_capture(dev_priv); - } + DRM_INFO("GuC %s (firmware %s [version %u.%u])\n", + i915.enable_guc_submission ? "submission enabled" : "loaded", + guc->fw.path, + guc->fw.major_ver_found, guc->fw.minor_ver_found); return 0; - -fail: - if (guc_fw->load_status == INTEL_UC_FIRMWARE_PENDING) - guc_fw->load_status = INTEL_UC_FIRMWARE_FAIL; - - guc_interrupts_release(dev_priv); - i915_guc_submission_disable(dev_priv); - i915_guc_submission_fini(dev_priv); - i915_ggtt_disable_guc(dev_priv); - - /* - * We've failed to load the firmware :( - * - * Decide whether to disable GuC submission and fall back to - * execlist mode, and whether to hide the error by returning - * zero or to return -EIO, which the caller will treat as a - * nonfatal error (i.e. it doesn't prevent driver load, but - * marks the GPU as wedged until reset). 
- */ - if (i915.enable_guc_loading > 1) { - ret = -EIO; - } else if (i915.enable_guc_submission > 1) { - ret = -EIO; - } else { - ret = 0; - } - - if (err == 0 && !HAS_GUC_UCODE(dev_priv)) - ; /* Don't mention the GuC! */ - else if (err == 0) - DRM_INFO("GuC firmware load skipped\n"); - else if (ret != -EIO) - DRM_NOTE("GuC firmware load failed: %d\n", err); - else - DRM_WARN("GuC firmware load failed: %d\n", err); - - if (i915.enable_guc_submission) { - if (fw_path == NULL) - DRM_INFO("GuC submission without firmware not supported\n"); - if (ret == 0) - DRM_NOTE("Falling back from GuC submission to execlist mode\n"); - else - DRM_ERROR("GuC init failed: %d\n", ret); - } - i915.enable_guc_submission = 0; - - return ret; -} - -void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, - struct intel_uc_fw *uc_fw) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - struct drm_i915_gem_object *obj; - const struct firmware *fw = NULL; - struct uc_css_header *css; - size_t size; - int err; - - DRM_DEBUG_DRIVER("before requesting firmware: uC fw fetch status %s\n", - intel_uc_fw_status_repr(uc_fw->fetch_status)); - - err = request_firmware(&fw, uc_fw->path, &pdev->dev); - if (err) - goto fail; - if (!fw) - goto fail; - - DRM_DEBUG_DRIVER("fetch uC fw from %s succeeded, fw %p\n", - uc_fw->path, fw); - - /* Check the size of the blob before examining buffer contents */ - if (fw->size < sizeof(struct uc_css_header)) { - DRM_NOTE("Firmware header is missing\n"); - goto fail; - } - - css = (struct uc_css_header *)fw->data; - - /* Firmware bits always start from header */ - uc_fw->header_offset = 0; - uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw - - css->key_size_dw - css->exponent_size_dw) * sizeof(u32); - - if (uc_fw->header_size != sizeof(struct uc_css_header)) { - DRM_NOTE("CSS header definition mismatch\n"); - goto fail; - } - - /* then, uCode */ - uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size; - uc_fw->ucode_size = (css->size_dw - 
css->header_size_dw) * sizeof(u32); - - /* now RSA */ - if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) { - DRM_NOTE("RSA key size is bad\n"); - goto fail; - } - uc_fw->rsa_offset = uc_fw->ucode_offset + uc_fw->ucode_size; - uc_fw->rsa_size = css->key_size_dw * sizeof(u32); - - /* At least, it should have header, uCode and RSA. Size of all three. */ - size = uc_fw->header_size + uc_fw->ucode_size + uc_fw->rsa_size; - if (fw->size < size) { - DRM_NOTE("Missing firmware components\n"); - goto fail; - } - - /* - * The GuC firmware image has the version number embedded at a well-known - * offset within the firmware blob; note that major / minor version are - * TWO bytes each (i.e. u16), although all pointers and offsets are defined - * in terms of bytes (u8). - */ - switch (uc_fw->fw) { - case INTEL_UC_FW_TYPE_GUC: - /* Header and uCode will be loaded to WOPCM. Size of the two. */ - size = uc_fw->header_size + uc_fw->ucode_size; - - /* Top 32k of WOPCM is reserved (8K stack + 24k RC6 context). 
*/ - if (size > intel_guc_wopcm_size(dev_priv)) { - DRM_ERROR("Firmware is too large to fit in WOPCM\n"); - goto fail; - } - uc_fw->major_ver_found = css->guc.sw_version >> 16; - uc_fw->minor_ver_found = css->guc.sw_version & 0xFFFF; - break; - - case INTEL_UC_FW_TYPE_HUC: - uc_fw->major_ver_found = css->huc.sw_version >> 16; - uc_fw->minor_ver_found = css->huc.sw_version & 0xFFFF; - break; - - default: - DRM_ERROR("Unknown firmware type %d\n", uc_fw->fw); - err = -ENOEXEC; - goto fail; - } - - if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || - uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { - DRM_NOTE("uC firmware version %d.%d, required %d.%d\n", - uc_fw->major_ver_found, uc_fw->minor_ver_found, - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); - err = -ENOEXEC; - goto fail; - } - - DRM_DEBUG_DRIVER("firmware version %d.%d OK (minimum %d.%d)\n", - uc_fw->major_ver_found, uc_fw->minor_ver_found, - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); - - mutex_lock(&dev_priv->drm.struct_mutex); - obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size); - mutex_unlock(&dev_priv->drm.struct_mutex); - if (IS_ERR_OR_NULL(obj)) { - err = obj ? 
PTR_ERR(obj) : -ENOMEM; - goto fail; - } - - uc_fw->obj = obj; - uc_fw->size = fw->size; - - DRM_DEBUG_DRIVER("uC fw fetch status SUCCESS, obj %p\n", - uc_fw->obj); - - release_firmware(fw); - uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS; - return; - -fail: - DRM_WARN("Failed to fetch valid uC firmware from %s (error %d)\n", - uc_fw->path, err); - DRM_DEBUG_DRIVER("uC fw fetch status FAIL; err %d, fw %p, obj %p\n", - err, fw, uc_fw->obj); - - mutex_lock(&dev_priv->drm.struct_mutex); - obj = uc_fw->obj; - if (obj) - i915_gem_object_put(obj); - uc_fw->obj = NULL; - mutex_unlock(&dev_priv->drm.struct_mutex); - - release_firmware(fw); /* OK even if fw is NULL */ - uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL; } /** - * intel_guc_init() - define parameters and fetch firmware - * @dev_priv: i915 device private - * - * Called early during driver load, but after GEM is initialised. + * intel_guc_select_fw() - selects GuC firmware for loading + * @guc: intel_guc struct * - * The firmware will be transferred to the GuC's memory later, - * when intel_guc_setup() is called. 
+ * Return: zero when we know firmware, non-zero in other case */ -void intel_guc_init(struct drm_i915_private *dev_priv) +int intel_guc_select_fw(struct intel_guc *guc) { - struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; - const char *fw_path; + struct drm_i915_private *dev_priv = guc_to_i915(guc); - if (!HAS_GUC(dev_priv)) { - i915.enable_guc_loading = 0; - i915.enable_guc_submission = 0; - } else { - /* A negative value means "use platform default" */ - if (i915.enable_guc_loading < 0) - i915.enable_guc_loading = HAS_GUC_UCODE(dev_priv); - if (i915.enable_guc_submission < 0) - i915.enable_guc_submission = HAS_GUC_SCHED(dev_priv); - } + guc->fw.path = NULL; + guc->fw.fetch_status = INTEL_UC_FIRMWARE_NONE; + guc->fw.load_status = INTEL_UC_FIRMWARE_NONE; + guc->fw.type = INTEL_UC_FW_TYPE_GUC; - if (!HAS_GUC_UCODE(dev_priv)) { - fw_path = NULL; + if (i915.guc_firmware_path) { + guc->fw.path = i915.guc_firmware_path; + guc->fw.major_ver_wanted = 0; + guc->fw.minor_ver_wanted = 0; } else if (IS_SKYLAKE(dev_priv)) { - fw_path = I915_SKL_GUC_UCODE; - guc_fw->major_ver_wanted = SKL_FW_MAJOR; - guc_fw->minor_ver_wanted = SKL_FW_MINOR; + guc->fw.path = I915_SKL_GUC_UCODE; + guc->fw.major_ver_wanted = SKL_FW_MAJOR; + guc->fw.minor_ver_wanted = SKL_FW_MINOR; } else if (IS_BROXTON(dev_priv)) { - fw_path = I915_BXT_GUC_UCODE; - guc_fw->major_ver_wanted = BXT_FW_MAJOR; - guc_fw->minor_ver_wanted = BXT_FW_MINOR; + guc->fw.path = I915_BXT_GUC_UCODE; + guc->fw.major_ver_wanted = BXT_FW_MAJOR; + guc->fw.minor_ver_wanted = BXT_FW_MINOR; } else if (IS_KABYLAKE(dev_priv)) { - fw_path = I915_KBL_GUC_UCODE; - guc_fw->major_ver_wanted = KBL_FW_MAJOR; - guc_fw->minor_ver_wanted = KBL_FW_MINOR; + guc->fw.path = I915_KBL_GUC_UCODE; + guc->fw.major_ver_wanted = KBL_FW_MAJOR; + guc->fw.minor_ver_wanted = KBL_FW_MINOR; } else { - fw_path = ""; /* unknown device */ + DRM_ERROR("No GuC firmware known for platform with GuC!\n"); + return -ENOENT; } - guc_fw->path = fw_path; - guc_fw->fetch_status 
= INTEL_UC_FIRMWARE_NONE; - guc_fw->load_status = INTEL_UC_FIRMWARE_NONE; - - /* Early (and silent) return if GuC loading is disabled */ - if (!i915.enable_guc_loading) - return; - if (fw_path == NULL) - return; - if (*fw_path == '\0') - return; - - guc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING; - DRM_DEBUG_DRIVER("GuC firmware pending, path %s\n", fw_path); - intel_uc_fw_fetch(dev_priv, guc_fw); - /* status must now be FAIL or SUCCESS */ -} - -/** - * intel_guc_fini() - clean up all allocated resources - * @dev_priv: i915 device private - */ -void intel_guc_fini(struct drm_i915_private *dev_priv) -{ - struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; - - mutex_lock(&dev_priv->drm.struct_mutex); - guc_interrupts_release(dev_priv); - i915_guc_submission_disable(dev_priv); - i915_guc_submission_fini(dev_priv); - - if (guc_fw->obj) - i915_gem_object_put(guc_fw->obj); - guc_fw->obj = NULL; - mutex_unlock(&dev_priv->drm.struct_mutex); - - guc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; + return 0; } diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index 5c0f9a49da0e..6fb63a3c65b0 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -66,7 +66,6 @@ static int guc_log_control(struct intel_guc *guc, u32 control_val) return intel_guc_send(guc, action, ARRAY_SIZE(action)); } - /* * Sub buffer switch callback. Called whenever relay has to switch to a new * sub buffer, relay stays on the same sub buffer if 0 is returned. 
@@ -139,45 +138,15 @@ static struct rchan_callbacks relay_callbacks = { .remove_buf_file = remove_buf_file_callback, }; -static void guc_log_remove_relay_file(struct intel_guc *guc) -{ - relay_close(guc->log.relay_chan); -} - -static int guc_log_create_relay_channel(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct rchan *guc_log_relay_chan; - size_t n_subbufs, subbuf_size; - - /* Keep the size of sub buffers same as shared log buffer */ - subbuf_size = guc->log.vma->obj->base.size; - - /* Store up to 8 snapshots, which is large enough to buffer sufficient - * boot time logs and provides enough leeway to User, in terms of - * latency, for consuming the logs from relay. Also doesn't take - * up too much memory. - */ - n_subbufs = 8; - - guc_log_relay_chan = relay_open(NULL, NULL, subbuf_size, - n_subbufs, &relay_callbacks, dev_priv); - if (!guc_log_relay_chan) { - DRM_ERROR("Couldn't create relay chan for GuC logging\n"); - return -ENOMEM; - } - - GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size); - guc->log.relay_chan = guc_log_relay_chan; - return 0; -} - -static int guc_log_create_relay_file(struct intel_guc *guc) +static int guc_log_relay_file_create(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); struct dentry *log_dir; int ret; + if (i915.guc_log_level < 0) + return 0; + /* For now create the log file in /sys/kernel/debug/dri/0 dir */ log_dir = dev_priv->drm.primary->debugfs_root; @@ -197,8 +166,8 @@ static int guc_log_create_relay_file(struct intel_guc *guc) return -ENODEV; } - ret = relay_late_setup_files(guc->log.relay_chan, "guc_log", log_dir); - if (ret) { + ret = relay_late_setup_files(guc->log.runtime.relay_chan, "guc_log", log_dir); + if (ret < 0 && ret != -EEXIST) { DRM_ERROR("Couldn't associate relay chan with file %d\n", ret); return ret; } @@ -214,15 +183,15 @@ static void guc_move_to_next_buf(struct intel_guc *guc) smp_wmb(); /* All data has been written, so now move 
the offset of sub buffer. */ - relay_reserve(guc->log.relay_chan, guc->log.vma->obj->base.size); + relay_reserve(guc->log.runtime.relay_chan, guc->log.vma->obj->base.size); /* Switch to the next sub buffer */ - relay_flush(guc->log.relay_chan); + relay_flush(guc->log.runtime.relay_chan); } static void *guc_get_write_buffer(struct intel_guc *guc) { - if (!guc->log.relay_chan) + if (!guc->log.runtime.relay_chan) return NULL; /* Just get the base address of a new sub buffer and copy data into it @@ -233,7 +202,7 @@ static void *guc_get_write_buffer(struct intel_guc *guc) * done without using relay_reserve() along with relay_write(). So its * better to use relay_reserve() alone. */ - return relay_reserve(guc->log.relay_chan, 0); + return relay_reserve(guc->log.runtime.relay_chan, 0); } static bool guc_check_log_buf_overflow(struct intel_guc *guc, @@ -284,11 +253,11 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) void *src_data, *dst_data; bool new_overflow; - if (WARN_ON(!guc->log.buf_addr)) + if (WARN_ON(!guc->log.runtime.buf_addr)) return; /* Get the pointer to shared GuC log buffer */ - log_buf_state = src_data = guc->log.buf_addr; + log_buf_state = src_data = guc->log.runtime.buf_addr; /* Get the pointer to local buffer to store the logs */ log_buf_snapshot_state = dst_data = guc_get_write_buffer(guc); @@ -371,153 +340,113 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) } } -static void guc_log_cleanup(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - /* First disable the flush interrupt */ - gen9_disable_guc_interrupts(dev_priv); - - if (guc->log.flush_wq) - destroy_workqueue(guc->log.flush_wq); - - guc->log.flush_wq = NULL; - - if (guc->log.relay_chan) - guc_log_remove_relay_file(guc); - - guc->log.relay_chan = NULL; - - if (guc->log.buf_addr) - i915_gem_object_unpin_map(guc->log.vma->obj); - - guc->log.buf_addr = NULL; -} - static void 
capture_logs_work(struct work_struct *work) { struct intel_guc *guc = - container_of(work, struct intel_guc, log.flush_work); + container_of(work, struct intel_guc, log.runtime.flush_work); guc_log_capture_logs(guc); } -static int guc_log_create_extras(struct intel_guc *guc) +static bool guc_log_has_runtime(struct intel_guc *guc) +{ + return guc->log.runtime.buf_addr != NULL; +} + +static int guc_log_runtime_create(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); void *vaddr; - int ret; + struct rchan *guc_log_relay_chan; + size_t n_subbufs, subbuf_size; + int ret = 0; lockdep_assert_held(&dev_priv->drm.struct_mutex); - /* Nothing to do */ - if (i915.guc_log_level < 0) - return 0; - - if (!guc->log.buf_addr) { - /* Create a WC (Uncached for read) vmalloc mapping of log - * buffer pages, so that we can directly get the data - * (up-to-date) from memory. - */ - vaddr = i915_gem_object_pin_map(guc->log.vma->obj, I915_MAP_WC); - if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - DRM_ERROR("Couldn't map log buffer pages %d\n", ret); - return ret; - } + GEM_BUG_ON(guc_log_has_runtime(guc)); - guc->log.buf_addr = vaddr; + /* Create a WC (Uncached for read) vmalloc mapping of log + * buffer pages, so that we can directly get the data + * (up-to-date) from memory. + */ + vaddr = i915_gem_object_pin_map(guc->log.vma->obj, I915_MAP_WC); + if (IS_ERR(vaddr)) { + DRM_ERROR("Couldn't map log buffer pages %d\n", ret); + return PTR_ERR(vaddr); } - if (!guc->log.relay_chan) { - /* Create a relay channel, so that we have buffers for storing - * the GuC firmware logs, the channel will be linked with a file - * later on when debugfs is registered. 
- */ - ret = guc_log_create_relay_channel(guc); - if (ret) - return ret; - } + guc->log.runtime.buf_addr = vaddr; - if (!guc->log.flush_wq) { - INIT_WORK(&guc->log.flush_work, capture_logs_work); - - /* - * GuC log buffer flush work item has to do register access to - * send the ack to GuC and this work item, if not synced before - * suspend, can potentially get executed after the GFX device is - * suspended. - * By marking the WQ as freezable, we don't have to bother about - * flushing of this work item from the suspend hooks, the pending - * work item if any will be either executed before the suspend - * or scheduled later on resume. This way the handling of work - * item can be kept same between system suspend & rpm suspend. - */ - guc->log.flush_wq = alloc_ordered_workqueue("i915-guc_log", - WQ_HIGHPRI | WQ_FREEZABLE); - if (guc->log.flush_wq == NULL) { - DRM_ERROR("Couldn't allocate the wq for GuC logging\n"); - return -ENOMEM; - } - } - - return 0; -} - -void intel_guc_log_create(struct intel_guc *guc) -{ - struct i915_vma *vma; - unsigned long offset; - uint32_t size, flags; + /* Keep the size of sub buffers same as shared log buffer */ + subbuf_size = guc->log.vma->obj->base.size; - if (i915.guc_log_level > GUC_LOG_VERBOSITY_MAX) - i915.guc_log_level = GUC_LOG_VERBOSITY_MAX; + /* Store up to 8 snapshots, which is large enough to buffer sufficient + * boot time logs and provides enough leeway to User, in terms of + * latency, for consuming the logs from relay. Also doesn't take + * up too much memory. + */ + n_subbufs = 8; - /* The first page is to save log buffer state. Allocate one - * extra page for others in case for overlap */ - size = (1 + GUC_LOG_DPC_PAGES + 1 + - GUC_LOG_ISR_PAGES + 1 + - GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT; + /* Create a relay channel, so that we have buffers for storing + * the GuC firmware logs, the channel will be linked with a file + * later on when debugfs is registered. 
+ */ + guc_log_relay_chan = relay_open(NULL, NULL, subbuf_size, + n_subbufs, &relay_callbacks, dev_priv); + if (!guc_log_relay_chan) { + DRM_ERROR("Couldn't create relay chan for GuC logging\n"); - vma = guc->log.vma; - if (!vma) { - /* We require SSE 4.1 for fast reads from the GuC log buffer and - * it should be present on the chipsets supporting GuC based - * submisssions. - */ - if (WARN_ON(!i915_has_memcpy_from_wc())) { - /* logging will not be enabled */ - i915.guc_log_level = -1; - return; - } + ret = -ENOMEM; + goto err_vaddr; + } - vma = intel_guc_allocate_vma(guc, size); - if (IS_ERR(vma)) { - /* logging will be off */ - i915.guc_log_level = -1; - return; - } + GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size); + guc->log.runtime.relay_chan = guc_log_relay_chan; + + INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work); + + /* + * GuC log buffer flush work item has to do register access to + * send the ack to GuC and this work item, if not synced before + * suspend, can potentially get executed after the GFX device is + * suspended. + * By marking the WQ as freezable, we don't have to bother about + * flushing of this work item from the suspend hooks, the pending + * work item if any will be either executed before the suspend + * or scheduled later on resume. This way the handling of work + * item can be kept same between system suspend & rpm suspend. 
+ */ + guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log", + WQ_HIGHPRI | WQ_FREEZABLE); + if (!guc->log.runtime.flush_wq) { + DRM_ERROR("Couldn't allocate the wq for GuC logging\n"); + ret = -ENOMEM; + goto err_relaychan; + } - guc->log.vma = vma; + return 0; - if (guc_log_create_extras(guc)) { - guc_log_cleanup(guc); - i915_vma_unpin_and_release(&guc->log.vma); - i915.guc_log_level = -1; - return; - } - } +err_relaychan: + relay_close(guc->log.runtime.relay_chan); +err_vaddr: + i915_gem_object_unpin_map(guc->log.vma->obj); + guc->log.runtime.buf_addr = NULL; + return ret; +} - /* each allocated unit is a page */ - flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL | - (GUC_LOG_DPC_PAGES << GUC_LOG_DPC_SHIFT) | - (GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) | - (GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT); +static void guc_log_runtime_destroy(struct intel_guc *guc) +{ + /* + * It's possible that the runtime stuff was never allocated because + * guc_log_level was < 0 at the time + **/ + if (!guc_log_has_runtime(guc)) + return; - offset = guc_ggtt_offset(vma) >> PAGE_SHIFT; /* in pages */ - guc->log.flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags; + destroy_workqueue(guc->log.runtime.flush_wq); + relay_close(guc->log.runtime.relay_chan); + i915_gem_object_unpin_map(guc->log.vma->obj); + guc->log.runtime.buf_addr = NULL; } static int guc_log_late_setup(struct intel_guc *guc) @@ -527,24 +456,25 @@ static int guc_log_late_setup(struct intel_guc *guc) lockdep_assert_held(&dev_priv->drm.struct_mutex); - if (i915.guc_log_level < 0) - return -EINVAL; - - /* If log_level was set as -1 at boot time, then setup needed to - * handle log buffer flush interrupts would not have been done yet, - * so do that now. - */ - ret = guc_log_create_extras(guc); - if (ret) - goto err; + if (!guc_log_has_runtime(guc)) { + /* If log_level was set as -1 at boot time, then setup needed to + * handle log buffer flush interrupts would not have been done yet, + * so do that now. 
+ */ + ret = guc_log_runtime_create(guc); + if (ret) + goto err; + } - ret = guc_log_create_relay_file(guc); + ret = guc_log_relay_file_create(guc); if (ret) - goto err; + goto err_runtime; return 0; + +err_runtime: + guc_log_runtime_destroy(guc); err: - guc_log_cleanup(guc); /* logging will remain off */ i915.guc_log_level = -1; return ret; @@ -577,7 +507,7 @@ static void guc_flush_logs(struct intel_guc *guc) /* Before initiating the forceful flush, wait for any pending/ongoing * flush to complete otherwise forceful flush may not actually happen. */ - flush_work(&guc->log.flush_work); + flush_work(&guc->log.runtime.flush_work); /* Ask GuC to update the log buffer state */ guc_log_flush(guc); @@ -586,6 +516,72 @@ static void guc_flush_logs(struct intel_guc *guc) guc_log_capture_logs(guc); } +int intel_guc_log_create(struct intel_guc *guc) +{ + struct i915_vma *vma; + unsigned long offset; + uint32_t size, flags; + int ret; + + GEM_BUG_ON(guc->log.vma); + + if (i915.guc_log_level > GUC_LOG_VERBOSITY_MAX) + i915.guc_log_level = GUC_LOG_VERBOSITY_MAX; + + /* The first page is to save log buffer state. Allocate one + * extra page for others in case for overlap */ + size = (1 + GUC_LOG_DPC_PAGES + 1 + + GUC_LOG_ISR_PAGES + 1 + + GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT; + + /* We require SSE 4.1 for fast reads from the GuC log buffer and + * it should be present on the chipsets supporting GuC based + * submisssions. 
+ */ + if (WARN_ON(!i915_has_memcpy_from_wc())) { + ret = -EINVAL; + goto err; + } + + vma = intel_guc_allocate_vma(guc, size); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err; + } + + guc->log.vma = vma; + + if (i915.guc_log_level >= 0) { + ret = guc_log_runtime_create(guc); + if (ret < 0) + goto err_vma; + } + + /* each allocated unit is a page */ + flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL | + (GUC_LOG_DPC_PAGES << GUC_LOG_DPC_SHIFT) | + (GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) | + (GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT); + + offset = guc_ggtt_offset(vma) >> PAGE_SHIFT; /* in pages */ + guc->log.flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags; + + return 0; + +err_vma: + i915_vma_unpin_and_release(&guc->log.vma); +err: + /* logging will be off */ + i915.guc_log_level = -1; + return ret; +} + +void intel_guc_log_destroy(struct intel_guc *guc) +{ + guc_log_runtime_destroy(guc); + i915_vma_unpin_and_release(&guc->log.vma); +} + int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val) { struct intel_guc *guc = &dev_priv->guc; @@ -609,17 +605,22 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val) return ret; } - i915.guc_log_level = log_param.verbosity; + if (log_param.logging_enabled) { + i915.guc_log_level = log_param.verbosity; - /* If log_level was set as -1 at boot time, then the relay channel file - * wouldn't have been created by now and interrupts also would not have - * been enabled. - */ - if (!dev_priv->guc.log.relay_chan) { + /* If log_level was set as -1 at boot time, then the relay channel file + * wouldn't have been created by now and interrupts also would not have + * been enabled. Try again now, just in case. 
+ */ ret = guc_log_late_setup(guc); - if (!ret) - gen9_enable_guc_interrupts(dev_priv); - } else if (!log_param.logging_enabled) { + if (ret < 0) { + DRM_DEBUG_DRIVER("GuC log late setup failed %d\n", ret); + return ret; + } + + /* GuC logging is currently the only user of Guc2Host interrupts */ + gen9_enable_guc_interrupts(dev_priv); + } else { /* Once logging is disabled, GuC won't generate logs & send an * interrupt. But there could be some data in the log buffer * which is yet to be captured. So request GuC to update the log @@ -629,9 +630,6 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val) /* As logging is disabled, update log level to reflect that */ i915.guc_log_level = -1; - } else { - /* In case interrupts were disabled, enable them now */ - gen9_enable_guc_interrupts(dev_priv); } return ret; @@ -639,7 +637,7 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val) void i915_guc_log_register(struct drm_i915_private *dev_priv) { - if (!i915.enable_guc_submission) + if (!i915.enable_guc_submission || i915.guc_log_level < 0) return; mutex_lock(&dev_priv->drm.struct_mutex); @@ -653,6 +651,8 @@ void i915_guc_log_unregister(struct drm_i915_private *dev_priv) return; mutex_lock(&dev_priv->drm.struct_mutex); - guc_log_cleanup(&dev_priv->guc); + /* GuC logging is currently the only user of Guc2Host interrupts */ + gen9_disable_guc_interrupts(dev_priv); + guc_log_runtime_destroy(&dev_priv->guc); mutex_unlock(&dev_priv->drm.struct_mutex); } diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c index 8c04eca84351..e1ab6432a914 100644 --- a/drivers/gpu/drm/i915/intel_gvt.c +++ b/drivers/gpu/drm/i915/intel_gvt.c @@ -45,6 +45,8 @@ static bool is_supported_device(struct drm_i915_private *dev_priv) return true; if (IS_SKYLAKE(dev_priv)) return true; + if (IS_KABYLAKE(dev_priv) && INTEL_DEVID(dev_priv) == 0x591D) + return true; return false; } diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c 
b/drivers/gpu/drm/i915/intel_hangcheck.c index f05971f5586f..dce742243ba6 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -480,3 +480,7 @@ void intel_hangcheck_init(struct drm_i915_private *i915) INIT_DELAYED_WORK(&i915->gpu_error.hangcheck_work, i915_hangcheck_elapsed); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_hangcheck.c" +#endif diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 24b2fa5b6282..1d623b5e09d6 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -34,6 +34,7 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_crtc.h> #include <drm/drm_edid.h> +#include <drm/drm_scdc_helper.h> #include "intel_drv.h" #include <drm/i915_drm.h> #include <drm/intel_lpe_audio.h> @@ -902,12 +903,11 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); - enum intel_display_power_domain power_domain; u32 tmp; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -927,7 +927,7 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder, ret = true; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } @@ -1209,6 +1209,8 @@ static int intel_hdmi_source_max_tmds_clock(struct drm_i915_private *dev_priv) { if (IS_G4X(dev_priv)) return 165000; + else if (IS_GEMINILAKE(dev_priv)) + return 594000; else if (IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8) return 300000; else @@ -1335,6 +1337,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, struct intel_hdmi 
*intel_hdmi = enc_to_intel_hdmi(&encoder->base); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + struct drm_scdc *scdc = &conn_state->connector->display_info.hdmi.scdc; int clock_8bpc = pipe_config->base.adjusted_mode.crtc_clock; int clock_12bpc = clock_8bpc * 3 / 2; int desired_bpp; @@ -1404,6 +1407,16 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, pipe_config->lane_count = 4; + if (scdc->scrambling.supported && IS_GEMINILAKE(dev_priv)) { + if (scdc->scrambling.low_rates) + pipe_config->hdmi_scrambling = true; + + if (pipe_config->port_clock > 340000) { + pipe_config->hdmi_scrambling = true; + pipe_config->hdmi_high_tmds_clock_ratio = true; + } + } + return true; } @@ -1813,6 +1826,57 @@ intel_hdmi_add_properties(struct intel_hdmi *intel_hdmi, struct drm_connector *c intel_hdmi->aspect_ratio = HDMI_PICTURE_ASPECT_NONE; } +/* + * intel_hdmi_handle_sink_scrambling: handle sink scrambling/clock ratio setup + * @encoder: intel_encoder + * @connector: drm_connector + * @high_tmds_clock_ratio = bool to indicate if the function needs to set + * or reset the high tmds clock ratio for scrambling + * @scrambling: bool to Indicate if the function needs to set or reset + * sink scrambling + * + * This function handles scrambling on HDMI 2.0 capable sinks. + * If required clock rate is > 340 Mhz && scrambling is supported by sink + * it enables scrambling. This should be called before enabling the HDMI + * 2.0 port, as the sink can choose to disable the scrambling if it doesn't + * detect a scrambled clock within 100 ms. 
+ */ +void intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder, + struct drm_connector *connector, + bool high_tmds_clock_ratio, + bool scrambling) +{ + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct drm_i915_private *dev_priv = connector->dev->dev_private; + struct drm_scrambling *sink_scrambling = + &connector->display_info.hdmi.scdc.scrambling; + struct i2c_adapter *adptr = intel_gmbus_get_adapter(dev_priv, + intel_hdmi->ddc_bus); + bool ret; + + if (!sink_scrambling->supported) + return; + + DRM_DEBUG_KMS("Setting sink scrambling for enc:%s connector:%s\n", + encoder->base.name, connector->name); + + /* Set TMDS bit clock ratio to 1/40 or 1/10 */ + ret = drm_scdc_set_high_tmds_clock_ratio(adptr, high_tmds_clock_ratio); + if (!ret) { + DRM_ERROR("Set TMDS ratio failed\n"); + return; + } + + /* Enable/disable sink scrambling */ + ret = drm_scdc_set_scrambling(adptr, scrambling); + if (!ret) { + DRM_ERROR("Set sink scrambling failed\n"); + return; + } + + DRM_DEBUG_KMS("sink scrambling handled\n"); +} + static u8 intel_hdmi_ddc_pin(struct drm_i915_private *dev_priv, enum port port) { @@ -1887,14 +1951,7 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, switch (port) { case PORT_B: - /* - * On BXT A0/A1, sw needs to activate DDIA HPD logic and - * interrupts to check the external panel connection. 
- */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - intel_encoder->hpd_pin = HPD_PORT_A; - else - intel_encoder->hpd_pin = HPD_PORT_B; + intel_encoder->hpd_pin = HPD_PORT_B; break; case PORT_C: intel_encoder->hpd_pin = HPD_PORT_C; @@ -2006,6 +2063,7 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv, } intel_encoder->type = INTEL_OUTPUT_HDMI; + intel_encoder->power_domain = intel_port_to_power_domain(port); intel_encoder->port = port; if (IS_CHERRYVIEW(dev_priv)) { if (port == PORT_D) diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index 54208bef7a83..f1200272a699 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -100,7 +100,6 @@ bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port) } #define HPD_STORM_DETECT_PERIOD 1000 -#define HPD_STORM_THRESHOLD 5 #define HPD_STORM_REENABLE_DELAY (2 * 60 * 1000) /** @@ -112,9 +111,13 @@ bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port) * storms. Only the pin specific stats and state are changed, the caller is * responsible for further action. * - * @HPD_STORM_THRESHOLD irqs are allowed within @HPD_STORM_DETECT_PERIOD ms, - * otherwise it's considered an irq storm, and the irq state is set to - * @HPD_MARK_DISABLED. + * The number of irqs that are allowed within @HPD_STORM_DETECT_PERIOD is + * stored in @dev_priv->hotplug.hpd_storm_threshold which defaults to + * @HPD_STORM_DEFAULT_THRESHOLD. If this threshold is exceeded, it's + * considered an irq storm and the irq state is set to @HPD_MARK_DISABLED. + * + * The HPD threshold can be controlled through i915_hpd_storm_ctl in debugfs, + * and should only be adjusted for automated hotplug testing. * * Return true if an irq storm was detected on @pin. 
*/ @@ -123,13 +126,15 @@ static bool intel_hpd_irq_storm_detect(struct drm_i915_private *dev_priv, { unsigned long start = dev_priv->hotplug.stats[pin].last_jiffies; unsigned long end = start + msecs_to_jiffies(HPD_STORM_DETECT_PERIOD); + const int threshold = dev_priv->hotplug.hpd_storm_threshold; bool storm = false; if (!time_in_range(jiffies, start, end)) { dev_priv->hotplug.stats[pin].last_jiffies = jiffies; dev_priv->hotplug.stats[pin].count = 0; DRM_DEBUG_KMS("Received HPD interrupt on PIN %d - cnt: 0\n", pin); - } else if (dev_priv->hotplug.stats[pin].count > HPD_STORM_THRESHOLD) { + } else if (dev_priv->hotplug.stats[pin].count > threshold && + threshold) { dev_priv->hotplug.stats[pin].state = HPD_MARK_DISABLED; DRM_DEBUG_KMS("HPD interrupt storm detected on PIN %d\n", pin); storm = true; @@ -145,16 +150,17 @@ static bool intel_hpd_irq_storm_detect(struct drm_i915_private *dev_priv, static void intel_hpd_irq_storm_disable(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; - struct drm_mode_config *mode_config = &dev->mode_config; struct intel_connector *intel_connector; struct intel_encoder *intel_encoder; struct drm_connector *connector; + struct drm_connector_list_iter conn_iter; enum hpd_pin pin; bool hpd_disabled = false; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); - list_for_each_entry(connector, &mode_config->connector_list, head) { + drm_connector_list_iter_begin(dev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { if (connector->polled != DRM_CONNECTOR_POLL_HPD) continue; @@ -177,6 +183,7 @@ static void intel_hpd_irq_storm_disable(struct drm_i915_private *dev_priv) | DRM_CONNECTOR_POLL_DISCONNECT; hpd_disabled = true; } + drm_connector_list_iter_end(&conn_iter); /* Enable polling and queue hotplug re-enabling. 
*/ if (hpd_disabled) { @@ -192,7 +199,6 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work) container_of(work, typeof(*dev_priv), hotplug.reenable_work.work); struct drm_device *dev = &dev_priv->drm; - struct drm_mode_config *mode_config = &dev->mode_config; int i; intel_runtime_pm_get(dev_priv); @@ -200,13 +206,15 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work) spin_lock_irq(&dev_priv->irq_lock); for_each_hpd_pin(i) { struct drm_connector *connector; + struct drm_connector_list_iter conn_iter; if (dev_priv->hotplug.stats[i].state != HPD_DISABLED) continue; dev_priv->hotplug.stats[i].state = HPD_ENABLED; - list_for_each_entry(connector, &mode_config->connector_list, head) { + drm_connector_list_iter_begin(dev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { struct intel_connector *intel_connector = to_intel_connector(connector); if (intel_connector->encoder->hpd_pin == i) { @@ -218,6 +226,7 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work) connector->polled = DRM_CONNECTOR_POLL_HPD; } } + drm_connector_list_iter_end(&conn_iter); } if (dev_priv->display_irqs_enabled && dev_priv->display.hpd_irq_setup) dev_priv->display.hpd_irq_setup(dev_priv); @@ -234,7 +243,8 @@ static bool intel_hpd_irq_event(struct drm_device *dev, WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); old_status = connector->status; - connector->status = connector->funcs->detect(connector, false); + connector->status = drm_helper_probe_detect(connector, NULL, false); + if (old_status == connector->status) return false; @@ -303,14 +313,14 @@ static void i915_hotplug_work_func(struct work_struct *work) struct drm_i915_private *dev_priv = container_of(work, struct drm_i915_private, hotplug.hotplug_work); struct drm_device *dev = &dev_priv->drm; - struct drm_mode_config *mode_config = &dev->mode_config; struct intel_connector *intel_connector; struct intel_encoder *intel_encoder; struct drm_connector 
*connector; + struct drm_connector_list_iter conn_iter; bool changed = false; u32 hpd_event_bits; - mutex_lock(&mode_config->mutex); + mutex_lock(&dev->mode_config.mutex); DRM_DEBUG_KMS("running encoder hotplug functions\n"); spin_lock_irq(&dev_priv->irq_lock); @@ -323,7 +333,8 @@ static void i915_hotplug_work_func(struct work_struct *work) spin_unlock_irq(&dev_priv->irq_lock); - list_for_each_entry(connector, &mode_config->connector_list, head) { + drm_connector_list_iter_begin(dev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { intel_connector = to_intel_connector(connector); if (!intel_connector->encoder) continue; @@ -337,7 +348,8 @@ static void i915_hotplug_work_func(struct work_struct *work) changed = true; } } - mutex_unlock(&mode_config->mutex); + drm_connector_list_iter_end(&conn_iter); + mutex_unlock(&dev->mode_config.mutex); if (changed) drm_kms_helper_hotplug_event(dev); @@ -485,15 +497,16 @@ static void i915_hpd_poll_init_work(struct work_struct *work) container_of(work, struct drm_i915_private, hotplug.poll_init_work); struct drm_device *dev = &dev_priv->drm; - struct drm_mode_config *mode_config = &dev->mode_config; struct drm_connector *connector; + struct drm_connector_list_iter conn_iter; bool enabled; mutex_lock(&dev->mode_config.mutex); enabled = READ_ONCE(dev_priv->hotplug.poll_enabled); - list_for_each_entry(connector, &mode_config->connector_list, head) { + drm_connector_list_iter_begin(dev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { struct intel_connector *intel_connector = to_intel_connector(connector); connector->polled = intel_connector->polled; @@ -511,6 +524,7 @@ static void i915_hpd_poll_init_work(struct work_struct *work) DRM_CONNECTOR_POLL_HPD; } } + drm_connector_list_iter_end(&conn_iter); if (enabled) drm_kms_helper_poll_enable(dev); diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index c144609425f6..9ee819666a4c 100644 --- 
a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -141,58 +141,43 @@ static int huc_ucode_xfer(struct drm_i915_private *dev_priv) } /** - * intel_huc_init() - initiate HuC firmware loading request - * @dev_priv: the drm_i915_private device - * - * Called early during driver load, but after GEM is initialised. The loading - * will continue only when driver explicitly specify firmware name and version. - * All other cases are considered as INTEL_UC_FIRMWARE_NONE either because HW - * is not capable or driver yet support it. And there will be no error message - * for INTEL_UC_FIRMWARE_NONE cases. - * - * The DMA-copying to HW is done later when intel_huc_load() is called. + * intel_huc_select_fw() - selects HuC firmware for loading + * @huc: intel_huc struct */ -void intel_huc_init(struct drm_i915_private *dev_priv) +void intel_huc_select_fw(struct intel_huc *huc) { - struct intel_huc *huc = &dev_priv->huc; - struct intel_uc_fw *huc_fw = &huc->fw; - const char *fw_path = NULL; - - huc_fw->path = NULL; - huc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; - huc_fw->load_status = INTEL_UC_FIRMWARE_NONE; - huc_fw->fw = INTEL_UC_FW_TYPE_HUC; - - if (!HAS_HUC_UCODE(dev_priv)) - return; - - if (IS_SKYLAKE(dev_priv)) { - fw_path = I915_SKL_HUC_UCODE; - huc_fw->major_ver_wanted = SKL_HUC_FW_MAJOR; - huc_fw->minor_ver_wanted = SKL_HUC_FW_MINOR; + struct drm_i915_private *dev_priv = huc_to_i915(huc); + + huc->fw.path = NULL; + huc->fw.fetch_status = INTEL_UC_FIRMWARE_NONE; + huc->fw.load_status = INTEL_UC_FIRMWARE_NONE; + huc->fw.type = INTEL_UC_FW_TYPE_HUC; + + if (i915.huc_firmware_path) { + huc->fw.path = i915.huc_firmware_path; + huc->fw.major_ver_wanted = 0; + huc->fw.minor_ver_wanted = 0; + } else if (IS_SKYLAKE(dev_priv)) { + huc->fw.path = I915_SKL_HUC_UCODE; + huc->fw.major_ver_wanted = SKL_HUC_FW_MAJOR; + huc->fw.minor_ver_wanted = SKL_HUC_FW_MINOR; } else if (IS_BROXTON(dev_priv)) { - fw_path = I915_BXT_HUC_UCODE; - huc_fw->major_ver_wanted = 
BXT_HUC_FW_MAJOR; - huc_fw->minor_ver_wanted = BXT_HUC_FW_MINOR; + huc->fw.path = I915_BXT_HUC_UCODE; + huc->fw.major_ver_wanted = BXT_HUC_FW_MAJOR; + huc->fw.minor_ver_wanted = BXT_HUC_FW_MINOR; } else if (IS_KABYLAKE(dev_priv)) { - fw_path = I915_KBL_HUC_UCODE; - huc_fw->major_ver_wanted = KBL_HUC_FW_MAJOR; - huc_fw->minor_ver_wanted = KBL_HUC_FW_MINOR; + huc->fw.path = I915_KBL_HUC_UCODE; + huc->fw.major_ver_wanted = KBL_HUC_FW_MAJOR; + huc->fw.minor_ver_wanted = KBL_HUC_FW_MINOR; + } else { + DRM_ERROR("No HuC firmware known for platform with HuC!\n"); + return; } - - huc_fw->path = fw_path; - huc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING; - - DRM_DEBUG_DRIVER("HuC firmware pending, path %s\n", fw_path); - - WARN(huc_fw->path == NULL, "HuC present but no fw path\n"); - - intel_uc_fw_fetch(dev_priv, huc_fw); } /** - * intel_huc_load() - load HuC uCode to device - * @dev_priv: the drm_i915_private device + * intel_huc_init_hw() - load HuC uCode to device + * @huc: intel_huc structure * * Called from guc_setup() during driver loading and also after a GPU reset. * Be note that HuC loading must be done before GuC loading. 
@@ -203,26 +188,26 @@ void intel_huc_init(struct drm_i915_private *dev_priv) * * Return: non-zero code on error */ -int intel_huc_load(struct drm_i915_private *dev_priv) +int intel_huc_init_hw(struct intel_huc *huc) { - struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; + struct drm_i915_private *dev_priv = huc_to_i915(huc); int err; - if (huc_fw->fetch_status == INTEL_UC_FIRMWARE_NONE) + if (huc->fw.fetch_status == INTEL_UC_FIRMWARE_NONE) return 0; DRM_DEBUG_DRIVER("%s fw status: fetch %s, load %s\n", - huc_fw->path, - intel_uc_fw_status_repr(huc_fw->fetch_status), - intel_uc_fw_status_repr(huc_fw->load_status)); + huc->fw.path, + intel_uc_fw_status_repr(huc->fw.fetch_status), + intel_uc_fw_status_repr(huc->fw.load_status)); - if (huc_fw->fetch_status == INTEL_UC_FIRMWARE_SUCCESS && - huc_fw->load_status == INTEL_UC_FIRMWARE_FAIL) + if (huc->fw.fetch_status == INTEL_UC_FIRMWARE_SUCCESS && + huc->fw.load_status == INTEL_UC_FIRMWARE_FAIL) return -ENOEXEC; - huc_fw->load_status = INTEL_UC_FIRMWARE_PENDING; + huc->fw.load_status = INTEL_UC_FIRMWARE_PENDING; - switch (huc_fw->fetch_status) { + switch (huc->fw.fetch_status) { case INTEL_UC_FIRMWARE_FAIL: /* something went wrong :( */ err = -EIO; @@ -233,9 +218,9 @@ int intel_huc_load(struct drm_i915_private *dev_priv) default: /* "can't happen" */ WARN_ONCE(1, "HuC fw %s invalid fetch_status %s [%d]\n", - huc_fw->path, - intel_uc_fw_status_repr(huc_fw->fetch_status), - huc_fw->fetch_status); + huc->fw.path, + intel_uc_fw_status_repr(huc->fw.fetch_status), + huc->fw.fetch_status); err = -ENXIO; goto fail; @@ -247,18 +232,18 @@ int intel_huc_load(struct drm_i915_private *dev_priv) if (err) goto fail; - huc_fw->load_status = INTEL_UC_FIRMWARE_SUCCESS; + huc->fw.load_status = INTEL_UC_FIRMWARE_SUCCESS; DRM_DEBUG_DRIVER("%s fw status: fetch %s, load %s\n", - huc_fw->path, - intel_uc_fw_status_repr(huc_fw->fetch_status), - intel_uc_fw_status_repr(huc_fw->load_status)); + huc->fw.path, + 
intel_uc_fw_status_repr(huc->fw.fetch_status), + intel_uc_fw_status_repr(huc->fw.load_status)); return 0; fail: - if (huc_fw->load_status == INTEL_UC_FIRMWARE_PENDING) - huc_fw->load_status = INTEL_UC_FIRMWARE_FAIL; + if (huc->fw.load_status == INTEL_UC_FIRMWARE_PENDING) + huc->fw.load_status = INTEL_UC_FIRMWARE_FAIL; DRM_ERROR("Failed to complete HuC uCode load with ret %d\n", err); @@ -266,25 +251,6 @@ fail: } /** - * intel_huc_fini() - clean up resources allocated for HuC - * @dev_priv: the drm_i915_private device - * - * Cleans up by releasing the huc firmware GEM obj. - */ -void intel_huc_fini(struct drm_i915_private *dev_priv) -{ - struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; - - mutex_lock(&dev_priv->drm.struct_mutex); - if (huc_fw->obj) - i915_gem_object_put(huc_fw->obj); - huc_fw->obj = NULL; - mutex_unlock(&dev_priv->drm.struct_mutex); - - huc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; -} - -/** * intel_guc_auth_huc() - authenticate ucode * @dev_priv: the drm_i915_device * diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index bce1ba80f277..b6401e8f1bd6 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -74,7 +74,7 @@ static const struct gmbus_pin *get_gmbus_pin(struct drm_i915_private *dev_priv, { if (IS_GEN9_LP(dev_priv)) return &gmbus_pins_bxt[pin]; - else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + else if (IS_GEN9_BC(dev_priv)) return &gmbus_pins_skl[pin]; else if (IS_BROADWELL(dev_priv)) return &gmbus_pins_bdw[pin]; @@ -89,7 +89,7 @@ bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv, if (IS_GEN9_LP(dev_priv)) size = ARRAY_SIZE(gmbus_pins_bxt); - else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + else if (IS_GEN9_BC(dev_priv)) size = ARRAY_SIZE(gmbus_pins_skl); else if (IS_BROADWELL(dev_priv)) size = ARRAY_SIZE(gmbus_pins_bdw); diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 
7a5b41b1c024..25d8e76489e4 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -131,8 +131,15 @@ err: static void lpe_audio_platdev_destroy(struct drm_i915_private *dev_priv) { + /* XXX Note that platform_device_register_full() allocates a dma_mask + * and never frees it. We can't free it here as we cannot guarantee + * this is the last reference (i.e. that the dma_mask will not be + * used after our unregister). So we choose to leak the sizeof(u64) + * allocation here - it should be fixed in the platform_device rather + * than us fiddle with its internals. + */ + platform_device_unregister(dev_priv->lpe_audio.platdev); - kfree(dev_priv->lpe_audio.platdev->dev.dma_mask); } static void lpe_audio_irq_unmask(struct irq_data *d) @@ -331,6 +338,7 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv) * audio driver and i915 * @dev_priv: the i915 drm device private data * @eld : ELD data + * @pipe: pipe id * @port: port id * @tmds_clk_speed: tmds clock frequency in Hz * diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 47517a02f0a4..c8f7c631fc1f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -190,13 +190,7 @@ #define CTX_R_PWR_CLK_STATE 0x42 #define CTX_GPGPU_CSR_BASE_ADDRESS 0x44 -#define GEN8_CTX_VALID (1<<0) -#define GEN8_CTX_FORCE_PD_RESTORE (1<<1) -#define GEN8_CTX_FORCE_RESTORE (1<<2) -#define GEN8_CTX_L3LLC_COHERENT (1<<5) -#define GEN8_CTX_PRIVILEGE (1<<8) - -#define ASSIGN_CTX_REG(reg_state, pos, reg, val) do { \ +#define CTX_REG(reg_state, pos, reg, val) do { \ (reg_state)[(pos)+0] = i915_mmio_reg_offset(reg); \ (reg_state)[(pos)+1] = (val); \ } while (0) @@ -212,14 +206,6 @@ reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \ } while (0) -enum { - FAULT_AND_HANG = 0, - FAULT_AND_HALT, /* Debug only */ - FAULT_AND_STREAM, - FAULT_AND_CONTINUE /* Unsupported */ -}; -#define GEN8_CTX_ID_SHIFT 32 -#define 
GEN8_CTX_ID_WIDTH 21 #define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17 #define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26 @@ -267,30 +253,6 @@ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enabl return 0; } -static void -logical_ring_init_platform_invariants(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - engine->disable_lite_restore_wa = - IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1) && - (engine->id == VCS || engine->id == VCS2); - - engine->ctx_desc_template = GEN8_CTX_VALID; - if (IS_GEN8(dev_priv)) - engine->ctx_desc_template |= GEN8_CTX_L3LLC_COHERENT; - engine->ctx_desc_template |= GEN8_CTX_PRIVILEGE; - - /* TODO: WaDisableLiteRestore when we start using semaphore - * signalling between Command Streamers */ - /* ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; */ - - /* WaEnableForceRestoreInCtxtDescForVCS:skl */ - /* WaEnableForceRestoreInCtxtDescForVCS:bxt */ - if (engine->disable_lite_restore_wa) - engine->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; -} - /** * intel_lr_context_descriptor_update() - calculate & cache the descriptor * descriptor for a pinned context @@ -304,7 +266,7 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine) * * This is what a descriptor looks like, from LSB to MSB:: * - * bits 0-11: flags, GEN8_CTX_* (cached in ctx_desc_template) + * bits 0-11: flags, GEN8_CTX_* (cached in ctx->desc_template) * bits 12-31: LRCA, GTT address of (the HWSP of) this context * bits 32-52: ctx ID, a globally unique tag * bits 53-54: mbz, reserved for use by hardware @@ -319,8 +281,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx, BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (1<<GEN8_CTX_ID_WIDTH)); - desc = ctx->desc_template; /* bits 3-4 */ - desc |= engine->ctx_desc_template; /* bits 0-11 */ + desc = ctx->desc_template; /* bits 0-11 */ desc |= i915_ggtt_offset(ce->state) + LRC_PPHWSP_PN * PAGE_SIZE; /* bits 12-31 */ desc |= (u64)ctx->hw_id << 
GEN8_CTX_ID_SHIFT; /* bits 32-52 */ @@ -365,6 +326,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; u32 *reg_state = ce->lrc_reg_state; + assert_ring_tail_valid(rq->ring, rq->tail); reg_state[CTX_RING_TAIL+1] = rq->tail; /* True 32b PPGTT with dynamic page allocation: update PDP @@ -372,7 +334,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) * PML4 is allocated during ppgtt init, so this is not needed * in 48-bit mode. */ - if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) + if (ppgtt && !i915_vm_is_48bit(&ppgtt->base)) execlists_update_context_pdps(ppgtt, reg_state); return ce->lrc_desc; @@ -386,17 +348,20 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine)); u64 desc[2]; + GEM_BUG_ON(port[0].count > 1); if (!port[0].count) execlists_context_status_change(port[0].request, INTEL_CONTEXT_SCHEDULE_IN); desc[0] = execlists_update_context(port[0].request); - engine->preempt_wa = port[0].count++; /* bdw only? fixed on skl? */ + GEM_DEBUG_EXEC(port[0].context_id = upper_32_bits(desc[0])); + port[0].count++; if (port[1].request) { GEM_BUG_ON(port[1].count); execlists_context_status_change(port[1].request, INTEL_CONTEXT_SCHEDULE_IN); desc[1] = execlists_update_context(port[1].request); + GEM_DEBUG_EXEC(port[1].context_id = upper_32_bits(desc[1])); port[1].count = 1; } else { desc[1] = 0; @@ -434,7 +399,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) { struct drm_i915_gem_request *last; struct execlist_port *port = engine->execlist_port; - unsigned long flags; struct rb_node *rb; bool submit = false; @@ -471,7 +435,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * and context switches) submission. 
*/ - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irq(&engine->timeline->lock); rb = engine->execlist_first; while (rb) { struct drm_i915_gem_request *cursor = @@ -515,6 +479,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) cursor->priotree.priority = INT_MAX; __i915_gem_request_submit(cursor); + trace_i915_gem_request_in(cursor, port - engine->execlist_port); last = cursor; submit = true; } @@ -522,7 +487,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) i915_gem_request_assign(&port->request, last); engine->execlist_first = rb; } - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irq(&engine->timeline->lock); if (submit) execlists_submit_ports(engine); @@ -533,37 +498,11 @@ static bool execlists_elsp_idle(struct intel_engine_cs *engine) return !engine->execlist_port[0].request; } -/** - * intel_execlists_idle() - Determine if all engine submission ports are idle - * @dev_priv: i915 device private - * - * Return true if there are no requests pending on any of the submission ports - * of any engines. 
- */ -bool intel_execlists_idle(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - if (!i915.enable_execlists) - return true; - - for_each_engine(engine, dev_priv, id) - if (!execlists_elsp_idle(engine)) - return false; - - return true; -} - -static bool execlists_elsp_ready(struct intel_engine_cs *engine) +static bool execlists_elsp_ready(const struct intel_engine_cs *engine) { - int port; - - port = 1; /* wait for a free slot */ - if (engine->disable_lite_restore_wa || engine->preempt_wa) - port = 0; /* wait for GPU to be idle before continuing */ + const struct execlist_port *port = engine->execlist_port; - return !engine->execlist_port[port].request; + return port[0].count + port[1].count < 2; } /* @@ -578,44 +517,80 @@ static void intel_lrc_irq_handler(unsigned long data) intel_uncore_forcewake_get(dev_priv, engine->fw_domains); - if (!execlists_elsp_idle(engine)) { + /* Prefer doing test_and_clear_bit() as a two stage operation to avoid + * imposing the cost of a locked atomic transaction when submitting a + * new request (outside of the context-switch interrupt). + */ + while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) { u32 __iomem *csb_mmio = dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)); u32 __iomem *buf = dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)); - unsigned int csb, head, tail; - - csb = readl(csb_mmio); - head = GEN8_CSB_READ_PTR(csb); - tail = GEN8_CSB_WRITE_PTR(csb); - if (tail < head) - tail += GEN8_CSB_ENTRIES; - while (head < tail) { - unsigned int idx = ++head % GEN8_CSB_ENTRIES; - unsigned int status = readl(buf + 2 * idx); + unsigned int head, tail; + + /* The write will be ordered by the uncached read (itself + * a memory barrier), so we do not need another in the form + * of a locked instruction. The race between the interrupt + * handler and the split test/clear is harmless as we order + * our clear before the CSB read. 
If the interrupt arrived + * first between the test and the clear, we read the updated + * CSB and clear the bit. If the interrupt arrives as we read + * the CSB or later (i.e. after we had cleared the bit) the bit + * is set and we do a new loop. + */ + __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + head = readl(csb_mmio); + tail = GEN8_CSB_WRITE_PTR(head); + head = GEN8_CSB_READ_PTR(head); + while (head != tail) { + unsigned int status; + + if (++head == GEN8_CSB_ENTRIES) + head = 0; + + /* We are flying near dragons again. + * + * We hold a reference to the request in execlist_port[] + * but no more than that. We are operating in softirq + * context and so cannot hold any mutex or sleep. That + * prevents us stopping the requests we are processing + * in port[] from being retired simultaneously (the + * breadcrumb will be complete before we see the + * context-switch). As we only hold the reference to the + * request, any pointer chasing underneath the request + * is subject to a potential use-after-free. Thus we + * store all of the bookkeeping within port[] as + * required, and avoid using unguarded pointers beneath + * request itself. The same applies to the atomic + * status notifier. 
+ */ + status = readl(buf + 2 * head); if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) continue; + /* Check the context/desc id for this event matches */ + GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) != + port[0].context_id); + GEM_BUG_ON(port[0].count == 0); if (--port[0].count == 0) { GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); + GEM_BUG_ON(!i915_gem_request_completed(port[0].request)); execlists_context_status_change(port[0].request, INTEL_CONTEXT_SCHEDULE_OUT); + trace_i915_gem_request_out(port[0].request); i915_gem_request_put(port[0].request); port[0] = port[1]; memset(&port[1], 0, sizeof(port[1])); - - engine->preempt_wa = false; } GEM_BUG_ON(port[0].count == 0 && !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); } - writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, - GEN8_CSB_WRITE_PTR(csb) << 8), + writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), csb_mmio); } @@ -659,10 +634,11 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) /* Will be called from irq-context when using foreign fences. */ spin_lock_irqsave(&engine->timeline->lock, flags); - if (insert_request(&request->priotree, &engine->execlist_queue)) + if (insert_request(&request->priotree, &engine->execlist_queue)) { engine->execlist_first = &request->priotree.node; - if (execlists_elsp_idle(engine)) - tasklet_hi_schedule(&engine->irq_tasklet); + if (execlists_elsp_ready(engine)) + tasklet_hi_schedule(&engine->irq_tasklet); + } spin_unlock_irqrestore(&engine->timeline->lock, flags); } @@ -772,6 +748,7 @@ static int execlists_context_pin(struct intel_engine_cs *engine, if (ce->pin_count++) return 0; + GEM_BUG_ON(!ce->pin_count); /* no overflow please! 
*/ if (!ce->state) { ret = execlists_context_deferred_alloc(ctx, engine); @@ -780,11 +757,9 @@ static int execlists_context_pin(struct intel_engine_cs *engine, } GEM_BUG_ON(!ce->state); - flags = PIN_GLOBAL; + flags = PIN_GLOBAL | PIN_HIGH; if (ctx->ggtt_offset_bias) flags |= PIN_OFFSET_BIAS | ctx->ggtt_offset_bias; - if (i915_gem_context_is_kernel(ctx)) - flags |= PIN_HIGH; ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN, flags); if (ret) @@ -843,6 +818,7 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) { struct intel_engine_cs *engine = request->engine; struct intel_context *ce = &request->ctx->engine[engine->id]; + u32 *cs; int ret; GEM_BUG_ON(!ce->pin_count); @@ -867,9 +843,11 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) goto err; } - ret = intel_ring_begin(request, 0); - if (ret) + cs = intel_ring_begin(request, 0); + if (IS_ERR(cs)) { + ret = PTR_ERR(cs); goto err_unreserve; + } if (!ce->initialised) { ret = engine->init_context(request); @@ -896,51 +874,6 @@ err: return ret; } -static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) -{ - int ret, i; - struct intel_ring *ring = req->ring; - struct i915_workarounds *w = &req->i915->workarounds; - - if (w->count == 0) - return 0; - - ret = req->engine->emit_flush(req, EMIT_BARRIER); - if (ret) - return ret; - - ret = intel_ring_begin(req, w->count * 2 + 2); - if (ret) - return ret; - - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count)); - for (i = 0; i < w->count; i++) { - intel_ring_emit_reg(ring, w->reg[i].addr); - intel_ring_emit(ring, w->reg[i].value); - } - intel_ring_emit(ring, MI_NOOP); - - intel_ring_advance(ring); - - ret = req->engine->emit_flush(req, EMIT_BARRIER); - if (ret) - return ret; - - return 0; -} - -#define wa_ctx_emit(batch, index, cmd) \ - do { \ - int __index = (index)++; \ - if (WARN_ON(__index >= (PAGE_SIZE / sizeof(uint32_t)))) { \ - return -ENOSPC; \ - } \ - batch[__index] = (cmd); \ - } 
while (0) - -#define wa_ctx_emit_reg(batch, index, reg) \ - wa_ctx_emit((batch), (index), i915_mmio_reg_offset(reg)) - /* * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after * PIPE_CONTROL instruction. This is required for the flush to happen correctly @@ -957,56 +890,29 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) * This WA is also required for Gen9 so extracting as a function avoids * code duplication. */ -static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, - uint32_t *batch, - uint32_t index) +static u32 * +gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) { - uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES); - - wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 | - MI_SRM_LRM_GLOBAL_GTT)); - wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); - wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256); - wa_ctx_emit(batch, index, 0); - - wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); - wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); - wa_ctx_emit(batch, index, l3sqc4_flush); - - wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); - wa_ctx_emit(batch, index, (PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_DC_FLUSH_ENABLE)); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - - wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 | - MI_SRM_LRM_GLOBAL_GTT)); - wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); - wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256); - wa_ctx_emit(batch, index, 0); - - return index; -} + *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; + *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); + *batch++ = i915_ggtt_offset(engine->scratch) + 256; + *batch++ = 0; -static inline uint32_t wa_ctx_start(struct i915_wa_ctx_bb *wa_ctx, - uint32_t offset, - uint32_t start_alignment) -{ - return wa_ctx->offset = 
ALIGN(offset, start_alignment); -} + *batch++ = MI_LOAD_REGISTER_IMM(1); + *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); + *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES; -static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx, - uint32_t offset, - uint32_t size_alignment) -{ - wa_ctx->size = offset - wa_ctx->offset; + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_DC_FLUSH_ENABLE, + 0); - WARN(wa_ctx->size % size_alignment, - "wa_ctx_bb failed sanity checks: size %d is not aligned to %d\n", - wa_ctx->size, size_alignment); - return 0; + *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; + *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); + *batch++ = i915_ggtt_offset(engine->scratch) + 256; + *batch++ = 0; + + return batch; } /* @@ -1024,42 +930,28 @@ static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx, * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together * makes a complete batch buffer. */ -static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, - struct i915_wa_ctx_bb *wa_ctx, - uint32_t *batch, - uint32_t *offset) +static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) { - uint32_t scratch_addr; - uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - /* WaDisableCtxRestoreArbitration:bdw,chv */ - wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE); + *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */ - if (IS_BROADWELL(engine->i915)) { - int rc = gen8_emit_flush_coherentl3_wa(engine, batch, index); - if (rc < 0) - return rc; - index = rc; - } + if (IS_BROADWELL(engine->i915)) + batch = gen8_emit_flush_coherentl3_wa(engine, batch); /* WaClearSlmSpaceAtContextSwitch:bdw,chv */ /* Actual scratch location is at 128 bytes offset */ - scratch_addr = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; - - wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); - 
wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE)); - wa_ctx_emit(batch, index, scratch_addr); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE, + i915_ggtt_offset(engine->scratch) + + 2 * CACHELINE_BYTES); /* Pad to end of cacheline */ - while (index % CACHELINE_DWORDS) - wa_ctx_emit(batch, index, MI_NOOP); + while ((unsigned long)batch % CACHELINE_BYTES) + *batch++ = MI_NOOP; /* * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because @@ -1067,7 +959,7 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, * in the register CTX_RCS_INDIRECT_CTX */ - return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS); + return batch; } /* @@ -1079,65 +971,40 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, * This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding * to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant. 
*/ -static int gen8_init_perctx_bb(struct intel_engine_cs *engine, - struct i915_wa_ctx_bb *wa_ctx, - uint32_t *batch, - uint32_t *offset) +static u32 *gen8_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch) { - uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - /* WaDisableCtxRestoreArbitration:bdw,chv */ - wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE); - - wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END); + *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + *batch++ = MI_BATCH_BUFFER_END; - return wa_ctx_end(wa_ctx, *offset = index, 1); + return batch; } -static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, - struct i915_wa_ctx_bb *wa_ctx, - uint32_t *batch, - uint32_t *offset) +static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) { - int ret; - struct drm_i915_private *dev_priv = engine->i915; - uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); + /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ + batch = gen8_emit_flush_coherentl3_wa(engine, batch); - /* WaDisableCtxRestoreArbitration:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE); - - /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt */ - ret = gen8_emit_flush_coherentl3_wa(engine, batch, index); - if (ret < 0) - return ret; - index = ret; - - /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl */ - wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); - wa_ctx_emit_reg(batch, index, COMMON_SLICE_CHICKEN2); - wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE( - GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE)); - wa_ctx_emit(batch, index, MI_NOOP); + /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */ + *batch++ = MI_LOAD_REGISTER_IMM(1); + *batch++ = i915_mmio_reg_offset(COMMON_SLICE_CHICKEN2); + *batch++ = _MASKED_BIT_DISABLE( + GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE); + *batch++ = MI_NOOP; /* WaClearSlmSpaceAtContextSwitch:kbl */ /* 
Actual scratch location is at 128 bytes offset */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)) { - u32 scratch_addr = - i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; - - wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); - wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE)); - wa_ctx_emit(batch, index, scratch_addr); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); + if (IS_KBL_REVID(engine->i915, 0, KBL_REVID_A0)) { + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE, + i915_ggtt_offset(engine->scratch) + + 2 * CACHELINE_BYTES); } - /* WaMediaPoolStateCmdInWABB:bxt */ + /* WaMediaPoolStateCmdInWABB:bxt,glk */ if (HAS_POOLED_EU(engine->i915)) { /* * EU pool configuration is setup along with golden context @@ -1152,73 +1019,37 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, * possible configurations, to avoid duplication they are * not shown here again. 
*/ - u32 eu_pool_config = 0x00777000; - wa_ctx_emit(batch, index, GEN9_MEDIA_POOL_STATE); - wa_ctx_emit(batch, index, GEN9_MEDIA_POOL_ENABLE); - wa_ctx_emit(batch, index, eu_pool_config); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); + *batch++ = GEN9_MEDIA_POOL_STATE; + *batch++ = GEN9_MEDIA_POOL_ENABLE; + *batch++ = 0x00777000; + *batch++ = 0; + *batch++ = 0; + *batch++ = 0; } /* Pad to end of cacheline */ - while (index % CACHELINE_DWORDS) - wa_ctx_emit(batch, index, MI_NOOP); + while ((unsigned long)batch % CACHELINE_BYTES) + *batch++ = MI_NOOP; - return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS); + return batch; } -static int gen9_init_perctx_bb(struct intel_engine_cs *engine, - struct i915_wa_ctx_bb *wa_ctx, - uint32_t *batch, - uint32_t *offset) +static u32 *gen9_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch) { - uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - - /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */ - if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1)) { - wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); - wa_ctx_emit_reg(batch, index, GEN9_SLICE_COMMON_ECO_CHICKEN0); - wa_ctx_emit(batch, index, - _MASKED_BIT_ENABLE(DISABLE_PIXEL_MASK_CAMMING)); - wa_ctx_emit(batch, index, MI_NOOP); - } + *batch++ = MI_BATCH_BUFFER_END; - /* WaClearTdlStateAckDirtyBits:bxt */ - if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_B0)) { - wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(4)); - - wa_ctx_emit_reg(batch, index, GEN8_STATE_ACK); - wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(GEN9_SUBSLICE_TDL_ACK_BITS)); - - wa_ctx_emit_reg(batch, index, GEN9_STATE_ACK_SLICE1); - wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(GEN9_SUBSLICE_TDL_ACK_BITS)); - - wa_ctx_emit_reg(batch, index, GEN9_STATE_ACK_SLICE2); - wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(GEN9_SUBSLICE_TDL_ACK_BITS)); - - wa_ctx_emit_reg(batch, index, GEN7_ROW_CHICKEN2); - /* dummy write to CS, mask 
bits are 0 to ensure the register is not modified */ - wa_ctx_emit(batch, index, 0x0); - wa_ctx_emit(batch, index, MI_NOOP); - } - - /* WaDisableCtxRestoreArbitration:bxt */ - if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1)) - wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE); - - wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END); - - return wa_ctx_end(wa_ctx, *offset = index, 1); + return batch; } -static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size) +#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE) + +static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) { struct drm_i915_gem_object *obj; struct i915_vma *vma; int err; - obj = i915_gem_object_create(engine->i915, PAGE_ALIGN(size)); + obj = i915_gem_object_create(engine->i915, CTX_WA_BB_OBJ_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -1240,82 +1071,79 @@ err: return err; } -static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *engine) +static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine) { i915_vma_unpin_and_release(&engine->wa_ctx.vma); } +typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch); + static int intel_init_workaround_bb(struct intel_engine_cs *engine) { struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; - uint32_t *batch; - uint32_t offset; + struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx, + &wa_ctx->per_ctx }; + wa_bb_func_t wa_bb_fn[2]; struct page *page; + void *batch, *batch_ptr; + unsigned int i; int ret; - WARN_ON(engine->id != RCS); + if (WARN_ON(engine->id != RCS || !engine->scratch)) + return -EINVAL; - /* update this when WA for higher Gen are added */ - if (INTEL_GEN(engine->i915) > 9) { - DRM_ERROR("WA batch buffer is not initialized for Gen%d\n", - INTEL_GEN(engine->i915)); + switch (INTEL_GEN(engine->i915)) { + case 9: + wa_bb_fn[0] = gen9_init_indirectctx_bb; + wa_bb_fn[1] = gen9_init_perctx_bb; + break; + case 8: + wa_bb_fn[0] = gen8_init_indirectctx_bb; + wa_bb_fn[1] = gen8_init_perctx_bb; + break; + default: + 
MISSING_CASE(INTEL_GEN(engine->i915)); return 0; } - /* some WA perform writes to scratch page, ensure it is valid */ - if (!engine->scratch) { - DRM_ERROR("scratch page not allocated for %s\n", engine->name); - return -EINVAL; - } - - ret = lrc_setup_wa_ctx_obj(engine, PAGE_SIZE); + ret = lrc_setup_wa_ctx(engine); if (ret) { DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret); return ret; } page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0); - batch = kmap_atomic(page); - offset = 0; - - if (IS_GEN8(engine->i915)) { - ret = gen8_init_indirectctx_bb(engine, - &wa_ctx->indirect_ctx, - batch, - &offset); - if (ret) - goto out; + batch = batch_ptr = kmap_atomic(page); - ret = gen8_init_perctx_bb(engine, - &wa_ctx->per_ctx, - batch, - &offset); - if (ret) - goto out; - } else if (IS_GEN9(engine->i915)) { - ret = gen9_init_indirectctx_bb(engine, - &wa_ctx->indirect_ctx, - batch, - &offset); - if (ret) - goto out; - - ret = gen9_init_perctx_bb(engine, - &wa_ctx->per_ctx, - batch, - &offset); - if (ret) - goto out; + /* + * Emit the two workaround batch buffers, recording the offset from the + * start of the workaround batch buffer object for each and their + * respective sizes. + */ + for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) { + wa_bb[i]->offset = batch_ptr - batch; + if (WARN_ON(!IS_ALIGNED(wa_bb[i]->offset, CACHELINE_BYTES))) { + ret = -EINVAL; + break; + } + batch_ptr = wa_bb_fn[i](engine, batch_ptr); + wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset); } -out: + BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE); + kunmap_atomic(batch); if (ret) - lrc_destroy_wa_ctx_obj(engine); + lrc_destroy_wa_ctx(engine); return ret; } +static u32 port_seqno(struct execlist_port *port) +{ + return port->request ? 
port->request->global_seqno : 0; +} + static int gen8_init_common_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1330,7 +1158,6 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff); I915_WRITE(RING_MODE_GEN7(engine), - _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) | _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); I915_WRITE(RING_HWS_PGA(engine->mmio_base), engine->status_page.ggtt_offset); @@ -1339,7 +1166,12 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name); /* After a GPU reset, we may have requests to replay */ - if (!execlists_elsp_idle(engine)) { + clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) { + DRM_DEBUG_DRIVER("Restarting %s from requests [0x%x, 0x%x]\n", + engine->name, + port_seqno(&engine->execlist_port[0]), + port_seqno(&engine->execlist_port[1])); engine->execlist_port[0].count = 0; engine->execlist_port[1].count = 0; execlists_submit_ports(engine); @@ -1384,7 +1216,6 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine) static void reset_common_ring(struct intel_engine_cs *engine, struct drm_i915_gem_request *request) { - struct drm_i915_private *dev_priv = engine->i915; struct execlist_port *port = engine->execlist_port; struct intel_context *ce; @@ -1418,14 +1249,9 @@ static void reset_common_ring(struct intel_engine_cs *engine, ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix; request->ring->head = request->postfix; - request->ring->last_retired_head = -1; intel_ring_update_space(request->ring); - if (i915.enable_guc_submission) - return; - /* Catch up with any missed context-switch interrupts */ - I915_WRITE(RING_CONTEXT_STATUS_PTR(engine), _MASKED_FIELD(0xffff, 0)); if (request->ctx != port[0].request->ctx) { i915_gem_request_put(port[0].request); port[0] = port[1]; @@ -1438,42 
+1264,42 @@ static void reset_common_ring(struct intel_engine_cs *engine, request->tail = intel_ring_wrap(request->ring, request->wa_tail - WA_TAIL_DWORDS*sizeof(u32)); + assert_ring_tail_valid(request->ring, request->tail); } static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) { struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; - const int num_lri_cmds = GEN8_LEGACY_PDPES * 2; - int i, ret; + const int num_lri_cmds = GEN8_3LVL_PDPES * 2; + u32 *cs; + int i; - ret = intel_ring_begin(req, num_lri_cmds * 2 + 2); - if (ret) - return ret; + cs = intel_ring_begin(req, num_lri_cmds * 2 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_lri_cmds)); - for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { + *cs++ = MI_LOAD_REGISTER_IMM(num_lri_cmds); + for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) { const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); - intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, i)); - intel_ring_emit(ring, upper_32_bits(pd_daddr)); - intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, i)); - intel_ring_emit(ring, lower_32_bits(pd_daddr)); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i)); + *cs++ = upper_32_bits(pd_daddr); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i)); + *cs++ = lower_32_bits(pd_daddr); } - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } static int gen8_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, - unsigned int dispatch_flags) + const unsigned int flags) { - struct intel_ring *ring = req->ring; - bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE); + u32 *cs; int ret; /* Don't rely in hw updating PDPs, specially in lite-restore. @@ -1483,30 +1309,28 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, * not idle). 
PML4 is allocated during ppgtt init so this is * not needed in 48-bit.*/ if (req->ctx->ppgtt && - (intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings)) { - if (!USES_FULL_48BIT_PPGTT(req->i915) && - !intel_vgpu_active(req->i915)) { - ret = intel_logical_ring_emit_pdps(req); - if (ret) - return ret; - } + (intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings) && + !i915_vm_is_48bit(&req->ctx->ppgtt->base) && + !intel_vgpu_active(req->i915)) { + ret = intel_logical_ring_emit_pdps(req); + if (ret) + return ret; req->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(req->engine); } - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* FIXME(BDW): Address space and security selectors. */ - intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | - (ppgtt<<8) | - (dispatch_flags & I915_DISPATCH_RS ? - MI_BATCH_RESOURCE_STREAMER : 0)); - intel_ring_emit(ring, lower_32_bits(offset)); - intel_ring_emit(ring, upper_32_bits(offset)); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START_GEN8 | + (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)) | + (flags & I915_DISPATCH_RS ? 
MI_BATCH_RESOURCE_STREAMER : 0); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1527,13 +1351,11 @@ static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) { - struct intel_ring *ring = request->ring; - u32 cmd; - int ret; + u32 cmd, *cs; - ret = intel_ring_begin(request, 4); - if (ret) - return ret; + cs = intel_ring_begin(request, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); cmd = MI_FLUSH_DW + 1; @@ -1550,13 +1372,11 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) cmd |= MI_INVALIDATE_BSD; } - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, - I915_GEM_HWS_SCRATCH_ADDR | - MI_FLUSH_DW_USE_GTT); - intel_ring_emit(ring, 0); /* upper addr */ - intel_ring_emit(ring, 0); /* value */ - intel_ring_advance(ring); + *cs++ = cmd; + *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = 0; /* upper addr */ + *cs++ = 0; /* value */ + intel_ring_advance(request, cs); return 0; } @@ -1564,13 +1384,11 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) static int gen8_emit_flush_render(struct drm_i915_gem_request *request, u32 mode) { - struct intel_ring *ring = request->ring; struct intel_engine_cs *engine = request->engine; u32 scratch_addr = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; bool vf_flush_wa = false, dc_flush_wa = false; - u32 flags = 0; - int ret; + u32 *cs, flags = 0; int len; flags |= PIPE_CONTROL_CS_STALL; @@ -1612,62 +1430,25 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, if (dc_flush_wa) len += 12; - ret = intel_ring_begin(request, len); - if (ret) - return ret; + cs = intel_ring_begin(request, len); + if (IS_ERR(cs)) + return PTR_ERR(cs); - if (vf_flush_wa) { - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - 
intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - } + if (vf_flush_wa) + cs = gen8_emit_pipe_control(cs, 0, 0); - if (dc_flush_wa) { - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, PIPE_CONTROL_DC_FLUSH_ENABLE); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - } + if (dc_flush_wa) + cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE, + 0); - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, flags); - intel_ring_emit(ring, scratch_addr); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - - if (dc_flush_wa) { - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, PIPE_CONTROL_CS_STALL); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - } + cs = gen8_emit_pipe_control(cs, flags, scratch_addr); - intel_ring_advance(ring); + if (dc_flush_wa) + cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0); - return 0; -} + intel_ring_advance(request, cs); -static void bxt_a_seqno_barrier(struct intel_engine_cs *engine) -{ - /* - * On BXT A steppings there is a HW coherency issue whereby the - * MI_STORE_DATA_IMM storing the completed request's seqno - * occasionally doesn't invalidate the CPU cache. Work around this by - * clflushing the corresponding cacheline whenever the caller wants - * the coherency to be guaranteed. Note that this cacheline is known - * to be clean at this point, since we only write it in - * bxt_a_set_seqno(), where we also do a clflush after the write. So - * this clflush in practice becomes an invalidate operation. - */ - intel_flush_status_page(engine, I915_GEM_HWS_INDEX); + return 0; } /* @@ -1675,34 +1456,34 @@ static void bxt_a_seqno_barrier(struct intel_engine_cs *engine) * used as a workaround for not being allowed to do lite * restore with HEAD==TAIL (WaIdleLiteRestore). 
*/ -static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *out) +static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs) { - *out++ = MI_NOOP; - *out++ = MI_NOOP; - request->wa_tail = intel_ring_offset(request->ring, out); + *cs++ = MI_NOOP; + *cs++ = MI_NOOP; + request->wa_tail = intel_ring_offset(request, cs); } -static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, - u32 *out) +static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) { /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); - *out++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; - *out++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT; - *out++ = 0; - *out++ = request->global_seqno; - *out++ = MI_USER_INTERRUPT; - *out++ = MI_NOOP; - request->tail = intel_ring_offset(request->ring, out); + *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; + *cs++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT; + *cs++ = 0; + *cs++ = request->global_seqno; + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + request->tail = intel_ring_offset(request, cs); + assert_ring_tail_valid(request->ring, request->tail); - gen8_emit_wa_tail(request, out); + gen8_emit_wa_tail(request, cs); } static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, - u32 *out) + u32 *cs) { /* We're using qword write, seqno should be aligned to 8 bytes. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); @@ -1711,20 +1492,20 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, * need a prior CS_STALL, which is emitted by the flush * following the batch. 
*/ - *out++ = GFX_OP_PIPE_CONTROL(6); - *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE); - *out++ = intel_hws_seqno_address(request->engine); - *out++ = 0; - *out++ = request->global_seqno; + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE; + *cs++ = intel_hws_seqno_address(request->engine); + *cs++ = 0; + *cs++ = request->global_seqno; /* We're thrashing one dword of HWS. */ - *out++ = 0; - *out++ = MI_USER_INTERRUPT; - *out++ = MI_NOOP; - request->tail = intel_ring_offset(request->ring, out); + *cs++ = 0; + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + request->tail = intel_ring_offset(request, cs); + assert_ring_tail_valid(request->ring, request->tail); - gen8_emit_wa_tail(request, out); + gen8_emit_wa_tail(request, cs); } static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS; @@ -1733,7 +1514,7 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req) { int ret; - ret = intel_logical_ring_workarounds_emit(req); + ret = intel_ring_workarounds_emit(req); if (ret) return ret; @@ -1779,21 +1560,17 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) intel_engine_cleanup_common(engine); - lrc_destroy_wa_ctx_obj(engine); + lrc_destroy_wa_ctx(engine); engine->i915 = NULL; dev_priv->engine[engine->id] = NULL; kfree(engine); } -void intel_execlists_enable_submission(struct drm_i915_private *dev_priv) +static void execlists_set_default_submission(struct intel_engine_cs *engine) { - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, dev_priv, id) { - engine->submit_request = execlists_submit_request; - engine->schedule = execlists_schedule; - } + engine->submit_request = execlists_submit_request; + engine->schedule = execlists_schedule; + engine->irq_tasklet.func = intel_lrc_irq_handler; } static void @@ -1811,14 +1588,12 @@ logical_ring_default_vfuncs(struct intel_engine_cs 
*engine) engine->emit_flush = gen8_emit_flush; engine->emit_breadcrumb = gen8_emit_breadcrumb; engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz; - engine->submit_request = execlists_submit_request; - engine->schedule = execlists_schedule; + + engine->set_default_submission = execlists_set_default_submission; engine->irq_enable = gen8_logical_ring_enable_irq; engine->irq_disable = gen8_logical_ring_disable_irq; engine->emit_bb_start = gen8_emit_bb_start; - if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1)) - engine->irq_seqno_barrier = bxt_a_seqno_barrier; } static inline void @@ -1875,7 +1650,6 @@ logical_ring_setup(struct intel_engine_cs *engine) tasklet_init(&engine->irq_tasklet, intel_lrc_irq_handler, (unsigned long)engine); - logical_ring_init_platform_invariants(engine); logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); } @@ -2013,105 +1787,89 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) return indirect_ctx_offset; } -static void execlists_init_reg_state(u32 *reg_state, +static void execlists_init_reg_state(u32 *regs, struct i915_gem_context *ctx, struct intel_engine_cs *engine, struct intel_ring *ring) { struct drm_i915_private *dev_priv = engine->i915; struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: dev_priv->mm.aliasing_ppgtt; + u32 base = engine->mmio_base; + bool rcs = engine->id == RCS; + + /* A context is actually a big batch buffer with several + * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The + * values we are setting here are only for the first context restore: + * on a subsequent save, the GPU will recreate this batchbuffer with new + * values (including all the missing MI_LOAD_REGISTER_IMM commands that + * we are not initializing here). + */ + regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 
14 : 11) | + MI_LRI_FORCE_POSTED; + + CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(engine), + _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | + CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | + (HAS_RESOURCE_STREAMER(dev_priv) ? + CTX_CTRL_RS_CTX_ENABLE : 0))); + CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0); + CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0); + CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0); + CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base), + RING_CTL_SIZE(ring->size) | RING_VALID); + CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0); + CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0); + CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT); + CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0); + CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0); + CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0); + if (rcs) { + CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0); + CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0); + CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET, + RING_INDIRECT_CTX_OFFSET(base), 0); - /* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM - * commands followed by (reg, value) pairs. The values we are setting here are - * only for the first context restore: on a subsequent save, the GPU will - * recreate this batchbuffer with new values (including all the missing - * MI_LOAD_REGISTER_IMM commands that we are not initializing here). */ - reg_state[CTX_LRI_HEADER_0] = - MI_LOAD_REGISTER_IMM(engine->id == RCS ? 14 : 11) | MI_LRI_FORCE_POSTED; - ASSIGN_CTX_REG(reg_state, CTX_CONTEXT_CONTROL, - RING_CONTEXT_CONTROL(engine), - _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | - CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | - (HAS_RESOURCE_STREAMER(dev_priv) ? 
- CTX_CTRL_RS_CTX_ENABLE : 0))); - ASSIGN_CTX_REG(reg_state, CTX_RING_HEAD, RING_HEAD(engine->mmio_base), - 0); - ASSIGN_CTX_REG(reg_state, CTX_RING_TAIL, RING_TAIL(engine->mmio_base), - 0); - ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_START, - RING_START(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_CONTROL, - RING_CTL(engine->mmio_base), - RING_CTL_SIZE(ring->size) | RING_VALID); - ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_U, - RING_BBADDR_UDW(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_L, - RING_BBADDR(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_BB_STATE, - RING_BBSTATE(engine->mmio_base), - RING_BB_PPGTT); - ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_HEAD_U, - RING_SBBADDR_UDW(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_HEAD_L, - RING_SBBADDR(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_STATE, - RING_SBBSTATE(engine->mmio_base), 0); - if (engine->id == RCS) { - ASSIGN_CTX_REG(reg_state, CTX_BB_PER_CTX_PTR, - RING_BB_PER_CTX_PTR(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX, - RING_INDIRECT_CTX(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX_OFFSET, - RING_INDIRECT_CTX_OFFSET(engine->mmio_base), 0); if (engine->wa_ctx.vma) { struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - reg_state[CTX_RCS_INDIRECT_CTX+1] = - (ggtt_offset + wa_ctx->indirect_ctx.offset * sizeof(uint32_t)) | - (wa_ctx->indirect_ctx.size / CACHELINE_DWORDS); + regs[CTX_RCS_INDIRECT_CTX + 1] = + (ggtt_offset + wa_ctx->indirect_ctx.offset) | + (wa_ctx->indirect_ctx.size / CACHELINE_BYTES); - reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = + regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] = intel_lr_indirect_ctx_offset(engine) << 6; - reg_state[CTX_BB_PER_CTX_PTR+1] = - (ggtt_offset + wa_ctx->per_ctx.offset * sizeof(uint32_t)) | - 0x01; + regs[CTX_BB_PER_CTX_PTR + 1] = + (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; } } 
- reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED; - ASSIGN_CTX_REG(reg_state, CTX_CTX_TIMESTAMP, - RING_CTX_TIMESTAMP(engine->mmio_base), 0); + + regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED; + + CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0); /* PDP values well be assigned later if needed */ - ASSIGN_CTX_REG(reg_state, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(engine, 3), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(engine, 3), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(engine, 2), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(engine, 2), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(engine, 1), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(engine, 1), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), - 0); - - if (ppgtt && USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { + CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(engine, 3), 0); + CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(engine, 3), 0); + CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(engine, 2), 0); + CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(engine, 2), 0); + CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(engine, 1), 0); + CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(engine, 1), 0); + CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0), 0); + CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), 0); + + if (ppgtt && i915_vm_is_48bit(&ppgtt->base)) { /* 64b PPGTT (48bit canonical) * PDP0_DESCRIPTOR contains the base address to PML4 and * other PDP Descriptors are ignored. 
*/ - ASSIGN_CTX_PML4(ppgtt, reg_state); + ASSIGN_CTX_PML4(ppgtt, regs); } - if (engine->id == RCS) { - reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); - ASSIGN_CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, - make_rpcs(dev_priv)); + if (rcs) { + regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); + CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, + make_rpcs(dev_priv)); } } @@ -2279,7 +2037,6 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv) i915_gem_object_unpin_map(ce->state->obj); ce->ring->head = ce->ring->tail = 0; - ce->ring->last_retired_head = -1; intel_ring_update_space(ce->ring); } } diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 0c852c024227..e8015e7bf4e9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -68,8 +68,6 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine); int logical_render_ring_init(struct intel_engine_cs *engine); int logical_xcs_ring_init(struct intel_engine_cs *engine); -int intel_engines_init(struct drm_i915_private *dev_priv); - /* Logical Ring Contexts */ /* One extra page is added before LRC for GuC as shared data */ @@ -89,7 +87,5 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, /* Execlists */ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enable_execlists); -void intel_execlists_enable_submission(struct drm_i915_private *dev_priv); -bool intel_execlists_idle(struct drm_i915_private *dev_priv); #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c index c300647ef604..71cbe9c08932 100644 --- a/drivers/gpu/drm/i915/intel_lspcon.c +++ b/drivers/gpu/drm/i915/intel_lspcon.c @@ -162,21 +162,8 @@ static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon) struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); unsigned long start = jiffies; - if (!lspcon->desc_valid) - return; 
- while (1) { - struct intel_dp_desc desc; - - /* - * The w/a only applies in PCON mode and we don't expect any - * AUX errors. - */ - if (!__intel_dp_read_desc(intel_dp, &desc)) - return; - - if (intel_digital_port_connected(dev_priv, dig_port) && - !memcmp(&intel_dp->desc, &desc, sizeof(desc))) { + if (intel_digital_port_connected(dev_priv, dig_port)) { DRM_DEBUG_KMS("LSPCON recovering in PCON mode after %u ms\n", jiffies_to_msecs(jiffies - start)); return; @@ -253,7 +240,7 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port) return false; } - lspcon->desc_valid = intel_dp_read_desc(dp); + intel_dp_read_desc(dp); DRM_DEBUG_KMS("Success: LSPCON init\n"); return true; diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 9ca4dc4d2378..8b942ef2b3ec 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -91,12 +91,11 @@ static bool intel_lvds_get_hw_state(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); - enum intel_display_power_domain power_domain; u32 tmp; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -114,7 +113,7 @@ static bool intel_lvds_get_hw_state(struct intel_encoder *encoder, ret = true; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } @@ -1066,6 +1065,7 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) intel_connector_attach_encoder(intel_connector, intel_encoder); intel_encoder->type = INTEL_OUTPUT_LVDS; + intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = PORT_NONE; intel_encoder->cloneable = 0; if 
(HAS_PCH_SPLIT(dev_priv)) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index c787fc4e6eb9..92e461c68385 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -178,7 +178,7 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, { bool result = false; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { table->size = ARRAY_SIZE(skylake_mocs_table); table->table = skylake_mocs_table; result = true; @@ -191,7 +191,7 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, "Platform that should have a MOCS table does not.\n"); } - /* WaDisableSkipCaching:skl,bxt,kbl */ + /* WaDisableSkipCaching:skl,bxt,kbl,glk */ if (IS_GEN9(dev_priv)) { int i; @@ -276,23 +276,22 @@ int intel_mocs_init_engine(struct intel_engine_cs *engine) static int emit_mocs_control_table(struct drm_i915_gem_request *req, const struct drm_i915_mocs_table *table) { - struct intel_ring *ring = req->ring; enum intel_engine_id engine = req->engine->id; unsigned int index; - int ret; + u32 *cs; if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) return -ENODEV; - ret = intel_ring_begin(req, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); - if (ret) - return ret; + cs = intel_ring_begin(req, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES)); + *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES); for (index = 0; index < table->size; index++) { - intel_ring_emit_reg(ring, mocs_register(engine, index)); - intel_ring_emit(ring, table->table[index].control_value); + *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); + *cs++ = table->table[index].control_value; } /* @@ -304,12 +303,12 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req, * that value to all the used entries. 
*/ for (; index < GEN9_NUM_MOCS_ENTRIES; index++) { - intel_ring_emit_reg(ring, mocs_register(engine, index)); - intel_ring_emit(ring, table->table[0].control_value); + *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); + *cs++ = table->table[0].control_value; } - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -336,29 +335,27 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, const struct drm_i915_mocs_table *table) { - struct intel_ring *ring = req->ring; unsigned int i; - int ret; + u32 *cs; if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) return -ENODEV; - ret = intel_ring_begin(req, 2 + GEN9_NUM_MOCS_ENTRIES); - if (ret) - return ret; + cs = intel_ring_begin(req, 2 + GEN9_NUM_MOCS_ENTRIES); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2)); + *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2); for (i = 0; i < table->size/2; i++) { - intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i)); - intel_ring_emit(ring, l3cc_combine(table, 2*i, 2*i+1)); + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); + *cs++ = l3cc_combine(table, 2 * i, 2 * i + 1); } if (table->size & 0x01) { /* Odd table size - 1 left over */ - intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i)); - intel_ring_emit(ring, l3cc_combine(table, 2*i, 0)); + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); + *cs++ = l3cc_combine(table, 2 * i, 0); i++; } @@ -368,12 +365,12 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, * they are reserved by the hardware. 
*/ for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) { - intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i)); - intel_ring_emit(ring, l3cc_combine(table, 0, 0)); + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); + *cs++ = l3cc_combine(table, 0, 0); } - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index 4a862a358c70..d44465190dc1 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -434,6 +434,7 @@ int intel_opregion_notify_adapter(struct drm_i915_private *dev_priv, static u32 asle_set_backlight(struct drm_i915_private *dev_priv, u32 bclp) { struct intel_connector *connector; + struct drm_connector_list_iter conn_iter; struct opregion_asle *asle = dev_priv->opregion.asle; struct drm_device *dev = &dev_priv->drm; @@ -458,8 +459,10 @@ static u32 asle_set_backlight(struct drm_i915_private *dev_priv, u32 bclp) * only one). 
*/ DRM_DEBUG_KMS("updating opregion backlight %d/255\n", bclp); - for_each_intel_connector(dev, connector) + drm_connector_list_iter_begin(dev, &conn_iter); + for_each_intel_connector_iter(connector, &conn_iter) intel_panel_set_backlight_acpi(connector, bclp, 255); + drm_connector_list_iter_end(&conn_iter); asle->cblv = DIV_ROUND_UP(bclp * 100, 255) | ASLE_CBLV_VALID; drm_modeset_unlock(&dev->mode_config.connection_mutex); @@ -701,6 +704,7 @@ static void intel_didl_outputs(struct drm_i915_private *dev_priv) { struct intel_opregion *opregion = &dev_priv->opregion; struct intel_connector *connector; + struct drm_connector_list_iter conn_iter; int i = 0, max_outputs; int display_index[16] = {}; @@ -714,7 +718,8 @@ static void intel_didl_outputs(struct drm_i915_private *dev_priv) max_outputs = ARRAY_SIZE(opregion->acpi->didl) + ARRAY_SIZE(opregion->acpi->did2); - for_each_intel_connector(&dev_priv->drm, connector) { + drm_connector_list_iter_begin(&dev_priv->drm, &conn_iter); + for_each_intel_connector_iter(connector, &conn_iter) { u32 device_id, type; device_id = acpi_display_type(connector); @@ -729,6 +734,7 @@ static void intel_didl_outputs(struct drm_i915_private *dev_priv) set_did(opregion, i, device_id); i++; } + drm_connector_list_iter_end(&conn_iter); DRM_DEBUG_KMS("%d outputs detected\n", i); @@ -745,6 +751,7 @@ static void intel_setup_cadls(struct drm_i915_private *dev_priv) { struct intel_opregion *opregion = &dev_priv->opregion; struct intel_connector *connector; + struct drm_connector_list_iter conn_iter; int i = 0; /* @@ -757,11 +764,13 @@ static void intel_setup_cadls(struct drm_i915_private *dev_priv) * Note that internal panels should be at the front of the connector * list already, ensuring they're not left out. 
*/ - for_each_intel_connector(&dev_priv->drm, connector) { + drm_connector_list_iter_begin(&dev_priv->drm, &conn_iter); + for_each_intel_connector_iter(connector, &conn_iter) { if (i >= ARRAY_SIZE(opregion->acpi->cadl)) break; opregion->acpi->cadl[i++] = connector->acpi_device_id; } + drm_connector_list_iter_end(&conn_iter); /* If fewer than 8 active devices, the list must be null terminated */ if (i < ARRAY_SIZE(opregion->acpi->cadl)) @@ -911,6 +920,8 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) char buf[sizeof(OPREGION_SIGNATURE)]; int err = 0; void *base; + const void *vbt; + u32 vbt_size; BUILD_BUG_ON(sizeof(struct opregion_header) != 0x100); BUILD_BUG_ON(sizeof(struct opregion_acpi) != 0x100); @@ -963,45 +974,46 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) if (mboxes & MBOX_ASLE_EXT) DRM_DEBUG_DRIVER("ASLE extension supported\n"); - if (!dmi_check_system(intel_no_opregion_vbt)) { - const void *vbt = NULL; - u32 vbt_size = 0; - - if (opregion->header->opregion_ver >= 2 && opregion->asle && - opregion->asle->rvda && opregion->asle->rvds) { - opregion->rvda = memremap(opregion->asle->rvda, - opregion->asle->rvds, - MEMREMAP_WB); - vbt = opregion->rvda; - vbt_size = opregion->asle->rvds; - } + if (dmi_check_system(intel_no_opregion_vbt)) + goto out; + if (opregion->header->opregion_ver >= 2 && opregion->asle && + opregion->asle->rvda && opregion->asle->rvds) { + opregion->rvda = memremap(opregion->asle->rvda, + opregion->asle->rvds, + MEMREMAP_WB); + vbt = opregion->rvda; + vbt_size = opregion->asle->rvds; if (intel_bios_is_valid_vbt(vbt, vbt_size)) { DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (RVDA)\n"); opregion->vbt = vbt; opregion->vbt_size = vbt_size; + goto out; } else { - vbt = base + OPREGION_VBT_OFFSET; - /* - * The VBT specification says that if the ASLE ext - * mailbox is not used its area is reserved, but - * on some CHT boards the VBT extends into the - * ASLE ext area. 
Allow this even though it is - * against the spec, so we do not end up rejecting - * the VBT on those boards (and end up not finding the - * LCD panel because of this). - */ - vbt_size = (mboxes & MBOX_ASLE_EXT) ? - OPREGION_ASLE_EXT_OFFSET : OPREGION_SIZE; - vbt_size -= OPREGION_VBT_OFFSET; - if (intel_bios_is_valid_vbt(vbt, vbt_size)) { - DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (Mailbox #4)\n"); - opregion->vbt = vbt; - opregion->vbt_size = vbt_size; - } + DRM_DEBUG_KMS("Invalid VBT in ACPI OpRegion (RVDA)\n"); } } + vbt = base + OPREGION_VBT_OFFSET; + /* + * The VBT specification says that if the ASLE ext mailbox is not used + * its area is reserved, but on some CHT boards the VBT extends into the + * ASLE ext area. Allow this even though it is against the spec, so we + * do not end up rejecting the VBT on those boards (and end up not + * finding the LCD panel because of this). + */ + vbt_size = (mboxes & MBOX_ASLE_EXT) ? + OPREGION_ASLE_EXT_OFFSET : OPREGION_SIZE; + vbt_size -= OPREGION_VBT_OFFSET; + if (intel_bios_is_valid_vbt(vbt, vbt_size)) { + DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (Mailbox #4)\n"); + opregion->vbt = vbt; + opregion->vbt_size = vbt_size; + } else { + DRM_DEBUG_KMS("Invalid VBT in ACPI OpRegion (Mailbox #4)\n"); + } + +out: return 0; err_out: @@ -1061,16 +1073,5 @@ intel_opregion_get_panel_type(struct drm_i915_private *dev_priv) return -ENODEV; } - /* - * FIXME On Dell XPS 13 9350 the OpRegion panel type (0) gives us - * low vswing for eDP, whereas the VBT panel type (2) gives us normal - * vswing instead. Low vswing results in some display flickers, so - * let's simply ignore the OpRegion panel type on SKL for now. 
- */ - if (IS_SKYLAKE(dev_priv)) { - DRM_DEBUG_KMS("Ignoring OpRegion panel type (%d)\n", ret - 1); - return -ENODEV; - } - return ret - 1; } diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 0608fad7f593..2e0c56ed22bb 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -267,8 +267,7 @@ static int intel_overlay_on(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; struct drm_i915_gem_request *req; - struct intel_ring *ring; - int ret; + u32 *cs; WARN_ON(overlay->active); WARN_ON(IS_I830(dev_priv) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE)); @@ -277,10 +276,10 @@ static int intel_overlay_on(struct intel_overlay *overlay) if (IS_ERR(req)) return PTR_ERR(req); - ret = intel_ring_begin(req, 4); - if (ret) { - i915_add_request_no_flush(req); - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) { + i915_add_request(req); + return PTR_ERR(cs); } overlay->active = true; @@ -288,12 +287,11 @@ static int intel_overlay_on(struct intel_overlay *overlay) if (IS_I830(dev_priv)) i830_overlay_clock_gating(dev_priv, false); - ring = req->ring; - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON); - intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE); - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_ON; + *cs++ = overlay->flip_addr | OFC_UPDATE; + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return intel_overlay_do_wait_request(overlay, req, NULL); } @@ -326,10 +324,8 @@ static int intel_overlay_continue(struct intel_overlay *overlay, { struct drm_i915_private *dev_priv = overlay->i915; struct drm_i915_gem_request *req; - struct intel_ring *ring; u32 flip_addr = overlay->flip_addr; - u32 tmp; - int ret; + u32 tmp, *cs; WARN_ON(!overlay->active); @@ -345,16 +341,15 
@@ static int intel_overlay_continue(struct intel_overlay *overlay, if (IS_ERR(req)) return PTR_ERR(req); - ret = intel_ring_begin(req, 2); - if (ret) { - i915_add_request_no_flush(req); - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) { + i915_add_request(req); + return PTR_ERR(cs); } - ring = req->ring; - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); - intel_ring_emit(ring, flip_addr); - intel_ring_advance(ring); + *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE; + *cs++ = flip_addr; + intel_ring_advance(req, cs); intel_overlay_flip_prepare(overlay, vma); @@ -408,9 +403,7 @@ static void intel_overlay_off_tail(struct i915_gem_active *active, static int intel_overlay_off(struct intel_overlay *overlay) { struct drm_i915_gem_request *req; - struct intel_ring *ring; - u32 flip_addr = overlay->flip_addr; - int ret; + u32 *cs, flip_addr = overlay->flip_addr; WARN_ON(!overlay->active); @@ -424,25 +417,23 @@ static int intel_overlay_off(struct intel_overlay *overlay) if (IS_ERR(req)) return PTR_ERR(req); - ret = intel_ring_begin(req, 6); - if (ret) { - i915_add_request_no_flush(req); - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) { + i915_add_request(req); + return PTR_ERR(cs); } - ring = req->ring; - /* wait for overlay to go idle */ - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); - intel_ring_emit(ring, flip_addr); - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); + *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE; + *cs++ = flip_addr; + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; /* turn overlay off */ - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_OFF); - intel_ring_emit(ring, flip_addr); - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); + *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_OFF; + *cs++ = flip_addr; + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; - intel_ring_advance(ring); + intel_ring_advance(req, cs); intel_overlay_flip_prepare(overlay, 
NULL); @@ -465,6 +456,7 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay) static int intel_overlay_release_old_vid(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; + u32 *cs; int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); @@ -478,23 +470,20 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) { /* synchronous slowpath */ struct drm_i915_gem_request *req; - struct intel_ring *ring; req = alloc_request(overlay); if (IS_ERR(req)) return PTR_ERR(req); - ret = intel_ring_begin(req, 2); - if (ret) { - i915_add_request_no_flush(req); - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) { + i915_add_request(req); + return PTR_ERR(cs); } - ring = req->ring; - intel_ring_emit(ring, - MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); ret = intel_overlay_do_wait_request(overlay, req, intel_overlay_release_old_vid_tail); diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 1a6ff26dea20..cb50c527401f 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -1315,7 +1315,7 @@ static u32 i9xx_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) if (IS_PINEVIEW(dev_priv)) clock = KHz(dev_priv->rawclk_freq); else - clock = KHz(dev_priv->cdclk_freq); + clock = KHz(dev_priv->cdclk.hw.cdclk); return DIV_ROUND_CLOSEST(clock, pwm_freq_hz * 32); } @@ -1333,7 +1333,7 @@ static u32 i965_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) if (IS_G4X(dev_priv)) clock = KHz(dev_priv->rawclk_freq); else - clock = KHz(dev_priv->cdclk_freq); + clock = KHz(dev_priv->cdclk.hw.cdclk); return DIV_ROUND_CLOSEST(clock, pwm_freq_hz * 128); } diff --git 
a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index c0b1f99da37b..206ee4f0150e 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -36,31 +36,6 @@ struct pipe_crc_info { enum pipe pipe; }; -/* As the drm_debugfs_init() routines are called before dev->dev_private is - * allocated we need to hook into the minor for release. - */ -static int drm_add_fake_info_node(struct drm_minor *minor, - struct dentry *ent, const void *key) -{ - struct drm_info_node *node; - - node = kmalloc(sizeof(*node), GFP_KERNEL); - if (node == NULL) { - debugfs_remove(ent); - return -ENOMEM; - } - - node->minor = minor; - node->dent = ent; - node->info_ent = (void *) key; - - mutex_lock(&minor->debugfs_lock); - list_add(&node->list, &minor->debugfs_list); - mutex_unlock(&minor->debugfs_lock); - - return 0; -} - static int i915_pipe_crc_open(struct inode *inode, struct file *filep) { struct pipe_crc_info *info = inode->i_private; @@ -105,7 +80,7 @@ static int i915_pipe_crc_release(struct inode *inode, struct file *filep) static int pipe_crc_data_count(struct intel_pipe_crc *pipe_crc) { - assert_spin_locked(&pipe_crc->lock); + lockdep_assert_held(&pipe_crc->lock); return CIRC_CNT(pipe_crc->head, pipe_crc->tail, INTEL_PIPE_CRC_ENTRIES_NR); } @@ -209,22 +184,6 @@ static struct pipe_crc_info i915_pipe_crc_data[I915_MAX_PIPES] = { }, }; -static int i915_pipe_crc_create(struct dentry *root, struct drm_minor *minor, - enum pipe pipe) -{ - struct drm_i915_private *dev_priv = to_i915(minor->dev); - struct dentry *ent; - struct pipe_crc_info *info = &i915_pipe_crc_data[pipe]; - - info->dev_priv = dev_priv; - ent = debugfs_create_file(info->name, S_IRUGO, root, info, - &i915_pipe_crc_fops); - if (!ent) - return -ENOMEM; - - return drm_add_fake_info_node(minor, ent, info); -} - static const char * const pipe_crc_sources[] = { "none", "plane1", @@ -563,7 +522,7 @@ static void hsw_trans_edp_pipe_A_crc_wa(struct drm_i915_private 
*dev_priv, goto unlock; } - state->acquire_ctx = drm_modeset_legacy_acquire_ctx(&crtc->base); + state->acquire_ctx = crtc->base.dev->mode_config.acquire_ctx; pipe_config = intel_atomic_get_crtc_state(state, crtc); if (IS_ERR(pipe_config)) { ret = PTR_ERR(pipe_config); @@ -928,27 +887,22 @@ void intel_display_crc_init(struct drm_i915_private *dev_priv) int intel_pipe_crc_create(struct drm_minor *minor) { - int ret, i; - - for (i = 0; i < ARRAY_SIZE(i915_pipe_crc_data); i++) { - ret = i915_pipe_crc_create(minor->debugfs_root, minor, i); - if (ret) - return ret; - } - - return 0; -} - -void intel_pipe_crc_cleanup(struct drm_minor *minor) -{ + struct drm_i915_private *dev_priv = to_i915(minor->dev); + struct dentry *ent; int i; for (i = 0; i < ARRAY_SIZE(i915_pipe_crc_data); i++) { - struct drm_info_list *info_list = - (struct drm_info_list *)&i915_pipe_crc_data[i]; + struct pipe_crc_info *info = &i915_pipe_crc_data[i]; - drm_debugfs_remove_files(info_list, 1, minor); + info->dev_priv = dev_priv; + ent = debugfs_create_file(info->name, S_IRUGO, + minor->debugfs_root, info, + &i915_pipe_crc_fops); + if (!ent) + return -ENOMEM; } + + return 0; } int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name, diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6a29784d2b41..570bd603f401 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -65,12 +65,12 @@ static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(GEN8_CONFIG0, I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES); - /* WaEnableChickenDCPR:skl,bxt,kbl */ + /* WaEnableChickenDCPR:skl,bxt,kbl,glk */ I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM); /* WaFbcTurnOffFbcWatermark:skl,bxt,kbl */ - /* WaFbcWakeMemOn:skl,bxt,kbl */ + /* WaFbcWakeMemOn:skl,bxt,kbl,glk */ I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | DISP_FBC_WM_DIS | DISP_FBC_MEMORY_WAKE); @@ -99,9 +99,31 @@ static void 
bxt_init_clock_gating(struct drm_i915_private *dev_priv) * Wa: Backlight PWM may stop in the asserted state, causing backlight * to stay fully on. */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) - I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | - PWM1_GATING_DIS | PWM2_GATING_DIS); + I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | + PWM1_GATING_DIS | PWM2_GATING_DIS); +} + +static void glk_init_clock_gating(struct drm_i915_private *dev_priv) +{ + gen9_init_clock_gating(dev_priv); + + /* + * WaDisablePWMClockGating:glk + * Backlight PWM may stop in the asserted state, causing backlight + * to stay fully on. + */ + I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | + PWM1_GATING_DIS | PWM2_GATING_DIS); + + /* WaDDIIOTimeout:glk */ + if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1)) { + u32 val = I915_READ(CHICKEN_MISC_2); + val &= ~(GLK_CL0_PWR_DOWN | + GLK_CL1_PWR_DOWN | + GLK_CL2_PWR_DOWN); + I915_WRITE(CHICKEN_MISC_2, val); + } + } static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv) @@ -355,6 +377,8 @@ static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enabl return false; } + trace_intel_memory_cxsr(dev_priv, was_enabled, enable); + DRM_DEBUG_KMS("memory self-refresh is %s (was %s)\n", enableddisabled(enable), enableddisabled(was_enabled)); @@ -393,15 +417,15 @@ static const int pessimal_latency_ns = 5000; #define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \ ((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8)) -static int vlv_get_fifo_size(struct intel_plane *plane) +static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - int sprite0_start, sprite1_start, size; - - if (plane->id == PLANE_CURSOR) - return 63; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct 
vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; + enum pipe pipe = crtc->pipe; + int sprite0_start, sprite1_start; - switch (plane->pipe) { + switch (pipe) { uint32_t dsparb, dsparb2, dsparb3; case PIPE_A: dsparb = I915_READ(DSPARB); @@ -422,26 +446,21 @@ static int vlv_get_fifo_size(struct intel_plane *plane) sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20); break; default: - return 0; - } - - switch (plane->id) { - case PLANE_PRIMARY: - size = sprite0_start; - break; - case PLANE_SPRITE0: - size = sprite1_start - sprite0_start; - break; - case PLANE_SPRITE1: - size = 512 - 1 - sprite1_start; - break; - default: - return 0; + MISSING_CASE(pipe); + return; } - DRM_DEBUG_KMS("%s FIFO size: %d\n", plane->base.name, size); + fifo_state->plane[PLANE_PRIMARY] = sprite0_start; + fifo_state->plane[PLANE_SPRITE0] = sprite1_start - sprite0_start; + fifo_state->plane[PLANE_SPRITE1] = 511 - sprite1_start; + fifo_state->plane[PLANE_CURSOR] = 63; - return size; + DRM_DEBUG_KMS("Pipe %c FIFO size: %d/%d/%d/%d\n", + pipe_name(pipe), + fifo_state->plane[PLANE_PRIMARY], + fifo_state->plane[PLANE_SPRITE0], + fifo_state->plane[PLANE_SPRITE1], + fifo_state->plane[PLANE_CURSOR]); } static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv, int plane) @@ -636,6 +655,29 @@ static unsigned long intel_calculate_wm(unsigned long clock_in_khz, return wm_size; } +static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + + /* FIXME check the 'enable' instead */ + if (!crtc_state->base.active) + return false; + + /* + * Treat cursor with fb as always visible since cursor updates + * can happen faster than the vrefresh rate, and the current + * watermark code doesn't handle that correctly. 
Cursor updates + * which set/clear the fb or change the cursor size are going + * to get throttled by intel_legacy_cursor_update() to work + * around this problem with the watermark code. + */ + if (plane->id == PLANE_CURSOR) + return plane_state->base.fb != NULL; + else + return plane_state->base.visible; +} + static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv) { struct intel_crtc *crtc, *enabled = NULL; @@ -871,6 +913,8 @@ static void vlv_write_wm_values(struct drm_i915_private *dev_priv, enum pipe pipe; for_each_pipe(dev_priv, pipe) { + trace_vlv_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm); + I915_WRITE(VLV_DDL(pipe), (wm->ddl[pipe].plane[PLANE_CURSOR] << DDL_CURSOR_SHIFT) | (wm->ddl[pipe].plane[PLANE_SPRITE1] << DDL_SPRITE_SHIFT(1)) | @@ -941,12 +985,6 @@ static void vlv_write_wm_values(struct drm_i915_private *dev_priv, #undef FW_WM_VLV -enum vlv_wm_level { - VLV_WM_LEVEL_PM2, - VLV_WM_LEVEL_PM5, - VLV_WM_LEVEL_DDR_DVFS, -}; - /* latency must be in 0.1us units. 
*/ static unsigned int vlv_wm_method2(unsigned int pixel_rate, unsigned int pipe_htotal, @@ -1017,71 +1055,114 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, return min_t(int, wm, USHRT_MAX); } -static void vlv_compute_fifo(struct intel_crtc *crtc) +static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes) { - struct drm_device *dev = crtc->base.dev; - struct vlv_wm_state *wm_state = &crtc->wm_state; - struct intel_plane *plane; - unsigned int total_rate = 0; - const int fifo_size = 512 - 1; + return (active_planes & (BIT(PLANE_SPRITE0) | + BIT(PLANE_SPRITE1))) == BIT(PLANE_SPRITE1); +} + +static int vlv_compute_fifo(struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + const struct vlv_pipe_wm *raw = + &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2]; + struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; + unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR); + int num_active_planes = hweight32(active_planes); + const int fifo_size = 511; int fifo_extra, fifo_left = fifo_size; + int sprite0_fifo_extra = 0; + unsigned int total_rate; + enum plane_id plane_id; - for_each_intel_plane_on_crtc(dev, crtc, plane) { - struct intel_plane_state *state = - to_intel_plane_state(plane->base.state); + /* + * When enabling sprite0 after sprite1 has already been enabled + * we tend to get an underrun unless sprite0 already has some + * FIFO space allocated. Hence we always allocate at least one + * cacheline for sprite0 whenever sprite1 is enabled. + * + * All other plane enable sequences appear immune to this problem.
+ */ + if (vlv_need_sprite0_fifo_workaround(active_planes)) + sprite0_fifo_extra = 1; - if (plane->base.type == DRM_PLANE_TYPE_CURSOR) - continue; + total_rate = raw->plane[PLANE_PRIMARY] + + raw->plane[PLANE_SPRITE0] + + raw->plane[PLANE_SPRITE1] + + sprite0_fifo_extra; - if (state->base.visible) { - wm_state->num_active_planes++; - total_rate += state->base.fb->format->cpp[0]; - } - } + if (total_rate > fifo_size) + return -EINVAL; - for_each_intel_plane_on_crtc(dev, crtc, plane) { - struct intel_plane_state *state = - to_intel_plane_state(plane->base.state); - unsigned int rate; + if (total_rate == 0) + total_rate = 1; - if (plane->base.type == DRM_PLANE_TYPE_CURSOR) { - plane->wm.fifo_size = 63; - continue; - } + for_each_plane_id_on_crtc(crtc, plane_id) { + unsigned int rate; - if (!state->base.visible) { - plane->wm.fifo_size = 0; + if ((active_planes & BIT(plane_id)) == 0) { + fifo_state->plane[plane_id] = 0; continue; } - rate = state->base.fb->format->cpp[0]; - plane->wm.fifo_size = fifo_size * rate / total_rate; - fifo_left -= plane->wm.fifo_size; + rate = raw->plane[plane_id]; + fifo_state->plane[plane_id] = fifo_size * rate / total_rate; + fifo_left -= fifo_state->plane[plane_id]; } - fifo_extra = DIV_ROUND_UP(fifo_left, wm_state->num_active_planes ?: 1); + fifo_state->plane[PLANE_SPRITE0] += sprite0_fifo_extra; + fifo_left -= sprite0_fifo_extra; + + fifo_state->plane[PLANE_CURSOR] = 63; + + fifo_extra = DIV_ROUND_UP(fifo_left, num_active_planes ?: 1); /* spread the remainder evenly */ - for_each_intel_plane_on_crtc(dev, crtc, plane) { + for_each_plane_id_on_crtc(crtc, plane_id) { int plane_extra; if (fifo_left == 0) break; - if (plane->base.type == DRM_PLANE_TYPE_CURSOR) - continue; - - /* give it all to the first plane if none are active */ - if (plane->wm.fifo_size == 0 && - wm_state->num_active_planes) + if ((active_planes & BIT(plane_id)) == 0) continue; plane_extra = min(fifo_extra, fifo_left); - plane->wm.fifo_size += plane_extra; + 
fifo_state->plane[plane_id] += plane_extra; fifo_left -= plane_extra; } - WARN_ON(fifo_left != 0); + WARN_ON(active_planes != 0 && fifo_left != 0); + + /* give it all to the first plane if none are active */ + if (active_planes == 0) { + WARN_ON(fifo_left != fifo_size); + fifo_state->plane[PLANE_PRIMARY] = fifo_left; + } + + return 0; +} + +static int vlv_num_wm_levels(struct drm_i915_private *dev_priv) +{ + return dev_priv->wm.max_level + 1; +} + +/* mark all levels starting from 'level' as invalid */ +static void vlv_invalidate_wms(struct intel_crtc *crtc, + struct vlv_wm_state *wm_state, int level) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + + for (; level < vlv_num_wm_levels(dev_priv); level++) { + enum plane_id plane_id; + + for_each_plane_id_on_crtc(crtc, plane_id) + wm_state->wm[level].plane[plane_id] = USHRT_MAX; + + wm_state->sr[level].cursor = USHRT_MAX; + wm_state->sr[level].plane = USHRT_MAX; + } } static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size) @@ -1092,144 +1173,230 @@ static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size) return fifo_size - wm; } -static void vlv_invert_wms(struct intel_crtc *crtc) +/* + * Starting from 'level' set all higher + * levels to 'value' in the "raw" watermarks. 
+ */ +static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state, + int level, enum plane_id plane_id, u16 value) { - struct vlv_wm_state *wm_state = &crtc->wm_state; - int level; + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + int num_levels = vlv_num_wm_levels(dev_priv); + bool dirty = false; - for (level = 0; level < wm_state->num_levels; level++) { - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - const int sr_fifo_size = - INTEL_INFO(dev_priv)->num_pipes * 512 - 1; - struct intel_plane *plane; + for (; level < num_levels; level++) { + struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; - wm_state->sr[level].plane = - vlv_invert_wm_value(wm_state->sr[level].plane, - sr_fifo_size); - wm_state->sr[level].cursor = - vlv_invert_wm_value(wm_state->sr[level].cursor, - 63); - - for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { - wm_state->wm[level].plane[plane->id] = - vlv_invert_wm_value(wm_state->wm[level].plane[plane->id], - plane->wm.fifo_size); - } + dirty |= raw->plane[plane_id] != value; + raw->plane[plane_id] = value; } + + return dirty; } -static void vlv_compute_wm(struct intel_crtc *crtc) +static bool vlv_plane_wm_compute(struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct vlv_wm_state *wm_state = &crtc->wm_state; - struct intel_plane *plane; + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + enum plane_id plane_id = plane->id; + int num_levels = vlv_num_wm_levels(to_i915(plane->base.dev)); int level; + bool dirty = false; + + if (!plane_state->base.visible) { + dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0); + goto out; + } - memset(wm_state, 0, sizeof(*wm_state)); + for (level = 0; level < num_levels; level++) { + struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; + int wm = vlv_compute_wm_level(crtc_state, plane_state, level); + int max_wm = 
plane_id == PLANE_CURSOR ? 63 : 511; - wm_state->cxsr = crtc->pipe != PIPE_C && crtc->wm.cxsr_allowed; - wm_state->num_levels = dev_priv->wm.max_level + 1; + if (wm > max_wm) + break; - wm_state->num_active_planes = 0; + dirty |= raw->plane[plane_id] != wm; + raw->plane[plane_id] = wm; + } - vlv_compute_fifo(crtc); + /* mark all higher levels as invalid */ + dirty |= vlv_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX); + +out: + if (dirty) + DRM_DEBUG_KMS("%s wms: [0]=%d,[1]=%d,[2]=%d\n", + plane->base.name, + crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id], + crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id], + crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]); + + return dirty; +} + +static bool vlv_plane_wm_is_valid(const struct intel_crtc_state *crtc_state, + enum plane_id plane_id, int level) +{ + const struct vlv_pipe_wm *raw = + &crtc_state->wm.vlv.raw[level]; + const struct vlv_fifo_state *fifo_state = + &crtc_state->wm.vlv.fifo_state; + + return raw->plane[plane_id] <= fifo_state->plane[plane_id]; +} + +static bool vlv_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level) +{ + return vlv_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) && + vlv_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) && + vlv_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) && + vlv_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level); +} - if (wm_state->num_active_planes != 1) - wm_state->cxsr = false; +static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_atomic_state *state = + to_intel_atomic_state(crtc_state->base.state); + struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; + const struct vlv_fifo_state *fifo_state = + &crtc_state->wm.vlv.fifo_state; + int num_active_planes = hweight32(crtc_state->active_planes & + ~BIT(PLANE_CURSOR)); + bool 
needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->base); + struct intel_plane_state *plane_state; + struct intel_plane *plane; + enum plane_id plane_id; + int level, ret, i; + unsigned int dirty = 0; - for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { - struct intel_plane_state *state = + for_each_intel_plane_in_state(state, plane, plane_state, i) { + const struct intel_plane_state *old_plane_state = to_intel_plane_state(plane->base.state); - int level; - if (!state->base.visible) + if (plane_state->base.crtc != &crtc->base && + old_plane_state->base.crtc != &crtc->base) continue; - /* normal watermarks */ - for (level = 0; level < wm_state->num_levels; level++) { - int wm = vlv_compute_wm_level(crtc->config, state, level); - int max_wm = plane->wm.fifo_size; + if (vlv_plane_wm_compute(crtc_state, plane_state)) + dirty |= BIT(plane->id); + } - /* hack */ - if (WARN_ON(level == 0 && wm > max_wm)) - wm = max_wm; + /* + * DSPARB registers may have been reset due to the + * power well being turned off. Make sure we restore + * them to a consistent state even if no primary/sprite + * planes are initially active. 
+ */ + if (needs_modeset) + crtc_state->fifo_changed = true; - if (wm > max_wm) - break; + if (!dirty) + return 0; - wm_state->wm[level].plane[plane->id] = wm; - } + /* cursor changes don't warrant a FIFO recompute */ + if (dirty & ~BIT(PLANE_CURSOR)) { + const struct intel_crtc_state *old_crtc_state = + to_intel_crtc_state(crtc->base.state); + const struct vlv_fifo_state *old_fifo_state = + &old_crtc_state->wm.vlv.fifo_state; - wm_state->num_levels = level; + ret = vlv_compute_fifo(crtc_state); + if (ret) + return ret; - if (!wm_state->cxsr) - continue; + if (needs_modeset || + memcmp(old_fifo_state, fifo_state, + sizeof(*fifo_state)) != 0) + crtc_state->fifo_changed = true; + } - /* maxfifo watermarks */ - if (plane->id == PLANE_CURSOR) { - for (level = 0; level < wm_state->num_levels; level++) - wm_state->sr[level].cursor = - wm_state->wm[level].plane[PLANE_CURSOR]; - } else { - for (level = 0; level < wm_state->num_levels; level++) - wm_state->sr[level].plane = - max(wm_state->sr[level].plane, - wm_state->wm[level].plane[plane->id]); + /* initially allow all levels */ + wm_state->num_levels = vlv_num_wm_levels(dev_priv); + /* + * Note that enabling cxsr with no primary/sprite planes + * enabled can wedge the pipe. Hence we only allow cxsr + * with exactly one enabled primary/sprite plane. 
+ */ + wm_state->cxsr = crtc->pipe != PIPE_C && num_active_planes == 1; + + for (level = 0; level < wm_state->num_levels; level++) { + const struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; + const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1; + + if (!vlv_crtc_wm_is_valid(crtc_state, level)) + break; + + for_each_plane_id_on_crtc(crtc, plane_id) { + wm_state->wm[level].plane[plane_id] = + vlv_invert_wm_value(raw->plane[plane_id], + fifo_state->plane[plane_id]); } - } - /* clear any (partially) filled invalid levels */ - for (level = wm_state->num_levels; level < dev_priv->wm.max_level + 1; level++) { - memset(&wm_state->wm[level], 0, sizeof(wm_state->wm[level])); - memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level])); + wm_state->sr[level].plane = + vlv_invert_wm_value(max3(raw->plane[PLANE_PRIMARY], + raw->plane[PLANE_SPRITE0], + raw->plane[PLANE_SPRITE1]), + sr_fifo_size); + + wm_state->sr[level].cursor = + vlv_invert_wm_value(raw->plane[PLANE_CURSOR], + 63); } - vlv_invert_wms(crtc); + if (level == 0) + return -EINVAL; + + /* limit to only levels we can actually handle */ + wm_state->num_levels = level; + + /* invalidate the higher levels */ + vlv_invalidate_wms(crtc, wm_state, level); + + return 0; } #define VLV_FIFO(plane, value) \ (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV) -static void vlv_pipe_set_fifo_size(struct intel_crtc *crtc) +static void vlv_atomic_update_fifo(struct intel_atomic_state *state, + struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *plane; - int sprite0_start = 0, sprite1_start = 0, fifo_size = 0; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct vlv_fifo_state *fifo_state = + &crtc_state->wm.vlv.fifo_state; + int sprite0_start, sprite1_start, fifo_size; - 
for_each_intel_plane_on_crtc(dev, crtc, plane) { - switch (plane->id) { - case PLANE_PRIMARY: - sprite0_start = plane->wm.fifo_size; - break; - case PLANE_SPRITE0: - sprite1_start = sprite0_start + plane->wm.fifo_size; - break; - case PLANE_SPRITE1: - fifo_size = sprite1_start + plane->wm.fifo_size; - break; - case PLANE_CURSOR: - WARN_ON(plane->wm.fifo_size != 63); - break; - default: - MISSING_CASE(plane->id); - break; - } - } + if (!crtc_state->fifo_changed) + return; + + sprite0_start = fifo_state->plane[PLANE_PRIMARY]; + sprite1_start = fifo_state->plane[PLANE_SPRITE0] + sprite0_start; + fifo_size = fifo_state->plane[PLANE_SPRITE1] + sprite1_start; - WARN_ON(fifo_size != 512 - 1); + WARN_ON(fifo_state->plane[PLANE_CURSOR] != 63); + WARN_ON(fifo_size != 511); - DRM_DEBUG_KMS("Pipe %c FIFO split %d / %d / %d\n", - pipe_name(crtc->pipe), sprite0_start, - sprite1_start, fifo_size); + trace_vlv_fifo_size(crtc, sprite0_start, sprite1_start, fifo_size); - spin_lock(&dev_priv->wm.dsparb_lock); + /* + * uncore.lock serves a double purpose here. It allows us to + * use the less expensive I915_{READ,WRITE}_FW() functions, and + * it protects the DSPARB registers from getting clobbered by + * parallel updates from multiple pipes. + * + * intel_pipe_update_start() has already disabled interrupts + * for us, so a plain spin_lock() is sufficient here. 
+ */ + spin_lock(&dev_priv->uncore.lock); switch (crtc->pipe) { uint32_t dsparb, dsparb2, dsparb3; case PIPE_A: - dsparb = I915_READ(DSPARB); - dsparb2 = I915_READ(DSPARB2); + dsparb = I915_READ_FW(DSPARB); + dsparb2 = I915_READ_FW(DSPARB2); dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) | VLV_FIFO(SPRITEB, 0xff)); @@ -1241,12 +1408,12 @@ static void vlv_pipe_set_fifo_size(struct intel_crtc *crtc) dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) | VLV_FIFO(SPRITEB_HI, sprite1_start >> 8)); - I915_WRITE(DSPARB, dsparb); - I915_WRITE(DSPARB2, dsparb2); + I915_WRITE_FW(DSPARB, dsparb); + I915_WRITE_FW(DSPARB2, dsparb2); break; case PIPE_B: - dsparb = I915_READ(DSPARB); - dsparb2 = I915_READ(DSPARB2); + dsparb = I915_READ_FW(DSPARB); + dsparb2 = I915_READ_FW(DSPARB2); dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) | VLV_FIFO(SPRITED, 0xff)); @@ -1258,12 +1425,12 @@ static void vlv_pipe_set_fifo_size(struct intel_crtc *crtc) dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) | VLV_FIFO(SPRITED_HI, sprite1_start >> 8)); - I915_WRITE(DSPARB, dsparb); - I915_WRITE(DSPARB2, dsparb2); + I915_WRITE_FW(DSPARB, dsparb); + I915_WRITE_FW(DSPARB2, dsparb2); break; case PIPE_C: - dsparb3 = I915_READ(DSPARB3); - dsparb2 = I915_READ(DSPARB2); + dsparb3 = I915_READ_FW(DSPARB3); + dsparb2 = I915_READ_FW(DSPARB2); dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) | VLV_FIFO(SPRITEF, 0xff)); @@ -1275,20 +1442,60 @@ static void vlv_pipe_set_fifo_size(struct intel_crtc *crtc) dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) | VLV_FIFO(SPRITEF_HI, sprite1_start >> 8)); - I915_WRITE(DSPARB3, dsparb3); - I915_WRITE(DSPARB2, dsparb2); + I915_WRITE_FW(DSPARB3, dsparb3); + I915_WRITE_FW(DSPARB2, dsparb2); break; default: break; } - POSTING_READ(DSPARB); + POSTING_READ_FW(DSPARB); - spin_unlock(&dev_priv->wm.dsparb_lock); + spin_unlock(&dev_priv->uncore.lock); } #undef VLV_FIFO +static int vlv_compute_intermediate_wm(struct drm_device *dev, + struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state) +{ + struct 
vlv_wm_state *intermediate = &crtc_state->wm.vlv.intermediate; + const struct vlv_wm_state *optimal = &crtc_state->wm.vlv.optimal; + const struct vlv_wm_state *active = &crtc->wm.active.vlv; + int level; + + intermediate->num_levels = min(optimal->num_levels, active->num_levels); + intermediate->cxsr = optimal->cxsr && active->cxsr && + !crtc_state->disable_cxsr; + + for (level = 0; level < intermediate->num_levels; level++) { + enum plane_id plane_id; + + for_each_plane_id_on_crtc(crtc, plane_id) { + intermediate->wm[level].plane[plane_id] = + min(optimal->wm[level].plane[plane_id], + active->wm[level].plane[plane_id]); + } + + intermediate->sr[level].plane = min(optimal->sr[level].plane, + active->sr[level].plane); + intermediate->sr[level].cursor = min(optimal->sr[level].cursor, + active->sr[level].cursor); + } + + vlv_invalidate_wms(crtc, intermediate, level); + + /* + * If our intermediate WM are identical to the final WM, then we can + * omit the post-vblank programming; only update if it's different. 
+ */ + if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0) + crtc_state->wm.need_postvbl_update = true; + + return 0; +} + static void vlv_merge_wm(struct drm_i915_private *dev_priv, struct vlv_wm_values *wm) { @@ -1299,7 +1506,7 @@ static void vlv_merge_wm(struct drm_i915_private *dev_priv, wm->cxsr = true; for_each_intel_crtc(&dev_priv->drm, crtc) { - const struct vlv_wm_state *wm_state = &crtc->wm_state; + const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv; if (!crtc->active) continue; @@ -1318,14 +1525,11 @@ static void vlv_merge_wm(struct drm_i915_private *dev_priv, wm->level = VLV_WM_LEVEL_PM2; for_each_intel_crtc(&dev_priv->drm, crtc) { - struct vlv_wm_state *wm_state = &crtc->wm_state; + const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv; enum pipe pipe = crtc->pipe; - if (!crtc->active) - continue; - wm->pipe[pipe] = wm_state->wm[wm->level]; - if (wm->cxsr) + if (crtc->active && wm->cxsr) wm->sr = wm_state->sr[wm->level]; wm->ddl[pipe].plane[PLANE_PRIMARY] = DDL_PRECISION_HIGH | 2; @@ -1345,22 +1549,15 @@ static bool is_enabling(int old, int new, int threshold) return old < threshold && new >= threshold; } -static void vlv_update_wm(struct intel_crtc *crtc) +static void vlv_program_watermarks(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - enum pipe pipe = crtc->pipe; struct vlv_wm_values *old_wm = &dev_priv->wm.vlv; struct vlv_wm_values new_wm = {}; - vlv_compute_wm(crtc); vlv_merge_wm(dev_priv, &new_wm); - if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0) { - /* FIXME should be part of crtc atomic commit */ - vlv_pipe_set_fifo_size(crtc); - + if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0) return; - } if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS)) chv_set_memory_dvfs(dev_priv, false); @@ -1371,17 +1568,8 @@ static void vlv_update_wm(struct intel_crtc *crtc) if (is_disabling(old_wm->cxsr, new_wm.cxsr, true)) _intel_set_memory_cxsr(dev_priv, false); - 
/* FIXME should be part of crtc atomic commit */ - vlv_pipe_set_fifo_size(crtc); - vlv_write_wm_values(dev_priv, &new_wm); - DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, " - "sprite0=%d, sprite1=%d, SR: plane=%d, cursor=%d level=%d cxsr=%d\n", - pipe_name(pipe), new_wm.pipe[pipe].plane[PLANE_PRIMARY], new_wm.pipe[pipe].plane[PLANE_CURSOR], - new_wm.pipe[pipe].plane[PLANE_SPRITE0], new_wm.pipe[pipe].plane[PLANE_SPRITE1], - new_wm.sr.plane, new_wm.sr.cursor, new_wm.level, new_wm.cxsr); - if (is_enabling(old_wm->cxsr, new_wm.cxsr, true)) _intel_set_memory_cxsr(dev_priv, true); @@ -1394,6 +1582,33 @@ static void vlv_update_wm(struct intel_crtc *crtc) *old_wm = new_wm; } +static void vlv_initial_watermarks(struct intel_atomic_state *state, + struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + + mutex_lock(&dev_priv->wm.wm_mutex); + crtc->wm.active.vlv = crtc_state->wm.vlv.intermediate; + vlv_program_watermarks(dev_priv); + mutex_unlock(&dev_priv->wm.wm_mutex); +} + +static void vlv_optimize_watermarks(struct intel_atomic_state *state, + struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); + + if (!crtc_state->wm.need_postvbl_update) + return; + + mutex_lock(&dev_priv->wm.wm_mutex); + intel_crtc->wm.active.vlv = crtc_state->wm.vlv.optimal; + vlv_program_watermarks(dev_priv); + mutex_unlock(&dev_priv->wm.wm_mutex); +} + #define single_plane_enabled(mask) is_power_of_2(mask) static void g4x_update_wm(struct intel_crtc *crtc) @@ -1701,39 +1916,6 @@ static void i845_update_wm(struct intel_crtc *unused_crtc) I915_WRITE(FW_BLC, fwater_lo); } -uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config) -{ - uint32_t pixel_rate; - - pixel_rate = 
pipe_config->base.adjusted_mode.crtc_clock; - - /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to - * adjust the pixel_rate here. */ - - if (pipe_config->pch_pfit.enabled) { - uint64_t pipe_w, pipe_h, pfit_w, pfit_h; - uint32_t pfit_size = pipe_config->pch_pfit.size; - - pipe_w = pipe_config->pipe_src_w; - pipe_h = pipe_config->pipe_src_h; - - pfit_w = (pfit_size >> 16) & 0xFFFF; - pfit_h = pfit_size & 0xFFFF; - if (pipe_w < pfit_w) - pipe_w = pfit_w; - if (pipe_h < pfit_h) - pipe_h = pfit_h; - - if (WARN_ON(!pfit_w || !pfit_h)) - return pixel_rate; - - pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h, - pfit_w * pfit_h); - } - - return pixel_rate; -} - /* latency must be in 0.1us units. */ static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t cpp, uint32_t latency) { @@ -1802,17 +1984,17 @@ static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate, uint32_t method1, method2; int cpp; - if (!cstate->base.active || !pstate->base.visible) + if (!intel_wm_plane_visible(cstate, pstate)) return 0; cpp = pstate->base.fb->format->cpp[0]; - method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value); + method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value); if (!is_lp) return method1; - method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate), + method2 = ilk_wm_method2(cstate->pixel_rate, cstate->base.adjusted_mode.crtc_htotal, drm_rect_width(&pstate->base.dst), cpp, mem_value); @@ -1831,13 +2013,13 @@ static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate, uint32_t method1, method2; int cpp; - if (!cstate->base.active || !pstate->base.visible) + if (!intel_wm_plane_visible(cstate, pstate)) return 0; cpp = pstate->base.fb->format->cpp[0]; - method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value); - method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate), + method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value); + method2 = ilk_wm_method2(cstate->pixel_rate, 
cstate->base.adjusted_mode.crtc_htotal, drm_rect_width(&pstate->base.dst), cpp, mem_value); @@ -1852,20 +2034,16 @@ static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate, const struct intel_plane_state *pstate, uint32_t mem_value) { - /* - * We treat the cursor plane as always-on for the purposes of watermark - * calculation. Until we have two-stage watermark programming merged, - * this is necessary to avoid flickering. - */ - int cpp = 4; - int width = pstate->base.visible ? pstate->base.crtc_w : 64; + int cpp; - if (!cstate->base.active) + if (!intel_wm_plane_visible(cstate, pstate)) return 0; - return ilk_wm_method2(ilk_pipe_pixel_rate(cstate), + cpp = pstate->base.fb->format->cpp[0]; + + return ilk_wm_method2(cstate->pixel_rate, cstate->base.adjusted_mode.crtc_htotal, - width, cpp, mem_value); + pstate->base.crtc_w, cpp, mem_value); } /* Only for WM_LP. */ @@ -1875,7 +2053,7 @@ static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate, { int cpp; - if (!cstate->base.active || !pstate->base.visible) + if (!intel_wm_plane_visible(cstate, pstate)) return 0; cpp = pstate->base.fb->format->cpp[0]; @@ -2095,7 +2273,7 @@ hsw_compute_linetime_wm(const struct intel_crtc_state *cstate) return 0; if (WARN_ON(adjusted_mode->crtc_clock == 0)) return 0; - if (WARN_ON(intel_state->cdclk == 0)) + if (WARN_ON(intel_state->cdclk.logical.cdclk == 0)) return 0; /* The WM are computed with base on how long it takes to fill a single @@ -2104,7 +2282,7 @@ hsw_compute_linetime_wm(const struct intel_crtc_state *cstate) linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, adjusted_mode->crtc_clock); ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, - intel_state->cdclk); + intel_state->cdclk.logical.cdclk); return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) | PIPE_WM_LINETIME_TIME(linetime); @@ -2173,7 +2351,7 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, } /* - * WaWmMemoryReadLatency:skl 
+ * WaWmMemoryReadLatency:skl,glk * * punit doesn't take into account the read latency so we need * to add 2us to the various latency levels we retrieve from the @@ -2498,8 +2676,8 @@ static int ilk_compute_intermediate_wm(struct drm_device *dev, * If our intermediate WM are identical to the final WM, then we can * omit the post-vblank programming; only update if it's different. */ - if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) == 0) - newstate->wm.need_postvbl_update = false; + if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) != 0) + newstate->wm.need_postvbl_update = true; return 0; } @@ -2895,8 +3073,7 @@ static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); - if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv) || - IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) return true; return false; @@ -3184,19 +3361,29 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, * Caller should take care of dividing & rounding off the value. 
*/ static uint32_t -skl_plane_downscale_amount(const struct intel_plane_state *pstate) +skl_plane_downscale_amount(const struct intel_crtc_state *cstate, + const struct intel_plane_state *pstate) { + struct intel_plane *plane = to_intel_plane(pstate->base.plane); uint32_t downscale_h, downscale_w; uint32_t src_w, src_h, dst_w, dst_h; - if (WARN_ON(!pstate->base.visible)) + if (WARN_ON(!intel_wm_plane_visible(cstate, pstate))) return DRM_PLANE_HELPER_NO_SCALING; /* n.b., src is 16.16 fixed point, dst is whole integer */ - src_w = drm_rect_width(&pstate->base.src); - src_h = drm_rect_height(&pstate->base.src); - dst_w = drm_rect_width(&pstate->base.dst); - dst_h = drm_rect_height(&pstate->base.dst); + if (plane->id == PLANE_CURSOR) { + src_w = pstate->base.src_w; + src_h = pstate->base.src_h; + dst_w = pstate->base.crtc_w; + dst_h = pstate->base.crtc_h; + } else { + src_w = drm_rect_width(&pstate->base.src); + src_h = drm_rect_height(&pstate->base.src); + dst_w = drm_rect_width(&pstate->base.dst); + dst_h = drm_rect_height(&pstate->base.dst); + } + if (drm_rotation_90_or_270(pstate->base.rotation)) swap(dst_w, dst_h); @@ -3212,6 +3399,7 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, const struct drm_plane_state *pstate, int y) { + struct intel_plane *plane = to_intel_plane(pstate->plane); struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate); uint32_t down_scale_amount, data_rate; uint32_t width = 0, height = 0; @@ -3224,7 +3412,7 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, fb = pstate->fb; format = fb->format->format; - if (pstate->plane->type == DRM_PLANE_TYPE_CURSOR) + if (plane->id == PLANE_CURSOR) return 0; if (y && format != DRM_FORMAT_NV12) return 0; @@ -3248,7 +3436,7 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, data_rate = width * height * fb->format->cpp[0]; } - down_scale_amount = skl_plane_downscale_amount(intel_pstate); + down_scale_amount = 
skl_plane_downscale_amount(cstate, intel_pstate); return (uint64_t)data_rate * down_scale_amount >> 16; } @@ -3540,15 +3728,15 @@ static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cst uint64_t pixel_rate; /* Shouldn't reach here on disabled planes... */ - if (WARN_ON(!pstate->base.visible)) + if (WARN_ON(!intel_wm_plane_visible(cstate, pstate))) return 0; /* * Adjusted plane pixel rate is just the pipe's adjusted pixel rate * with additional adjustments for plane-specific scaling. */ - adjusted_pixel_rate = ilk_pipe_pixel_rate(cstate); - downscale_amount = skl_plane_downscale_amount(pstate); + adjusted_pixel_rate = cstate->pixel_rate; + downscale_amount = skl_plane_downscale_amount(cstate, pstate); pixel_rate = adjusted_pixel_rate * downscale_amount >> 16; WARN_ON(pixel_rate != clamp_t(uint32_t, pixel_rate, 0, ~0)); @@ -3565,6 +3753,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, uint8_t *out_lines, /* out */ bool *enabled /* out */) { + struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane); struct drm_plane_state *pstate = &intel_pstate->base; struct drm_framebuffer *fb = pstate->fb; uint32_t latency = dev_priv->wm.skl_latency[level]; @@ -3584,7 +3773,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); bool y_tiled, x_tiled; - if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) { + if (latency == 0 || + !intel_wm_plane_visible(cstate, intel_pstate)) { *enabled = false; return 0; } @@ -3600,8 +3790,13 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if (apply_memory_bw_wa && x_tiled) latency += 15; - width = drm_rect_width(&intel_pstate->base.src) >> 16; - height = drm_rect_height(&intel_pstate->base.src) >> 16; + if (plane->id == PLANE_CURSOR) { + width = intel_pstate->base.crtc_w; + height = intel_pstate->base.crtc_h; + } else { + width = 
drm_rect_width(&intel_pstate->base.src) >> 16; + height = drm_rect_height(&intel_pstate->base.src) >> 16; + } if (drm_rotation_90_or_270(pstate->rotation)) swap(width, height); @@ -3775,7 +3970,7 @@ skl_compute_linetime_wm(struct intel_crtc_state *cstate) if (!cstate->base.active) return 0; - pixel_rate = ilk_pipe_pixel_rate(cstate); + pixel_rate = cstate->pixel_rate; if (WARN_ON(pixel_rate == 0)) return 0; @@ -3967,7 +4162,7 @@ pipes_modified(struct drm_atomic_state *state) struct drm_crtc_state *cstate; uint32_t i, ret = 0; - for_each_crtc_in_state(state, crtc, cstate, i) + for_each_new_crtc_in_state(state, crtc, cstate, i) ret |= drm_crtc_mask(crtc); return ret; @@ -4110,7 +4305,7 @@ skl_print_wm_changes(const struct drm_atomic_state *state) const struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb; int i; - for_each_crtc_in_state(state, crtc, cstate, i) { + for_each_new_crtc_in_state(state, crtc, cstate, i) { const struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum pipe pipe = intel_crtc->pipe; @@ -4152,7 +4347,7 @@ skl_compute_wm(struct drm_atomic_state *state) * since any racing commits that want to update them would need to * hold _all_ CRTC state mutexes. */ - for_each_crtc_in_state(state, crtc, cstate, i) + for_each_new_crtc_in_state(state, crtc, cstate, i) changed = true; if (!changed) return 0; @@ -4174,7 +4369,7 @@ skl_compute_wm(struct drm_atomic_state *state) * should allow skl_update_pipe_wm() to return failure in cases where * no suitable watermark values can be found. 
*/ - for_each_crtc_in_state(state, crtc, cstate, i) { + for_each_new_crtc_in_state(state, crtc, cstate, i) { struct intel_crtc_state *intel_cstate = to_intel_crtc_state(cstate); const struct skl_pipe_wm *old_pipe_wm = @@ -4539,15 +4734,11 @@ void vlv_wm_get_hw_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); struct vlv_wm_values *wm = &dev_priv->wm.vlv; - struct intel_plane *plane; - enum pipe pipe; + struct intel_crtc *crtc; u32 val; vlv_read_wm_values(dev_priv, wm); - for_each_intel_plane(dev, plane) - plane->wm.fifo_size = vlv_get_fifo_size(plane); - wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN; wm->level = VLV_WM_LEVEL_PM2; @@ -4585,18 +4776,107 @@ void vlv_wm_get_hw_state(struct drm_device *dev) mutex_unlock(&dev_priv->rps.hw_lock); } - for_each_pipe(dev_priv, pipe) + for_each_intel_crtc(dev, crtc) { + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct vlv_wm_state *active = &crtc->wm.active.vlv; + const struct vlv_fifo_state *fifo_state = + &crtc_state->wm.vlv.fifo_state; + enum pipe pipe = crtc->pipe; + enum plane_id plane_id; + int level; + + vlv_get_fifo_size(crtc_state); + + active->num_levels = wm->level + 1; + active->cxsr = wm->cxsr; + + for (level = 0; level < active->num_levels; level++) { + struct vlv_pipe_wm *raw = + &crtc_state->wm.vlv.raw[level]; + + active->sr[level].plane = wm->sr.plane; + active->sr[level].cursor = wm->sr.cursor; + + for_each_plane_id_on_crtc(crtc, plane_id) { + active->wm[level].plane[plane_id] = + wm->pipe[pipe].plane[plane_id]; + + raw->plane[plane_id] = + vlv_invert_wm_value(active->wm[level].plane[plane_id], + fifo_state->plane[plane_id]); + } + } + + for_each_plane_id_on_crtc(crtc, plane_id) + vlv_raw_plane_wm_set(crtc_state, level, + plane_id, USHRT_MAX); + vlv_invalidate_wms(crtc, active, level); + + crtc_state->wm.vlv.optimal = *active; + crtc_state->wm.vlv.intermediate = *active; + DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, 
cursor=%d, sprite0=%d, sprite1=%d\n", pipe_name(pipe), wm->pipe[pipe].plane[PLANE_PRIMARY], wm->pipe[pipe].plane[PLANE_CURSOR], wm->pipe[pipe].plane[PLANE_SPRITE0], wm->pipe[pipe].plane[PLANE_SPRITE1]); + } DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n", wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr); } +void vlv_wm_sanitize(struct drm_i915_private *dev_priv) +{ + struct intel_plane *plane; + struct intel_crtc *crtc; + + mutex_lock(&dev_priv->wm.wm_mutex); + + for_each_intel_plane(&dev_priv->drm, plane) { + struct intel_crtc *crtc = + intel_get_crtc_for_pipe(dev_priv, plane->pipe); + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; + const struct vlv_fifo_state *fifo_state = + &crtc_state->wm.vlv.fifo_state; + enum plane_id plane_id = plane->id; + int level; + + if (plane_state->base.visible) + continue; + + for (level = 0; level < wm_state->num_levels; level++) { + struct vlv_pipe_wm *raw = + &crtc_state->wm.vlv.raw[level]; + + raw->plane[plane_id] = 0; + + wm_state->wm[level].plane[plane_id] = + vlv_invert_wm_value(raw->plane[plane_id], + fifo_state->plane[plane_id]); + } + } + + for_each_intel_crtc(&dev_priv->drm, crtc) { + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + + crtc_state->wm.vlv.intermediate = + crtc_state->wm.vlv.optimal; + crtc->wm.active.vlv = crtc_state->wm.vlv.optimal; + } + + vlv_program_watermarks(dev_priv); + + mutex_unlock(&dev_priv->wm.wm_mutex); +} + void ilk_wm_get_hw_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -4680,7 +4960,7 @@ bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val) { u16 rgvswctl; - assert_spin_locked(&mchdev_lock); + lockdep_assert_held(&mchdev_lock); rgvswctl = I915_READ16(MEMSWCTL); if (rgvswctl & MEMCTL_CMD_STS) { @@ 
-4942,16 +5222,8 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) /* gen6_set_rps is called to update the frequency request, but should also be * called when the range (min_delay and max_delay) is modified so that we can * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ -static void gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) +static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) { - /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - return; - - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - WARN_ON(val > dev_priv->rps.max_freq); - WARN_ON(val < dev_priv->rps.min_freq); - /* min/max delay may still have been modified so be sure to * write the limits value. */ @@ -4977,17 +5249,15 @@ static void gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val)); I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - POSTING_READ(GEN6_RPNSWREQ); - dev_priv->rps.cur_freq = val; trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); + + return 0; } -static void valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) +static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) { - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - WARN_ON(val > dev_priv->rps.max_freq); - WARN_ON(val < dev_priv->rps.min_freq); + int err; if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1), "Odd GPU freq value\n")) @@ -4996,13 +5266,17 @@ static void valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); if (val != dev_priv->rps.cur_freq) { - vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); - if (!IS_CHERRYVIEW(dev_priv)) - gen6_set_rps_thresholds(dev_priv, val); + err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); + if (err) + return err; + + gen6_set_rps_thresholds(dev_priv, val); } 
dev_priv->rps.cur_freq = val; trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); + + return 0; } /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down @@ -5015,6 +5289,7 @@ static void valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) { u32 val = dev_priv->rps.idle_freq; + int err; if (dev_priv->rps.cur_freq <= val) return; @@ -5032,14 +5307,19 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) * power than the render powerwell. */ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA); - valleyview_set_rps(dev_priv, val); + err = valleyview_set_rps(dev_priv, val); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA); + + if (err) + DRM_ERROR("Failed to set RPS for idle\n"); } void gen6_rps_busy(struct drm_i915_private *dev_priv) { mutex_lock(&dev_priv->rps.hw_lock); if (dev_priv->rps.enabled) { + u8 freq; + if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED) gen6_rps_reset_ei(dev_priv); I915_WRITE(GEN6_PMINTRMSK, @@ -5047,11 +5327,17 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv) gen6_enable_rps_interrupts(dev_priv); - /* Ensure we start at the user's desired frequency */ - intel_set_rps(dev_priv, - clamp(dev_priv->rps.cur_freq, - dev_priv->rps.min_freq_softlimit, - dev_priv->rps.max_freq_softlimit)); + /* Use the user's desired frequency as a guide, but for better + * performance, jump directly to RPe as our starting frequency. 
+ */ + freq = max(dev_priv->rps.cur_freq, + dev_priv->rps.efficient_freq); + + if (intel_set_rps(dev_priv, + clamp(freq, + dev_priv->rps.min_freq_softlimit, + dev_priv->rps.max_freq_softlimit))) + DRM_DEBUG_DRIVER("Failed to set idle frequency\n"); } mutex_unlock(&dev_priv->rps.hw_lock); } @@ -5119,12 +5405,25 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv, spin_unlock(&dev_priv->rps.client_lock); } -void intel_set_rps(struct drm_i915_private *dev_priv, u8 val) +int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) { + int err; + + lockdep_assert_held(&dev_priv->rps.hw_lock); + GEM_BUG_ON(val > dev_priv->rps.max_freq); + GEM_BUG_ON(val < dev_priv->rps.min_freq); + + if (!dev_priv->rps.enabled) { + dev_priv->rps.cur_freq = val; + return 0; + } + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - valleyview_set_rps(dev_priv, val); + err = valleyview_set_rps(dev_priv, val); else - gen6_set_rps(dev_priv, val); + err = gen6_set_rps(dev_priv, val); + + return err; } static void gen9_disable_rc6(struct drm_i915_private *dev_priv) @@ -5302,7 +5601,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || - IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + IS_GEN9_BC(dev_priv)) { u32 ddcc_status = 0; if (sandybridge_pcode_read(dev_priv, @@ -5315,7 +5614,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) dev_priv->rps.max_freq); } - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { /* Store the frequency values in 16.66 MHZ units, which is * the natural hardware unit for SKL */ @@ -5328,7 +5627,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) } static void reset_rps(struct drm_i915_private *dev_priv, - void (*set)(struct drm_i915_private *, u8)) + int (*set)(struct drm_i915_private *, u8)) { u8 freq = dev_priv->rps.cur_freq; @@ -5336,7 
+5635,8 @@ static void reset_rps(struct drm_i915_private *dev_priv, dev_priv->rps.power = -1; dev_priv->rps.cur_freq = -1; - set(dev_priv, freq); + if (set(dev_priv, freq)) + DRM_ERROR("Failed to reset RPS to initial values\n"); } /* See the Gen9_GT_PM_Programming_Guide doc for the below */ @@ -5344,22 +5644,6 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv) { intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - /* - * BIOS could leave the Hw Turbo enabled, so need to explicitly - * clear out the Control register just to avoid inconsitency - * with debugfs interface, which will show Turbo as enabled - * only and that is not expected by the User after adding the - * WaGsvDisableTurbo. Apart from this there is no problem even - * if the Turbo is left enabled in the Control register, as the - * Up/Down interrupts would remain masked. - */ - gen9_disable_rps(dev_priv); - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - return; - } - /* Program defaults and thresholds for RPS*/ I915_WRITE(GEN6_RC_VIDEO_FREQ, GEN9_FREQUENCY(dev_priv->rps.rp1_freq)); @@ -5419,18 +5703,9 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) if (intel_enable_rc6() & INTEL_RC6_ENABLE) rc6_mask = GEN6_RC_CTL_RC6_ENABLE; DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE)); - /* WaRsUseTimeoutMode:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us */ - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | - GEN7_RC_CTL_TO_MODE | - rc6_mask); - } else { - I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_EI_MODE(1) | - rc6_mask); - } + I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ + I915_WRITE(GEN6_RC_CONTROL, + GEN6_RC_CTL_HW_ENABLE | GEN6_RC_CTL_EI_MODE(1) | rc6_mask); /* * 3b: Enable 
Coarse Power Gating only when RC6 is enabled. @@ -5645,7 +5920,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) /* convert DDR frequency from units of 266.6MHz to bandwidth */ min_ring_freq = mult_frac(min_ring_freq, 8, 3); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { /* Convert GT frequency to 50 HZ units */ min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER; max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER; @@ -5663,7 +5938,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) int diff = max_gpu_freq - gpu_freq; unsigned int ia_freq = 0, ring_freq = 0; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { /* * ring_freq = 2 * GT. ring_freq is in 100MHz units * No floor required for ring frequency on SKL. @@ -5747,6 +6022,17 @@ static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv) return rp1; } +static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv) +{ + u32 val, rpn; + + val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE); + rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) & + FB_GFX_FREQ_FUSE_MASK); + + return rpn; +} + static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv) { u32 val, rp1; @@ -5983,8 +6269,7 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), dev_priv->rps.rp1_freq); - /* PUnit validated range is only [RPe, RP0] */ - dev_priv->rps.min_freq = dev_priv->rps.efficient_freq; + dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv); DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), dev_priv->rps.min_freq); @@ -6140,7 +6425,8 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv) /* allows RC6 residency counter to work */ I915_WRITE(VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN | + 
_MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | + VLV_MEDIA_RC0_COUNT_EN | VLV_RENDER_RC0_COUNT_EN | VLV_MEDIA_RC6_COUNT_EN | VLV_RENDER_RC6_COUNT_EN)); @@ -6207,7 +6493,7 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv) unsigned long now = jiffies_to_msecs(jiffies), diff1; int i; - assert_spin_locked(&mchdev_lock); + lockdep_assert_held(&mchdev_lock); diff1 = now - dev_priv->ips.last_time1; @@ -6312,7 +6598,7 @@ static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) u64 now, diff, diffms; u32 count; - assert_spin_locked(&mchdev_lock); + lockdep_assert_held(&mchdev_lock); now = ktime_get_raw_ns(); diffms = now - dev_priv->ips.last_time2; @@ -6357,7 +6643,7 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) unsigned long t, corr, state1, corr2, state2; u32 pxvid, ext_v; - assert_spin_locked(&mchdev_lock); + lockdep_assert_held(&mchdev_lock); pxvid = I915_READ(PXVFREQ(dev_priv->rps.cur_freq)); pxvid = (pxvid >> 24) & 0x7f; @@ -6783,7 +7069,7 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) } else if (INTEL_GEN(dev_priv) >= 9) { gen9_enable_rc6(dev_priv); gen9_enable_rps(dev_priv); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) gen6_update_ring_freq(dev_priv); } else if (IS_BROADWELL(dev_priv)) { gen8_enable_rps(dev_priv); @@ -6833,7 +7119,7 @@ static void __intel_autoenable_gt_powersave(struct work_struct *work) rcs->init_context(req); /* Mark the device busy, calling intel_enable_gt_powersave() */ - i915_add_request_no_flush(req); + i915_add_request(req); unlock: mutex_unlock(&dev_priv->drm.struct_mutex); @@ -7268,6 +7554,14 @@ static void broadwell_init_clock_gating(struct drm_i915_private *dev_priv) | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT); lpt_init_clock_gating(dev_priv); + + /* WaDisableDopClockGating:bdw + * + * Also see the CHICKEN2 write in bdw_init_workarounds() to disable DOP + * clock gating. 
+ */ + I915_WRITE(GEN6_UCGCTL1, + I915_READ(GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE); } static void haswell_init_clock_gating(struct drm_i915_private *dev_priv) @@ -7664,8 +7958,10 @@ void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) dev_priv->display.init_clock_gating = skylake_init_clock_gating; else if (IS_KABYLAKE(dev_priv)) dev_priv->display.init_clock_gating = kabylake_init_clock_gating; - else if (IS_GEN9_LP(dev_priv)) + else if (IS_BROXTON(dev_priv)) dev_priv->display.init_clock_gating = bxt_init_clock_gating; + else if (IS_GEMINILAKE(dev_priv)) + dev_priv->display.init_clock_gating = glk_init_clock_gating; else if (IS_BROADWELL(dev_priv)) dev_priv->display.init_clock_gating = broadwell_init_clock_gating; else if (IS_CHERRYVIEW(dev_priv)) @@ -7735,7 +8031,11 @@ void intel_init_pm(struct drm_i915_private *dev_priv) } } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { vlv_setup_wm_latency(dev_priv); - dev_priv->display.update_wm = vlv_update_wm; + dev_priv->display.compute_pipe_wm = vlv_compute_pipe_wm; + dev_priv->display.compute_intermediate_wm = vlv_compute_intermediate_wm; + dev_priv->display.initial_watermarks = vlv_initial_watermarks; + dev_priv->display.optimize_watermarks = vlv_optimize_watermarks; + dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo; } else if (IS_PINEVIEW(dev_priv)) { if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv), dev_priv->is_ddr3, @@ -7788,7 +8088,7 @@ static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv) case GEN6_PCODE_TIMEOUT: return -ETIMEDOUT; default: - MISSING_CASE(flags) + MISSING_CASE(flags); return 0; } } @@ -8083,3 +8383,80 @@ void intel_pm_setup(struct drm_i915_private *dev_priv) dev_priv->pm.suspended = false; atomic_set(&dev_priv->pm.wakeref_count, 0); } + +static u64 vlv_residency_raw(struct drm_i915_private *dev_priv, + const i915_reg_t reg) +{ + u32 lower, upper, tmp; + int loop = 2; + + /* The register accessed do not need 
forcewake. We borrow + * uncore lock to prevent concurrent access to range reg. + */ + spin_lock_irq(&dev_priv->uncore.lock); + + /* vlv and chv residency counters are 40 bits in width. + * With a control bit, we can choose between upper or lower + * 32bit window into this counter. + * + * Although we always use the counter in high-range mode elsewhere, + * userspace may attempt to read the value before rc6 is initialised, + * before we have set the default VLV_COUNTER_CONTROL value. So always + * set the high bit to be safe. + */ + I915_WRITE_FW(VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); + upper = I915_READ_FW(reg); + do { + tmp = upper; + + I915_WRITE_FW(VLV_COUNTER_CONTROL, + _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH)); + lower = I915_READ_FW(reg); + + I915_WRITE_FW(VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); + upper = I915_READ_FW(reg); + } while (upper != tmp && --loop); + + /* Everywhere else we always use VLV_COUNTER_CONTROL with the + * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set + * now. 
+ */ + + spin_unlock_irq(&dev_priv->uncore.lock); + + return lower | (u64)upper << 8; +} + +u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv, + const i915_reg_t reg) +{ + u64 time_hw, units, div; + + if (!intel_enable_rc6()) + return 0; + + intel_runtime_pm_get(dev_priv); + + /* On VLV and CHV, residency time is in CZ units rather than 1.28us */ + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { + units = 1000; + div = dev_priv->czclk_freq; + + time_hw = vlv_residency_raw(dev_priv, reg); + } else if (IS_GEN9_LP(dev_priv)) { + units = 1000; + div = 1200; /* 833.33ns */ + + time_hw = I915_READ(reg); + } else { + units = 128000; /* 1.28us */ + div = 100000; + + time_hw = I915_READ(reg); + } + + intel_runtime_pm_put(dev_priv); + return DIV_ROUND_UP_ULL(time_hw * units, div); +} diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 6c5f9958197d..66a2b8b83972 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -39,7 +39,7 @@ */ #define LEGACY_REQUEST_SIZE 200 -int __intel_ring_space(int head, int tail, int size) +static int __intel_ring_space(int head, int tail, int size) { int space = head - tail; if (space <= 0) @@ -49,34 +49,26 @@ int __intel_ring_space(int head, int tail, int size) void intel_ring_update_space(struct intel_ring *ring) { - if (ring->last_retired_head != -1) { - ring->head = ring->last_retired_head; - ring->last_retired_head = -1; - } - - ring->space = __intel_ring_space(ring->head & HEAD_ADDR, - ring->tail, ring->size); + ring->space = __intel_ring_space(ring->head, ring->tail, ring->size); } static int gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - u32 cmd; - int ret; + u32 cmd, *cs; cmd = MI_FLUSH; if (mode & EMIT_INVALIDATE) cmd |= MI_READ_FLUSH; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return 
PTR_ERR(cs); - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = cmd; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -84,9 +76,7 @@ gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) static int gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - u32 cmd; - int ret; + u32 cmd, *cs; /* * read/write caches: @@ -123,13 +113,13 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) cmd |= MI_INVALIDATE_ISP; } - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = cmd; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -174,35 +164,33 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) static int intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; u32 scratch_addr = i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; - int ret; - - ret = intel_ring_begin(req, 6); - if (ret) - return ret; - - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5)); - intel_ring_emit(ring, PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD); - intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, 0); /* low dword */ - intel_ring_emit(ring, 0); /* high dword */ - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); - - ret = intel_ring_begin(req, 6); - if (ret) - return ret; - - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5)); - intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE); - intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + u32 *cs; + + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return 
PTR_ERR(cs); + + *cs++ = GFX_OP_PIPE_CONTROL(5); + *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; + *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; /* low dword */ + *cs++ = 0; /* high dword */ + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); + + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = GFX_OP_PIPE_CONTROL(5); + *cs++ = PIPE_CONTROL_QW_WRITE; + *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + *cs++ = 0; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -210,10 +198,9 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) static int gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; u32 scratch_addr = i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; - u32 flags = 0; + u32 *cs, flags = 0; int ret; /* Force SNB workarounds for PIPE_CONTROL flushes */ @@ -247,15 +234,15 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; } - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); - intel_ring_emit(ring, flags); - intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = flags; + *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + intel_ring_advance(req, cs); return 0; } @@ -263,20 +250,17 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) static int gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); - 
intel_ring_emit(ring, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; + *cs++ = 0; + *cs++ = 0; + intel_ring_advance(req, cs); return 0; } @@ -284,11 +268,9 @@ gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) static int gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; u32 scratch_addr = i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; - u32 flags = 0; - int ret; + u32 *cs, flags = 0; /* * Ensure that any following seqno writes only happen when the render @@ -332,37 +314,15 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) gen7_render_ring_cs_stall_wa(req); } - ret = intel_ring_begin(req, 4); - if (ret) - return ret; - - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); - intel_ring_emit(ring, flags); - intel_ring_emit(ring, scratch_addr); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); - - return 0; -} + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); -static int -gen8_emit_pipe_control(struct drm_i915_gem_request *req, - u32 flags, u32 scratch_addr) -{ - struct intel_ring *ring = req->ring; - int ret; - - ret = intel_ring_begin(req, 6); - if (ret) - return ret; - - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, flags); - intel_ring_emit(ring, scratch_addr); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = flags; + *cs++ = scratch_addr; + *cs++ = 0; + intel_ring_advance(req, cs); return 0; } @@ -370,12 +330,14 @@ gen8_emit_pipe_control(struct drm_i915_gem_request *req, static int gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - u32 scratch_addr = - i915_ggtt_offset(req->engine->scratch) 
+ 2 * CACHELINE_BYTES; - u32 flags = 0; - int ret; + u32 flags; + u32 *cs; - flags |= PIPE_CONTROL_CS_STALL; + cs = intel_ring_begin(req, mode & EMIT_INVALIDATE ? 12 : 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + flags = PIPE_CONTROL_CS_STALL; if (mode & EMIT_FLUSH) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; @@ -394,15 +356,19 @@ gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; /* WaCsStallBeforeStateCacheInvalidate:bdw,chv */ - ret = gen8_emit_pipe_control(req, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD, - 0); - if (ret) - return ret; + cs = gen8_emit_pipe_control(cs, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD, + 0); } - return gen8_emit_pipe_control(req, flags, scratch_addr); + cs = gen8_emit_pipe_control(cs, flags, + i915_ggtt_offset(req->engine->scratch) + + 2 * CACHELINE_BYTES); + + intel_ring_advance(req, cs); + + return 0; } static void ring_setup_phys_status_page(struct intel_engine_cs *engine) @@ -646,52 +612,13 @@ static void reset_ring_common(struct intel_engine_cs *engine, } /* If the rq hung, jump to its breadcrumb and skip the batch */ - if (request->fence.error == -EIO) { - struct intel_ring *ring = request->ring; - - ring->head = request->postfix; - ring->last_retired_head = -1; - } + if (request->fence.error == -EIO) + request->ring->head = request->postfix; } else { engine->legacy_active_context = NULL; } } -static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) -{ - struct intel_ring *ring = req->ring; - struct i915_workarounds *w = &req->i915->workarounds; - int ret, i; - - if (w->count == 0) - return 0; - - ret = req->engine->emit_flush(req, EMIT_BARRIER); - if (ret) - return ret; - - ret = intel_ring_begin(req, (w->count * 2 + 2)); - if (ret) - return ret; - - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count)); - for (i = 0; i < w->count; i++) { - intel_ring_emit_reg(ring, w->reg[i].addr); - intel_ring_emit(ring, 
w->reg[i].value); - } - intel_ring_emit(ring, MI_NOOP); - - intel_ring_advance(ring); - - ret = req->engine->emit_flush(req, EMIT_BARRIER); - if (ret) - return ret; - - DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count); - - return 0; -} - static int intel_rcs_ctx_init(struct drm_i915_gem_request *req) { int ret; @@ -707,498 +634,6 @@ static int intel_rcs_ctx_init(struct drm_i915_gem_request *req) return 0; } -static int wa_add(struct drm_i915_private *dev_priv, - i915_reg_t addr, - const u32 mask, const u32 val) -{ - const u32 idx = dev_priv->workarounds.count; - - if (WARN_ON(idx >= I915_MAX_WA_REGS)) - return -ENOSPC; - - dev_priv->workarounds.reg[idx].addr = addr; - dev_priv->workarounds.reg[idx].value = val; - dev_priv->workarounds.reg[idx].mask = mask; - - dev_priv->workarounds.count++; - - return 0; -} - -#define WA_REG(addr, mask, val) do { \ - const int r = wa_add(dev_priv, (addr), (mask), (val)); \ - if (r) \ - return r; \ - } while (0) - -#define WA_SET_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) - -#define WA_CLR_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) - -#define WA_SET_FIELD_MASKED(addr, mask, value) \ - WA_REG(addr, mask, _MASKED_FIELD(mask, value)) - -#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask)) -#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask)) - -#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val) - -static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, - i915_reg_t reg) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct i915_workarounds *wa = &dev_priv->workarounds; - const uint32_t index = wa->hw_whitelist_count[engine->id]; - - if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS)) - return -EINVAL; - - WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), - i915_mmio_reg_offset(reg)); - wa->hw_whitelist_count[engine->id]++; - - return 0; -} - -static int gen8_init_workarounds(struct 
intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); - - /* WaDisableAsyncFlipPerfMode:bdw,chv */ - WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); - - /* WaDisablePartialInstShootdown:bdw,chv */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); - - /* Use Force Non-Coherent whenever executing a 3D context. This is a - * workaround for for a possible hang in the unlikely event a TLB - * invalidation occurs during a PSD flush. - */ - /* WaForceEnableNonCoherent:bdw,chv */ - /* WaHdcDisableFetchWhenMasked:bdw,chv */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_DONOT_FETCH_MEM_WHEN_MASKED | - HDC_FORCE_NON_COHERENT); - - /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: - * "The Hierarchical Z RAW Stall Optimization allows non-overlapping - * polygons in the same 8x4 pixel/sample area to be processed without - * stalling waiting for the earlier ones to write to Hierarchical Z - * buffer." - * - * This optimization is off by default for BDW and CHV; turn it on. - */ - WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); - - /* Wa4x4STCOptimizationDisable:bdw,chv */ - WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
- */ - WA_SET_FIELD_MASKED(GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK, - GEN6_WIZ_HASHING_16x4); - - return 0; -} - -static int bdw_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen8_init_workarounds(engine); - if (ret) - return ret; - - /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); - - /* WaDisableDopClockGating:bdw */ - WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, - DOP_CLOCK_GATING_DISABLE); - - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); - - WA_SET_BIT_MASKED(HDC_CHICKEN0, - /* WaForceContextSaveRestoreNonCoherent:bdw */ - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | - /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ - (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); - - return 0; -} - -static int chv_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen8_init_workarounds(engine); - if (ret) - return ret; - - /* WaDisableThreadStallDopClockGating:chv */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); - - /* Improve HiZ throughput on CHV. 
*/ - WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); - - return 0; -} - -static int gen9_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl */ - I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE)); - - /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl */ - I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | - GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); - - /* WaDisableKillLogic:bxt,skl,kbl */ - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | - ECOCHK_DIS_TLB); - - /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl */ - /* WaDisablePartialInstShootdown:skl,bxt,kbl */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - FLOW_CONTROL_ENABLE | - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); - - /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); - - /* WaDisableDgMirrorFixInHalfSliceChicken5:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, - GEN9_DG_MIRROR_FIX_ENABLE); - - /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1, - GEN9_RHWO_OPTIMIZATION_DISABLE); - /* - * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set - * but we do that in per ctx batchbuffer as there is an issue - * with this register not getting restored on ctx restore - */ - } - - /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */ - WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, - GEN9_ENABLE_GPGPU_PREEMPTION); - - /* Wa4x4STCOptimizationDisable:skl,bxt,kbl */ - /* WaDisablePartialResolveInVc:skl,bxt,kbl */ - WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE | - GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE)); - - /* WaCcsTlbPrefetchDisable:skl,bxt,kbl */ - 
WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, - GEN9_CCS_TLB_PREFETCH_ENABLE); - - /* WaDisableMaskBasedCammingInRCC:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0, - PIXEL_MASK_CAMMING_DISABLE); - - /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | - HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); - - /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are - * both tied to WaForceContextSaveRestoreNonCoherent - * in some hsds for skl. We keep the tie for all gen9. The - * documentation is a bit hazy and so we want to get common behaviour, - * even though there is no clear evidence we would need both on kbl/bxt. - * This area has been source of system hangs so we play it safe - * and mimic the skl regardless of what bspec says. - * - * Use Force Non-Coherent whenever executing a 3D context. This - * is a workaround for a possible hang in the unlikely event - * a TLB invalidation occurs during a PSD flush. 
- */ - - /* WaForceEnableNonCoherent:skl,bxt,kbl */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FORCE_NON_COHERENT); - - /* WaDisableHDCInvalidation:skl,bxt,kbl */ - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | - BDW_DISABLE_HDC_INVALIDATION); - - /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl */ - if (IS_SKYLAKE(dev_priv) || - IS_KABYLAKE(dev_priv) || - IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); - - /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl */ - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); - - /* WaOCLCoherentLineFlush:skl,bxt,kbl */ - I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | - GEN8_LQSC_FLUSH_COHERENT_LINES)); - - /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt */ - ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); - if (ret) - return ret; - - /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl */ - ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); - if (ret) - return ret; - - /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl */ - ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1); - if (ret) - return ret; - - return 0; -} - -static int skl_tune_iz_hashing(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - u8 vals[3] = { 0, 0, 0 }; - unsigned int i; - - for (i = 0; i < 3; i++) { - u8 ss; - - /* - * Only consider slices where one, and only one, subslice has 7 - * EUs - */ - if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i])) - continue; - - /* - * subslice_7eu[i] != 0 (because of the check above) and - * ss_max == 4 (maximum number of subslices possible per slice) - * - * -> 0 <= ss <= 3; - */ - ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1; - vals[i] = 3 - ss; - } - - if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) - return 0; - - /* Tune IZ hashing. 
See intel_device_info_runtime_init() */ - WA_SET_FIELD_MASKED(GEN7_GT_MODE, - GEN9_IZ_HASHING_MASK(2) | - GEN9_IZ_HASHING_MASK(1) | - GEN9_IZ_HASHING_MASK(0), - GEN9_IZ_HASHING(2, vals[2]) | - GEN9_IZ_HASHING(1, vals[1]) | - GEN9_IZ_HASHING(0, vals[0])); - - return 0; -} - -static int skl_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* - * Actual WA is to disable percontext preemption granularity control - * until D0 which is the default case so this is equivalent to - * !WaDisablePerCtxtPreemptionGranularityControl:skl - */ - I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, - _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); - - /* WaEnableGapsTsvCreditFix:skl */ - I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | - GEN9_GAPS_TSV_CREDIT_DISABLE)); - - /* WaDisableGafsUnitClkGating:skl */ - WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); - - /* WaInPlaceDecompressionHang:skl */ - if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER)) - WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); - - /* WaDisableLSQCROPERFforOCL:skl */ - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - - return skl_tune_iz_hashing(engine); -} - -static int bxt_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WaStoreMultiplePTEenable:bxt */ - /* This is a requirement according to Hardware specification */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF); - - /* WaSetClckGatingDisableMedia:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & - ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE)); - } - - /* WaDisableThreadStallDopClockGating:bxt */ - 
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - STALL_DOP_GATING_DISABLE); - - /* WaDisablePooledEuLoadBalancingFix:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) { - WA_SET_BIT_MASKED(FF_SLICE_CS_CHICKEN2, - GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE); - } - - /* WaDisableSbeCacheDispatchPortSharing:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) { - WA_SET_BIT_MASKED( - GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); - } - - /* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */ - /* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */ - /* WaDisableObjectLevelPreemtionForInstanceId:bxt */ - /* WaDisableLSQCROPERFforOCL:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - ret = wa_ring_whitelist_reg(engine, GEN9_CS_DEBUG_MODE1); - if (ret) - return ret; - - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - } - - /* WaProgramL3SqcReg1DefaultForPerf:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) - I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) | - L3_HIGH_PRIO_CREDITS(2)); - - /* WaToEnableHwFixForPushConstHWBug:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaInPlaceDecompressionHang:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) - WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); - - return 0; -} - -static int kbl_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WaEnableGapsTsvCreditFix:kbl */ - I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | - GEN9_GAPS_TSV_CREDIT_DISABLE)); - - /* WaDisableDynamicCreditSharing:kbl */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) - WA_SET_BIT(GAMT_CHKN_BIT_REG, - GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); - - /* 
WaDisableFenceDestinationToSLM:kbl (pre-prod) */ - if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0)) - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FENCE_DEST_SLM_DISABLE); - - /* WaToEnableHwFixForPushConstHWBug:kbl */ - if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaDisableGafsUnitClkGating:kbl */ - WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); - - /* WaDisableSbeCacheDispatchPortSharing:kbl */ - WA_SET_BIT_MASKED( - GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); - - /* WaInPlaceDecompressionHang:kbl */ - WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); - - /* WaDisableLSQCROPERFforOCL:kbl */ - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - - return 0; -} - -int init_workarounds_ring(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - WARN_ON(engine->id != RCS); - - dev_priv->workarounds.count = 0; - dev_priv->workarounds.hw_whitelist_count[RCS] = 0; - - if (IS_BROADWELL(dev_priv)) - return bdw_init_workarounds(engine); - - if (IS_CHERRYVIEW(dev_priv)) - return chv_init_workarounds(engine); - - if (IS_SKYLAKE(dev_priv)) - return skl_init_workarounds(engine); - - if (IS_BROXTON(dev_priv)) - return bxt_init_workarounds(engine); - - if (IS_KABYLAKE(dev_priv)) - return kbl_init_workarounds(engine); - - return 0; -} - static int init_render_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1257,7 +692,7 @@ static void render_ring_cleanup(struct intel_engine_cs *engine) i915_vma_unpin_and_release(&dev_priv->semaphore); } -static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *out) +static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *cs) { struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *waiter; @@ -1268,23 +703,22 @@ static u32 
*gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *out) if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - *out++ = GFX_OP_PIPE_CONTROL(6); - *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_CS_STALL); - *out++ = lower_32_bits(gtt_offset); - *out++ = upper_32_bits(gtt_offset); - *out++ = req->global_seqno; - *out++ = 0; - *out++ = (MI_SEMAPHORE_SIGNAL | - MI_SEMAPHORE_TARGET(waiter->hw_id)); - *out++ = 0; + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_CS_STALL; + *cs++ = lower_32_bits(gtt_offset); + *cs++ = upper_32_bits(gtt_offset); + *cs++ = req->global_seqno; + *cs++ = 0; + *cs++ = MI_SEMAPHORE_SIGNAL | + MI_SEMAPHORE_TARGET(waiter->hw_id); + *cs++ = 0; } - return out; + return cs; } -static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *out) +static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *cs) { struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *waiter; @@ -1295,19 +729,19 @@ static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *out) if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - *out++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; - *out++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT; - *out++ = upper_32_bits(gtt_offset); - *out++ = req->global_seqno; - *out++ = (MI_SEMAPHORE_SIGNAL | - MI_SEMAPHORE_TARGET(waiter->hw_id)); - *out++ = 0; + *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; + *cs++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT; + *cs++ = upper_32_bits(gtt_offset); + *cs++ = req->global_seqno; + *cs++ = MI_SEMAPHORE_SIGNAL | + MI_SEMAPHORE_TARGET(waiter->hw_id); + *cs++ = 0; } - return out; + return cs; } -static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *out) +static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs) { struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *engine; @@ -1322,16 +756,16 @@ static u32 
*gen6_signal(struct drm_i915_gem_request *req, u32 *out) mbox_reg = req->engine->semaphore.mbox.signal[engine->hw_id]; if (i915_mmio_reg_valid(mbox_reg)) { - *out++ = MI_LOAD_REGISTER_IMM(1); - *out++ = i915_mmio_reg_offset(mbox_reg); - *out++ = req->global_seqno; + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(mbox_reg); + *cs++ = req->global_seqno; num_rings++; } } if (num_rings & 1) - *out++ = MI_NOOP; + *cs++ = MI_NOOP; - return out; + return cs; } static void i9xx_submit_request(struct drm_i915_gem_request *request) @@ -1340,18 +774,19 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request) i915_gem_request_submit(request); + assert_ring_tail_valid(request->ring, request->tail); I915_WRITE_TAIL(request->engine, request->tail); } -static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, - u32 *out) +static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) { - *out++ = MI_STORE_DWORD_INDEX; - *out++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; - *out++ = req->global_seqno; - *out++ = MI_USER_INTERRUPT; + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; + *cs++ = req->global_seqno; + *cs++ = MI_USER_INTERRUPT; - req->tail = intel_ring_offset(req->ring, out); + req->tail = intel_ring_offset(req, cs); + assert_ring_tail_valid(req->ring, req->tail); } static const int i9xx_emit_breadcrumb_sz = 4; @@ -1364,34 +799,33 @@ static const int i9xx_emit_breadcrumb_sz = 4; * Update the mailbox registers in the *other* rings with the current seqno. * This acts like a signal in the canonical semaphore. 
*/ -static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, - u32 *out) +static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) { return i9xx_emit_breadcrumb(req, - req->engine->semaphore.signal(req, out)); + req->engine->semaphore.signal(req, cs)); } static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req, - u32 *out) + u32 *cs) { struct intel_engine_cs *engine = req->engine; if (engine->semaphore.signal) - out = engine->semaphore.signal(req, out); - - *out++ = GFX_OP_PIPE_CONTROL(6); - *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE); - *out++ = intel_hws_seqno_address(engine); - *out++ = 0; - *out++ = req->global_seqno; + cs = engine->semaphore.signal(req, cs); + + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE; + *cs++ = intel_hws_seqno_address(engine); + *cs++ = 0; + *cs++ = req->global_seqno; /* We're thrashing one dword of HWS. 
*/ - *out++ = 0; - *out++ = MI_USER_INTERRUPT; - *out++ = MI_NOOP; + *cs++ = 0; + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; - req->tail = intel_ring_offset(req->ring, out); + req->tail = intel_ring_offset(req, cs); + assert_ring_tail_valid(req->ring, req->tail); } static const int gen8_render_emit_breadcrumb_sz = 8; @@ -1408,24 +842,21 @@ static int gen8_ring_sync_to(struct drm_i915_gem_request *req, struct drm_i915_gem_request *signal) { - struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = req->i915; u64 offset = GEN8_WAIT_OFFSET(req->engine, signal->engine->id); struct i915_hw_ppgtt *ppgtt; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_SEMAPHORE_WAIT | - MI_SEMAPHORE_GLOBAL_GTT | - MI_SEMAPHORE_SAD_GTE_SDD); - intel_ring_emit(ring, signal->global_seqno); - intel_ring_emit(ring, lower_32_bits(offset)); - intel_ring_emit(ring, upper_32_bits(offset)); - intel_ring_advance(ring); + *cs++ = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_SAD_GTE_SDD; + *cs++ = signal->global_seqno; + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + intel_ring_advance(req, cs); /* When the !RCS engines idle waiting upon a semaphore, they lose their * pagetables and we must reload them before executing the batch. 
@@ -1442,28 +873,27 @@ static int gen6_ring_sync_to(struct drm_i915_gem_request *req, struct drm_i915_gem_request *signal) { - struct intel_ring *ring = req->ring; u32 dw1 = MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | MI_SEMAPHORE_REGISTER; u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->hw_id]; - int ret; + u32 *cs; WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, dw1 | wait_mbox); + *cs++ = dw1 | wait_mbox; /* Throughout all of the GEM code, seqno passed implies our current * seqno is >= the last seqno executed. However for hardware the * comparison is strictly greater than. */ - intel_ring_emit(ring, signal->global_seqno - 1); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = signal->global_seqno - 1; + *cs++ = 0; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1564,16 +994,15 @@ i8xx_irq_disable(struct intel_engine_cs *engine) static int bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_FLUSH); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_FLUSH; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1639,20 +1068,16 @@ i965_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 length, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_BATCH_BUFFER_START | - MI_BATCH_GTT | - (dispatch_flags & I915_DISPATCH_SECURE ? 
- 0 : MI_BATCH_NON_SECURE_I965)); - intel_ring_emit(ring, offset); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags & + I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965); + *cs++ = offset; + intel_ring_advance(req, cs); return 0; } @@ -1666,59 +1091,56 @@ i830_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - u32 cs_offset = i915_ggtt_offset(req->engine->scratch); - int ret; + u32 *cs, cs_offset = i915_ggtt_offset(req->engine->scratch); - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* Evict the invalid PTE TLBs */ - intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA); - intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096); - intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */ - intel_ring_emit(ring, cs_offset); - intel_ring_emit(ring, 0xdeadbeef); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA; + *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096; + *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */ + *cs++ = cs_offset; + *cs++ = 0xdeadbeef; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) { if (len > I830_BATCH_LIMIT) return -ENOSPC; - ret = intel_ring_begin(req, 6 + 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 6 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* Blit the batch (which has now all relocs applied) to the * stable batch scratch bo area (so that the CS never * stumbles over its tlb invalidation bug) ... 
*/ - intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA); - intel_ring_emit(ring, - BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096); - intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 4096); - intel_ring_emit(ring, cs_offset); - intel_ring_emit(ring, 4096); - intel_ring_emit(ring, offset); - - intel_ring_emit(ring, MI_FLUSH); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA; + *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096; + *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096; + *cs++ = cs_offset; + *cs++ = 4096; + *cs++ = offset; + + *cs++ = MI_FLUSH; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); /* ... and execute it. */ offset = cs_offset; } - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT); - intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE)); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : + MI_BATCH_NON_SECURE); + intel_ring_advance(req, cs); return 0; } @@ -1728,17 +1150,16 @@ i915_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT); - intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE)); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 
0 : + MI_BATCH_NON_SECURE); + intel_ring_advance(req, cs); return 0; } @@ -1961,7 +1382,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size) if (IS_I830(engine->i915) || IS_I845G(engine->i915)) ring->effective_size -= 2 * CACHELINE_BYTES; - ring->last_retired_head = -1; intel_ring_update_space(ring); vma = intel_ring_create_vma(engine->i915, size); @@ -1985,7 +1405,7 @@ intel_ring_free(struct intel_ring *ring) kfree(ring); } -static int context_pin(struct i915_gem_context *ctx, unsigned int flags) +static int context_pin(struct i915_gem_context *ctx) { struct i915_vma *vma = ctx->engine[RCS].state; int ret; @@ -2000,7 +1420,8 @@ static int context_pin(struct i915_gem_context *ctx, unsigned int flags) return ret; } - return i915_vma_pin(vma, 0, ctx->ggtt_alignment, PIN_GLOBAL | flags); + return i915_vma_pin(vma, 0, I915_GTT_MIN_ALIGNMENT, + PIN_GLOBAL | PIN_HIGH); } static int intel_ring_context_pin(struct intel_engine_cs *engine, @@ -2013,15 +1434,10 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine, if (ce->pin_count++) return 0; + GEM_BUG_ON(!ce->pin_count); /* no overflow please! 
*/ if (ce->state) { - unsigned int flags; - - flags = 0; - if (i915_gem_context_is_kernel(ctx)) - flags = PIN_HIGH; - - ret = context_pin(ctx, flags); + ret = context_pin(ctx); if (ret) goto error; @@ -2146,15 +1562,13 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, dev_priv, id) { + for_each_engine(engine, dev_priv, id) engine->buffer->head = engine->buffer->tail; - engine->buffer->last_retired_head = -1; - } } static int ring_request_alloc(struct drm_i915_gem_request *request) { - int ret; + u32 *cs; GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count); @@ -2167,9 +1581,9 @@ static int ring_request_alloc(struct drm_i915_gem_request *request) GEM_BUG_ON(!request->engine->buffer); request->ring = request->engine->buffer; - ret = intel_ring_begin(request, 0); - if (ret) - return ret; + cs = intel_ring_begin(request, 0); + if (IS_ERR(cs)) + return PTR_ERR(cs); request->reserved_space -= LEGACY_REQUEST_SIZE; return 0; @@ -2224,7 +1638,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) return 0; } -int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) +u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) { struct intel_ring *ring = req->ring; int remain_actual = ring->size - ring->tail; @@ -2232,6 +1646,7 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) int bytes = num_dwords * sizeof(u32); int total_bytes, wait_bytes; bool need_wrap = false; + u32 *cs; total_bytes = bytes + req->reserved_space; @@ -2258,7 +1673,7 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) if (wait_bytes > ring->space) { int ret = wait_for_space(req, wait_bytes); if (unlikely(ret)) - return ret; + return ERR_PTR(ret); } if (unlikely(need_wrap)) { @@ -2271,31 +1686,34 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) ring->space -= remain_actual; 
} + GEM_BUG_ON(ring->tail > ring->size - bytes); + cs = ring->vaddr + ring->tail; + ring->tail += bytes; ring->space -= bytes; GEM_BUG_ON(ring->space < 0); - return 0; + + return cs; } /* Align the ring tail to a cacheline boundary */ int intel_ring_cacheline_align(struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; int num_dwords = - (ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); - int ret; + (req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); + u32 *cs; if (num_dwords == 0) return 0; num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords; - ret = intel_ring_begin(req, num_dwords); - if (ret) - return ret; + cs = intel_ring_begin(req, num_dwords); + if (IS_ERR(cs)) + return PTR_ERR(cs); while (num_dwords--) - intel_ring_emit(ring, MI_NOOP); + *cs++ = MI_NOOP; - intel_ring_advance(ring); + intel_ring_advance(req, cs); return 0; } @@ -2339,13 +1757,11 @@ static void gen6_bsd_submit_request(struct drm_i915_gem_request *request) static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - uint32_t cmd; - int ret; + u32 cmd, *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); cmd = MI_FLUSH_DW; if (INTEL_GEN(req->i915) >= 8) @@ -2367,16 +1783,16 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) if (mode & EMIT_INVALIDATE) cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD; - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); + *cs++ = cmd; + *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; if (INTEL_GEN(req->i915) >= 8) { - intel_ring_emit(ring, 0); /* upper addr */ - intel_ring_emit(ring, 0); /* value */ + *cs++ = 0; /* upper addr */ + *cs++ = 0; /* value */ } else { - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); + *cs++ = 0; + *cs++ = MI_NOOP; } - intel_ring_advance(ring); + 
intel_ring_advance(req, cs); return 0; } @@ -2385,23 +1801,21 @@ gen8_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; bool ppgtt = USES_PPGTT(req->i915) && !(dispatch_flags & I915_DISPATCH_SECURE); - int ret; + u32 *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* FIXME(BDW): Address space and security selectors. */ - intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | - (dispatch_flags & I915_DISPATCH_RS ? - MI_BATCH_RESOURCE_STREAMER : 0)); - intel_ring_emit(ring, lower_32_bits(offset)); - intel_ring_emit(ring, upper_32_bits(offset)); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START_GEN8 | (ppgtt << 8) | (dispatch_flags & + I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -2411,22 +1825,19 @@ hsw_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_BATCH_BUFFER_START | - (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | - (dispatch_flags & I915_DISPATCH_RS ? - MI_BATCH_RESOURCE_STREAMER : 0)); + *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? + 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | + (dispatch_flags & I915_DISPATCH_RS ? 
+ MI_BATCH_RESOURCE_STREAMER : 0); /* bit0-7 is the length on GEN6+ */ - intel_ring_emit(ring, offset); - intel_ring_advance(ring); + *cs++ = offset; + intel_ring_advance(req, cs); return 0; } @@ -2436,20 +1847,17 @@ gen6_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_BATCH_BUFFER_START | - (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE_I965)); + *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? + 0 : MI_BATCH_NON_SECURE_I965); /* bit0-7 is the length on GEN6+ */ - intel_ring_emit(ring, offset); - intel_ring_advance(ring); + *cs++ = offset; + intel_ring_advance(req, cs); return 0; } @@ -2458,13 +1866,11 @@ gen6_emit_bb_start(struct drm_i915_gem_request *req, static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - uint32_t cmd; - int ret; + u32 cmd, *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); cmd = MI_FLUSH_DW; if (INTEL_GEN(req->i915) >= 8) @@ -2485,17 +1891,16 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) */ if (mode & EMIT_INVALIDATE) cmd |= MI_INVALIDATE_TLB; - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, - I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); + *cs++ = cmd; + *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; if (INTEL_GEN(req->i915) >= 8) { - intel_ring_emit(ring, 0); /* upper addr */ - intel_ring_emit(ring, 0); /* value */ + *cs++ = 0; /* upper addr */ + *cs++ = 0; /* value */ } else { - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); + *cs++ = 0; + *cs++ = MI_NOOP; } - intel_ring_advance(ring); + intel_ring_advance(req, cs); return 
0; } @@ -2635,6 +2040,16 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv, } } +static void i9xx_set_default_submission(struct intel_engine_cs *engine) +{ + engine->submit_request = i9xx_submit_request; +} + +static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine) +{ + engine->submit_request = gen6_bsd_submit_request; +} + static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { @@ -2665,7 +2080,8 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->emit_breadcrumb_sz++; } } - engine->submit_request = i9xx_submit_request; + + engine->set_default_submission = i9xx_set_default_submission; if (INTEL_GEN(dev_priv) >= 8) engine->emit_bb_start = gen8_emit_bb_start; @@ -2701,7 +2117,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1; - engine->emit_breadcrumb_sz += num_rings * 6; + engine->emit_breadcrumb_sz += num_rings * 8; } } else if (INTEL_GEN(dev_priv) >= 6) { engine->init_context = intel_rcs_ctx_init; @@ -2750,7 +2166,7 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) if (INTEL_GEN(dev_priv) >= 6) { /* gen6 bsd needs a special wa for tail updates */ if (IS_GEN6(dev_priv)) - engine->submit_request = gen6_bsd_submit_request; + engine->set_default_submission = gen6_bsd_set_default_submission; engine->emit_flush = gen6_bsd_ring_flush; if (INTEL_GEN(dev_priv) < 8) engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 8cb2078c5bfc..a82a0807f64d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -5,6 +5,7 @@ #include "i915_gem_batch_pool.h" #include "i915_gem_request.h" #include "i915_gem_timeline.h" +#include "i915_selftest.h" #define I915_CMD_HASH_ORDER 9 @@ -144,19 +145,10 @@ struct intel_ring { u32 head; u32 
tail; + int space; int size; int effective_size; - - /** We track the position of the requests in the ring buffer, and - * when each is retired we increment last_retired_head as the GPU - * must have finished processing the request and so we know we - * can advance the ringbuffer up to that position. - * - * last_retired_head is set to -1 after the value is consumed so - * we can detect new retirements. - */ - u32 last_retired_head; }; struct i915_gem_context; @@ -184,26 +176,26 @@ struct i915_ctx_workarounds { struct drm_i915_gem_request; struct intel_render_state; +/* + * Engine IDs definitions. + * Keep instances of the same type engine together. + */ +enum intel_engine_id { + RCS = 0, + BCS, + VCS, + VCS2, +#define _VCS(n) (VCS + (n)) + VECS +}; + struct intel_engine_cs { struct drm_i915_private *i915; const char *name; - enum intel_engine_id { - RCS = 0, - BCS, - VCS, - VCS2, /* Keep instances of the same type engine together. */ - VECS - } id; -#define _VCS(n) (VCS + (n)) + enum intel_engine_id id; unsigned int exec_id; - enum intel_engine_hw_id { - RCS_HW = 0, - VCS_HW, - BCS_HW, - VECS_HW, - VCS2_HW - } hw_id; - enum intel_engine_hw_id guc_id; /* XXX same as hw_id? */ + unsigned int hw_id; + unsigned int guc_id; u32 mmio_base; unsigned int irq_shift; struct intel_ring *buffer; @@ -211,6 +203,11 @@ struct intel_engine_cs { struct intel_render_state *render_state; + atomic_t irq_count; + unsigned long irq_posted; +#define ENGINE_IRQ_BREADCRUMB 0 +#define ENGINE_IRQ_EXECLIST 1 + /* Rather than have every client wait upon all user interrupts, * with the herd waking after every interrupt and each doing the * heavyweight seqno dance, we delegate the task (of being the @@ -228,22 +225,22 @@ struct intel_engine_cs { * the overhead of waking that client is much preferred. 
*/ struct intel_breadcrumbs { - struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */ - bool irq_posted; + spinlock_t irq_lock; /* protects irq_*; irqsafe */ + struct intel_wait *irq_wait; /* oldest waiter by retirement */ - spinlock_t lock; /* protects the lists of requests; irqsafe */ + spinlock_t rb_lock; /* protects the rb and wraps irq_lock */ struct rb_root waiters; /* sorted by retirement, priority */ struct rb_root signals; /* sorted by retirement */ - struct intel_wait *first_wait; /* oldest waiter by retirement */ struct task_struct *signaler; /* used for fence signalling */ - struct drm_i915_gem_request *first_signal; + struct drm_i915_gem_request __rcu *first_signal; struct timer_list fake_irq; /* used after a missed interrupt */ struct timer_list hangcheck; /* detect missed interrupts */ - unsigned long timeout; + unsigned int hangcheck_interrupts; + bool irq_armed : 1; bool irq_enabled : 1; - bool rpm_wakelock : 1; + I915_SELFTEST_DECLARE(bool mock : 1); } breadcrumbs; /* @@ -266,6 +263,8 @@ struct intel_engine_cs { void (*reset_hw)(struct intel_engine_cs *engine, struct drm_i915_gem_request *req); + void (*set_default_submission)(struct intel_engine_cs *engine); + int (*context_pin)(struct intel_engine_cs *engine, struct i915_gem_context *ctx); void (*context_unpin)(struct intel_engine_cs *engine, @@ -285,7 +284,7 @@ struct intel_engine_cs { #define I915_DISPATCH_PINNED BIT(1) #define I915_DISPATCH_RS BIT(2) void (*emit_breadcrumb)(struct drm_i915_gem_request *req, - u32 *out); + u32 *cs); int emit_breadcrumb_sz; /* Pass the request to the hardware queue (e.g. 
directly into @@ -368,7 +367,7 @@ struct intel_engine_cs { /* AKA wait() */ int (*sync_to)(struct drm_i915_gem_request *req, struct drm_i915_gem_request *signal); - u32 *(*signal)(struct drm_i915_gem_request *req, u32 *out); + u32 *(*signal)(struct drm_i915_gem_request *req, u32 *cs); } semaphore; /* Execlists */ @@ -376,13 +375,11 @@ struct intel_engine_cs { struct execlist_port { struct drm_i915_gem_request *request; unsigned int count; + GEM_DEBUG_DECL(u32 context_id); } execlist_port[2]; struct rb_root execlist_queue; struct rb_node *execlist_first; unsigned int fw_domains; - bool disable_lite_restore_wa; - bool preempt_wa; - u32 ctx_desc_template; /* Contexts are pinned whilst they are active on the GPU. The last * context executed remains active whilst the GPU is idle - the @@ -435,18 +432,10 @@ struct intel_engine_cs { u32 (*get_cmd_length_mask)(u32 cmd_header); }; -static inline unsigned +static inline unsigned int intel_engine_flag(const struct intel_engine_cs *engine) { - return 1 << engine->id; -} - -static inline void -intel_flush_status_page(struct intel_engine_cs *engine, int reg) -{ - mb(); - clflush(&engine->status_page.page_addr[reg]); - mb(); + return BIT(engine->id); } static inline u32 @@ -457,10 +446,22 @@ intel_read_status_page(struct intel_engine_cs *engine, int reg) } static inline void -intel_write_status_page(struct intel_engine_cs *engine, - int reg, u32 value) +intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) { - engine->status_page.page_addr[reg] = value; + /* Writing into the status page should be done sparingly. Since + * we do when we are uncertain of the device state, we take a bit + * of extra paranoia to try and ensure that the HWS takes the value + * we give and that it doesn't end up trapped inside the CPU! 
+ */ + if (static_cpu_has(X86_FEATURE_CLFLUSH)) { + mb(); + clflush(&engine->status_page.page_addr[reg]); + engine->status_page.page_addr[reg] = value; + clflush(&engine->status_page.page_addr[reg]); + mb(); + } else { + WRITE_ONCE(engine->status_page.page_addr[reg], value); + } } /* @@ -495,21 +496,12 @@ void intel_engine_cleanup(struct intel_engine_cs *engine); void intel_legacy_submission_resume(struct drm_i915_private *dev_priv); -int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n); int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); -static inline void intel_ring_emit(struct intel_ring *ring, u32 data) -{ - *(uint32_t *)(ring->vaddr + ring->tail) = data; - ring->tail += 4; -} - -static inline void intel_ring_emit_reg(struct intel_ring *ring, i915_reg_t reg) -{ - intel_ring_emit(ring, i915_mmio_reg_offset(reg)); -} +u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req, int n); -static inline void intel_ring_advance(struct intel_ring *ring) +static inline void +intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs) { /* Dummy function. * @@ -519,6 +511,7 @@ static inline void intel_ring_advance(struct intel_ring *ring) * reserved for the command packet (i.e. the value passed to * intel_ring_begin()). */ + GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs); } static inline u32 @@ -527,14 +520,26 @@ intel_ring_wrap(const struct intel_ring *ring, u32 pos) return pos & (ring->size - 1); } -static inline u32 intel_ring_offset(struct intel_ring *ring, void *addr) +static inline u32 +intel_ring_offset(const struct drm_i915_gem_request *req, void *addr) { /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. 
*/ - u32 offset = addr - ring->vaddr; - return intel_ring_wrap(ring, offset); + u32 offset = addr - req->ring->vaddr; + GEM_BUG_ON(offset > req->ring->size); + return intel_ring_wrap(req->ring, offset); +} + +static inline void +assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) +{ + /* We could combine these into a single tail operation, but keeping + * them as seperate tests will help identify the cause should one + * ever fire. + */ + GEM_BUG_ON(!IS_ALIGNED(tail, 8)); + GEM_BUG_ON(tail >= ring->size); } -int __intel_ring_space(int head, int tail, int size); void intel_ring_update_space(struct intel_ring *ring); void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno); @@ -567,10 +572,11 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine) * wtih serialising this hint with anything, so document it as * a hint and nothing more. */ - return READ_ONCE(engine->timeline->last_submitted_seqno); + return READ_ONCE(engine->timeline->seqno); } int init_workarounds_ring(struct intel_engine_cs *engine); +int intel_ring_workarounds_emit(struct drm_i915_gem_request *req); void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone); @@ -592,12 +598,51 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); -static inline void intel_wait_init(struct intel_wait *wait, u32 seqno) +static inline void intel_wait_init(struct intel_wait *wait, + struct drm_i915_gem_request *rq) +{ + wait->tsk = current; + wait->request = rq; +} + +static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno) { wait->tsk = current; wait->seqno = seqno; } +static inline bool intel_wait_has_seqno(const struct intel_wait *wait) +{ + return wait->seqno; +} + +static inline bool +intel_wait_update_seqno(struct intel_wait *wait, u32 
seqno) +{ + wait->seqno = seqno; + return intel_wait_has_seqno(wait); +} + +static inline bool +intel_wait_update_request(struct intel_wait *wait, + const struct drm_i915_gem_request *rq) +{ + return intel_wait_update_seqno(wait, i915_gem_request_global_seqno(rq)); +} + +static inline bool +intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno) +{ + return wait->seqno == seqno; +} + +static inline bool +intel_wait_check_request(const struct intel_wait *wait, + const struct drm_i915_gem_request *rq) +{ + return intel_wait_check_seqno(wait, i915_gem_request_global_seqno(rq)); +} + static inline bool intel_wait_complete(const struct intel_wait *wait) { return RB_EMPTY_NODE(&wait->node); @@ -608,38 +653,38 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine, void intel_engine_remove_wait(struct intel_engine_cs *engine, struct intel_wait *wait); void intel_engine_enable_signaling(struct drm_i915_gem_request *request); +void intel_engine_cancel_signaling(struct drm_i915_gem_request *request); static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) { - return rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh); + return READ_ONCE(engine->breadcrumbs.irq_wait); } -static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine) +unsigned int intel_engine_wakeup(struct intel_engine_cs *engine); +#define ENGINE_WAKEUP_WAITER BIT(0) +#define ENGINE_WAKEUP_ASLEEP BIT(1) + +void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); + +void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); +bool intel_breadcrumbs_busy(struct intel_engine_cs *engine); + +static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) { - bool wakeup = false; + memset(batch, 0, 6 * sizeof(u32)); - /* Note that for this not to dangerously chase a dangling pointer, - 
* we must hold the rcu_read_lock here. - * - * Also note that tsk is likely to be in !TASK_RUNNING state so an - * early test for tsk->state != TASK_RUNNING before wake_up_process() - * is unlikely to be beneficial. - */ - if (intel_engine_has_waiter(engine)) { - struct task_struct *tsk; - - rcu_read_lock(); - tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh); - if (tsk) - wakeup = wake_up_process(tsk); - rcu_read_unlock(); - } + batch[0] = GFX_OP_PIPE_CONTROL(6); + batch[1] = flags; + batch[2] = offset; - return wakeup; + return batch + 6; } -void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); -void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); -unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915); +bool intel_engine_is_idle(struct intel_engine_cs *engine); +bool intel_engines_are_idle(struct drm_i915_private *dev_priv); + +void intel_engines_reset_default_submission(struct drm_i915_private *i915); #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index c0b7e95b5b8e..f8a375f8dde6 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -49,19 +49,6 @@ * present for a given platform. 
*/ -#define for_each_power_well(i, power_well, domain_mask, power_domains) \ - for (i = 0; \ - i < (power_domains)->power_well_count && \ - ((power_well) = &(power_domains)->power_wells[i]); \ - i++) \ - for_each_if ((power_well)->domains & (domain_mask)) - -#define for_each_power_well_rev(i, power_well, domain_mask, power_domains) \ - for (i = (power_domains)->power_well_count - 1; \ - i >= 0 && ((power_well) = &(power_domains)->power_wells[i]);\ - i--) \ - for_each_if ((power_well)->domains & (domain_mask)) - bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv, int power_well_id); @@ -106,6 +93,16 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "PORT_DDI_D_LANES"; case POWER_DOMAIN_PORT_DDI_E_LANES: return "PORT_DDI_E_LANES"; + case POWER_DOMAIN_PORT_DDI_A_IO: + return "PORT_DDI_A_IO"; + case POWER_DOMAIN_PORT_DDI_B_IO: + return "PORT_DDI_B_IO"; + case POWER_DOMAIN_PORT_DDI_C_IO: + return "PORT_DDI_C_IO"; + case POWER_DOMAIN_PORT_DDI_D_IO: + return "PORT_DDI_D_IO"; + case POWER_DOMAIN_PORT_DDI_E_IO: + return "PORT_DDI_E_IO"; case POWER_DOMAIN_PORT_DSI: return "PORT_DSI"; case POWER_DOMAIN_PORT_CRT: @@ -198,19 +195,15 @@ static bool hsw_power_well_enabled(struct drm_i915_private *dev_priv, bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain) { - struct i915_power_domains *power_domains; struct i915_power_well *power_well; bool is_enabled; - int i; if (dev_priv->pm.suspended) return false; - power_domains = &dev_priv->power_domains; - is_enabled = true; - for_each_power_well_rev(i, power_well, BIT(domain), power_domains) { + for_each_power_domain_well_rev(dev_priv, power_well, BIT_ULL(domain)) { if (power_well->always_on) continue; @@ -385,124 +378,121 @@ static void hsw_set_power_well(struct drm_i915_private *dev_priv, } #define SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - 
BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_E_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_AUX_D) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_INIT)) -#define SKL_DISPLAY_DDI_A_E_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_A_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_E_LANES) | \ - BIT(POWER_DOMAIN_INIT)) -#define SKL_DISPLAY_DDI_B_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_INIT)) -#define SKL_DISPLAY_DDI_C_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_INIT)) -#define SKL_DISPLAY_DDI_D_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_E_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_INIT)) +#define SKL_DISPLAY_DDI_IO_A_E_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_E_IO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) +#define SKL_DISPLAY_DDI_IO_B_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) +#define 
SKL_DISPLAY_DDI_IO_C_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) +#define SKL_DISPLAY_DDI_IO_D_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define SKL_DISPLAY_DC_OFF_POWER_DOMAINS ( \ SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ - BIT(POWER_DOMAIN_MODESET) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_MODESET) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_GMBUS) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_GMBUS) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DISPLAY_DC_OFF_POWER_DOMAINS ( \ BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ - BIT(POWER_DOMAIN_MODESET) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_MODESET) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DPIO_CMN_A_POWER_DOMAINS ( \ - 
BIT(POWER_DOMAIN_PORT_DDI_A_LANES) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DPIO_CMN_BC_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_INIT)) -#define GLK_DISPLAY_DDI_A_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_A_LANES) | \ - BIT(POWER_DOMAIN_INIT)) -#define GLK_DISPLAY_DDI_B_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_INIT)) -#define GLK_DISPLAY_DDI_C_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + 
BIT_ULL(POWER_DOMAIN_INIT)) +#define GLK_DISPLAY_DDI_IO_A_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO)) +#define GLK_DISPLAY_DDI_IO_B_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO)) +#define GLK_DISPLAY_DDI_IO_C_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO)) #define GLK_DPIO_CMN_A_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_A_LANES) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DPIO_CMN_B_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DPIO_CMN_C_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_AUX_A_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_AUX_B_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_AUX_C_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_DC_OFF_POWER_DOMAINS ( \ GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ - BIT(POWER_DOMAIN_MODESET) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_MODESET) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) static void assert_can_enable_dc9(struct drm_i915_private *dev_priv) { @@ -732,7 +722,7 @@ gen9_sanitize_power_well_requests(struct drm_i915_private *dev_priv, * other request bits to be set, so WARN 
for those. */ if (power_well_id == SKL_DISP_PW_1 || - ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) && + (IS_GEN9_BC(dev_priv) && power_well_id == SKL_DISP_PW_MISC_IO)) DRM_DEBUG_DRIVER("Clearing auxiliary requests for %s forced on " "by DMC\n", power_well->name); @@ -847,14 +837,14 @@ static void skl_set_power_well(struct drm_i915_private *dev_priv, static void hsw_power_well_sync_hw(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - hsw_set_power_well(dev_priv, power_well, power_well->count > 0); - - /* - * We're taking over the BIOS, so clear any requests made by it since - * the driver is in charge now. - */ - if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE_REQUEST) + /* Take over the request bit if set by BIOS. */ + if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE_REQUEST) { + if (!(I915_READ(HSW_PWR_WELL_DRIVER) & + HSW_PWR_WELL_ENABLE_REQUEST)) + I915_WRITE(HSW_PWR_WELL_DRIVER, + HSW_PWR_WELL_ENABLE_REQUEST); I915_WRITE(HSW_PWR_WELL_BIOS, 0); + } } static void hsw_power_well_enable(struct drm_i915_private *dev_priv, @@ -881,10 +871,17 @@ static bool skl_power_well_enabled(struct drm_i915_private *dev_priv, static void skl_power_well_sync_hw(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - skl_set_power_well(dev_priv, power_well, power_well->count > 0); + uint32_t mask = SKL_POWER_WELL_REQ(power_well->id); + uint32_t bios_req = I915_READ(HSW_PWR_WELL_BIOS); + + /* Take over the request bit if set by BIOS. 
*/ + if (bios_req & mask) { + uint32_t drv_req = I915_READ(HSW_PWR_WELL_DRIVER); - /* Clear any request made by BIOS as driver is taking over */ - I915_WRITE(HSW_PWR_WELL_BIOS, 0); + if (!(drv_req & mask)) + I915_WRITE(HSW_PWR_WELL_DRIVER, drv_req | mask); + I915_WRITE(HSW_PWR_WELL_BIOS, bios_req & ~mask); + } } static void skl_power_well_enable(struct drm_i915_private *dev_priv, @@ -917,16 +914,6 @@ static bool bxt_dpio_cmn_power_well_enabled(struct drm_i915_private *dev_priv, return bxt_ddi_phy_is_enabled(dev_priv, power_well->data); } -static void bxt_dpio_cmn_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - if (power_well->count > 0) - bxt_dpio_cmn_power_well_enable(dev_priv, power_well); - else - bxt_dpio_cmn_power_well_disable(dev_priv, power_well); -} - - static void bxt_verify_ddi_phy_power_wells(struct drm_i915_private *dev_priv) { struct i915_power_well *power_well; @@ -964,10 +951,12 @@ static void gen9_assert_dbuf_enabled(struct drm_i915_private *dev_priv) static void gen9_dc_off_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { + struct intel_cdclk_state cdclk_state = {}; + gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); - WARN_ON(dev_priv->cdclk_freq != - dev_priv->display.get_display_clock_speed(dev_priv)); + dev_priv->display.get_cdclk(dev_priv, &cdclk_state); + WARN_ON(!intel_cdclk_state_compare(&dev_priv->cdclk.hw, &cdclk_state)); gen9_assert_dbuf_enabled(dev_priv); @@ -987,13 +976,9 @@ static void gen9_dc_off_power_well_disable(struct drm_i915_private *dev_priv, gen9_enable_dc5(dev_priv); } -static void gen9_dc_off_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) +static void i9xx_power_well_sync_hw_noop(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) { - if (power_well->count > 0) - gen9_dc_off_power_well_enable(dev_priv, power_well); - else - gen9_dc_off_power_well_disable(dev_priv, 
power_well); } static void i9xx_always_on_power_well_noop(struct drm_i915_private *dev_priv, @@ -1043,12 +1028,6 @@ out: mutex_unlock(&dev_priv->rps.hw_lock); } -static void vlv_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - vlv_set_power_well(dev_priv, power_well, power_well->count > 0); -} - static void vlv_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { @@ -1249,7 +1228,7 @@ static void vlv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, vlv_set_power_well(dev_priv, power_well, false); } -#define POWER_DOMAIN_MASK (BIT(POWER_DOMAIN_NUM) - 1) +#define POWER_DOMAIN_MASK (GENMASK_ULL(POWER_DOMAIN_NUM - 1, 0)) static struct i915_power_well *lookup_power_well(struct drm_i915_private *dev_priv, int power_well_id) @@ -1659,14 +1638,6 @@ out: mutex_unlock(&dev_priv->rps.hw_lock); } -static void chv_pipe_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - WARN_ON_ONCE(power_well->id != PIPE_A); - - chv_set_pipe_power_well(dev_priv, power_well, power_well->count > 0); -} - static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { @@ -1693,9 +1664,8 @@ __intel_display_power_get_domain(struct drm_i915_private *dev_priv, { struct i915_power_domains *power_domains = &dev_priv->power_domains; struct i915_power_well *power_well; - int i; - for_each_power_well(i, power_well, BIT(domain), power_domains) + for_each_power_domain_well(dev_priv, power_well, BIT_ULL(domain)) intel_power_well_get(dev_priv, power_well); power_domains->domain_use_count[domain]++; @@ -1779,7 +1749,6 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, { struct i915_power_domains *power_domains; struct i915_power_well *power_well; - int i; power_domains = &dev_priv->power_domains; @@ -1790,7 +1759,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, 
intel_display_power_domain_str(domain)); power_domains->domain_use_count[domain]--; - for_each_power_well_rev(i, power_well, BIT(domain), power_domains) + for_each_power_domain_well_rev(dev_priv, power_well, BIT_ULL(domain)) intel_power_well_put(dev_priv, power_well); mutex_unlock(&power_domains->lock); @@ -1799,134 +1768,134 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, } #define HSW_DISPLAY_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_PORT_CRT) | /* DDI E */ \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_CRT) | /* DDI E */ \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BDW_DISPLAY_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - 
BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_PORT_CRT) | /* DDI E */ \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_CRT) | /* DDI E */ \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DISPLAY_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_DSI) | \ - BIT(POWER_DOMAIN_PORT_CRT) | \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_GMBUS) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PIPE_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DSI) | \ + BIT_ULL(POWER_DOMAIN_PORT_CRT) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_GMBUS) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_CMN_BC_POWER_DOMAINS ( \ - 
BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_CRT) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_CRT) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define CHV_DISPLAY_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_PORT_DSI) | \ - BIT(POWER_DOMAIN_VGA) | 
\ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_AUX_D) | \ - BIT(POWER_DOMAIN_GMBUS) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PIPE_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DSI) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_GMBUS) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define CHV_DPIO_CMN_BC_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define CHV_DPIO_CMN_D_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_AUX_D) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_INIT)) static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { - .sync_hw = i9xx_always_on_power_well_noop, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = i9xx_always_on_power_well_noop, .disable = i9xx_always_on_power_well_noop, .is_enabled = i9xx_always_on_power_well_enabled, }; static const struct i915_power_well_ops chv_pipe_power_well_ops = { - .sync_hw = 
chv_pipe_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = chv_pipe_power_well_enable, .disable = chv_pipe_power_well_disable, .is_enabled = chv_pipe_power_well_enabled, }; static const struct i915_power_well_ops chv_dpio_cmn_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = chv_dpio_cmn_power_well_enable, .disable = chv_dpio_cmn_power_well_disable, .is_enabled = vlv_power_well_enabled, @@ -1956,14 +1925,14 @@ static const struct i915_power_well_ops skl_power_well_ops = { }; static const struct i915_power_well_ops gen9_dc_off_power_well_ops = { - .sync_hw = gen9_dc_off_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = gen9_dc_off_power_well_enable, .disable = gen9_dc_off_power_well_disable, .is_enabled = gen9_dc_off_power_well_enabled, }; static const struct i915_power_well_ops bxt_dpio_cmn_power_well_ops = { - .sync_hw = bxt_dpio_cmn_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = bxt_dpio_cmn_power_well_enable, .disable = bxt_dpio_cmn_power_well_disable, .is_enabled = bxt_dpio_cmn_power_well_enabled, @@ -1998,21 +1967,21 @@ static struct i915_power_well bdw_power_wells[] = { }; static const struct i915_power_well_ops vlv_display_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = vlv_display_power_well_enable, .disable = vlv_display_power_well_disable, .is_enabled = vlv_power_well_enabled, }; static const struct i915_power_well_ops vlv_dpio_cmn_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = vlv_dpio_cmn_power_well_enable, .disable = vlv_dpio_cmn_power_well_disable, .is_enabled = vlv_power_well_enabled, }; static const struct i915_power_well_ops vlv_dpio_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = vlv_power_well_enable, .disable = vlv_power_well_disable, 
.is_enabled = vlv_power_well_enabled, @@ -2155,26 +2124,26 @@ static struct i915_power_well skl_power_wells[] = { .id = SKL_DISP_PW_2, }, { - .name = "DDI A/E power well", - .domains = SKL_DISPLAY_DDI_A_E_POWER_DOMAINS, + .name = "DDI A/E IO power well", + .domains = SKL_DISPLAY_DDI_IO_A_E_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_A_E, }, { - .name = "DDI B power well", - .domains = SKL_DISPLAY_DDI_B_POWER_DOMAINS, + .name = "DDI B IO power well", + .domains = SKL_DISPLAY_DDI_IO_B_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_B, }, { - .name = "DDI C power well", - .domains = SKL_DISPLAY_DDI_C_POWER_DOMAINS, + .name = "DDI C IO power well", + .domains = SKL_DISPLAY_DDI_IO_C_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_C, }, { - .name = "DDI D power well", - .domains = SKL_DISPLAY_DDI_D_POWER_DOMAINS, + .name = "DDI D IO power well", + .domains = SKL_DISPLAY_DDI_IO_D_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_D, }, @@ -2287,20 +2256,20 @@ static struct i915_power_well glk_power_wells[] = { .id = GLK_DISP_PW_AUX_C, }, { - .name = "DDI A power well", - .domains = GLK_DISPLAY_DDI_A_POWER_DOMAINS, + .name = "DDI A IO power well", + .domains = GLK_DISPLAY_DDI_IO_A_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = GLK_DISP_PW_DDI_A, }, { - .name = "DDI B power well", - .domains = GLK_DISPLAY_DDI_B_POWER_DOMAINS, + .name = "DDI B IO power well", + .domains = GLK_DISPLAY_DDI_IO_B_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_B, }, { - .name = "DDI C power well", - .domains = GLK_DISPLAY_DDI_C_POWER_DOMAINS, + .name = "DDI C IO power well", + .domains = GLK_DISPLAY_DDI_IO_C_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_C, }, @@ -2323,7 +2292,7 @@ static uint32_t get_allowed_dc_mask(const struct drm_i915_private *dev_priv, int requested_dc; int max_dc; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { max_dc = 2; mask = 0; 
} else if (IS_GEN9_LP(dev_priv)) { @@ -2386,7 +2355,7 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) dev_priv->csr.allowed_dc_mask = get_allowed_dc_mask(dev_priv, i915.enable_dc); - BUILD_BUG_ON(POWER_DOMAIN_NUM > 31); + BUILD_BUG_ON(POWER_DOMAIN_NUM > 64); mutex_init(&power_domains->lock); @@ -2398,7 +2367,7 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) set_power_wells(power_domains, hsw_power_wells); } else if (IS_BROADWELL(dev_priv)) { set_power_wells(power_domains, bdw_power_wells); - } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + } else if (IS_GEN9_BC(dev_priv)) { set_power_wells(power_domains, skl_power_wells); } else if (IS_BROXTON(dev_priv)) { set_power_wells(power_domains, bxt_power_wells); @@ -2454,10 +2423,9 @@ static void intel_power_domains_sync_hw(struct drm_i915_private *dev_priv) { struct i915_power_domains *power_domains = &dev_priv->power_domains; struct i915_power_well *power_well; - int i; mutex_lock(&power_domains->lock); - for_each_power_well(i, power_well, POWER_DOMAIN_MASK, power_domains) { + for_each_power_well(dev_priv, power_well) { power_well->ops->sync_hw(dev_priv, power_well); power_well->hw_enabled = power_well->ops->is_enabled(dev_priv, power_well); @@ -2722,7 +2690,10 @@ static void vlv_cmnlane_wa(struct drm_i915_private *dev_priv) * @resume: Called from resume code paths or not * * This function initializes the hardware power domain state and enables all - * power domains using intel_display_set_init_power(). + * power wells belonging to the INIT power domain. Power wells in other + * domains (and not in the INIT domain) are referenced or disabled during the + * modeset state HW readout. After that the reference count of each power well + * must match its HW enabled state, see intel_power_domains_verify_state(). 
*/ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume) { @@ -2730,7 +2701,7 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume) power_domains->initializing = true; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { skl_display_core_init(dev_priv, resume); } else if (IS_GEN9_LP(dev_priv)) { bxt_display_core_init(dev_priv, resume); @@ -2769,12 +2740,92 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv) if (!i915.disable_power_well) intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) skl_display_core_uninit(dev_priv); else if (IS_GEN9_LP(dev_priv)) bxt_display_core_uninit(dev_priv); } +static void intel_power_domains_dump_info(struct drm_i915_private *dev_priv) +{ + struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_well *power_well; + + for_each_power_well(dev_priv, power_well) { + enum intel_display_power_domain domain; + + DRM_DEBUG_DRIVER("%-25s %d\n", + power_well->name, power_well->count); + + for_each_power_domain(domain, power_well->domains) + DRM_DEBUG_DRIVER(" %-23s %d\n", + intel_display_power_domain_str(domain), + power_domains->domain_use_count[domain]); + } +} + +/** + * intel_power_domains_verify_state - verify the HW/SW state for all power wells + * @dev_priv: i915 device instance + * + * Verify if the reference count of each power well matches its HW enabled + * state and the total refcount of the domains it belongs to. This must be + * called after modeset HW state sanitization, which is responsible for + * acquiring reference counts for any power wells in use and disabling the + * ones left on by BIOS but not required by any active output. 
+ */ +void intel_power_domains_verify_state(struct drm_i915_private *dev_priv) +{ + struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_well *power_well; + bool dump_domain_info; + + mutex_lock(&power_domains->lock); + + dump_domain_info = false; + for_each_power_well(dev_priv, power_well) { + enum intel_display_power_domain domain; + int domains_count; + bool enabled; + + /* + * Power wells not belonging to any domain (like the MISC_IO + * and PW1 power wells) are under FW control, so ignore them, + * since their state can change asynchronously. + */ + if (!power_well->domains) + continue; + + enabled = power_well->ops->is_enabled(dev_priv, power_well); + if ((power_well->count || power_well->always_on) != enabled) + DRM_ERROR("power well %s state mismatch (refcount %d/enabled %d)", + power_well->name, power_well->count, enabled); + + domains_count = 0; + for_each_power_domain(domain, power_well->domains) + domains_count += power_domains->domain_use_count[domain]; + + if (power_well->count != domains_count) { + DRM_ERROR("power well %s refcount/domain refcount mismatch " + "(refcount %d/domains refcount %d)\n", + power_well->name, power_well->count, + domains_count); + dump_domain_info = true; + } + } + + if (dump_domain_info) { + static bool dumped; + + if (!dumped) { + intel_power_domains_dump_info(dev_priv); + dumped = true; + } + } + + mutex_unlock(&power_domains->lock); +} + /** * intel_runtime_pm_get - grab a runtime pm reference * @dev_priv: i915 device instance @@ -2789,8 +2840,10 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; struct device *kdev = &pdev->dev; + int ret; - pm_runtime_get_sync(kdev); + ret = pm_runtime_get_sync(kdev); + WARN_ONCE(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret); atomic_inc(&dev_priv->pm.wakeref_count); assert_rpm_wakelock_held(dev_priv); @@ -2820,7 +2873,8 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private 
*dev_priv) * function, since the power state is undefined. This applies * atm to the late/early system suspend/resume handlers. */ - WARN_ON_ONCE(ret < 0); + WARN_ONCE(ret < 0, + "pm_runtime_get_if_in_use() failed: %d\n", ret); if (ret <= 0) return false; } @@ -2904,8 +2958,11 @@ void intel_runtime_pm_enable(struct drm_i915_private *dev_priv) * platforms without RPM support. */ if (!HAS_RUNTIME_PM(dev_priv)) { + int ret; + pm_runtime_dont_use_autosuspend(kdev); - pm_runtime_get_sync(kdev); + ret = pm_runtime_get_sync(kdev); + WARN(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret); } else { pm_runtime_use_autosuspend(kdev); } diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 2ad13903a054..816a6f5a3fd9 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -2981,6 +2981,7 @@ bool intel_sdvo_init(struct drm_i915_private *dev_priv, /* encoder type will be decided later */ intel_encoder = &intel_sdvo->base; intel_encoder->type = INTEL_OUTPUT_SDVO; + intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = port; drm_encoder_init(&dev_priv->drm, &intel_encoder->base, &intel_sdvo_enc_funcs, 0, diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index 1a840bf92eea..7d971cb56116 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -60,8 +60,7 @@ static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn, } I915_WRITE(VLV_IOSF_ADDR, addr); - if (!is_read) - I915_WRITE(VLV_IOSF_DATA, *val); + I915_WRITE(VLV_IOSF_DATA, is_read ? 
0 : *val); I915_WRITE(VLV_IOSF_DOORBELL_REQ, cmd); if (intel_wait_for_register(dev_priv, @@ -74,7 +73,6 @@ static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn, if (is_read) *val = I915_READ(VLV_IOSF_DATA); - I915_WRITE(VLV_IOSF_DATA, 0); return 0; } @@ -93,14 +91,18 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr) return val; } -void vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val) +int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val) { + int err; + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); mutex_lock(&dev_priv->sb_lock); - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, - SB_CRWRDA_NP, addr, &val); + err = vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, + SB_CRWRDA_NP, addr, &val); mutex_unlock(&dev_priv->sb_lock); + + return err; } u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg) @@ -214,6 +216,7 @@ u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg, } I915_WRITE(SBI_ADDR, (reg << 16)); + I915_WRITE(SBI_DATA, 0); if (destination == SBI_ICLK) value = SBI_CTL_DEST_ICLK | SBI_CTL_OP_CRRD; @@ -223,10 +226,15 @@ u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg, if (intel_wait_for_register(dev_priv, SBI_CTL_STAT, - SBI_BUSY | SBI_RESPONSE_FAIL, + SBI_BUSY, 0, 100)) { - DRM_ERROR("timeout waiting for SBI to complete read transaction\n"); + DRM_ERROR("timeout waiting for SBI to complete read\n"); + return 0; + } + + if (I915_READ(SBI_CTL_STAT) & SBI_RESPONSE_FAIL) { + DRM_ERROR("error during SBI read of reg %x\n", reg); return 0; } @@ -258,10 +266,16 @@ void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value, if (intel_wait_for_register(dev_priv, SBI_CTL_STAT, - SBI_BUSY | SBI_RESPONSE_FAIL, + SBI_BUSY, 0, 100)) { - DRM_ERROR("timeout waiting for SBI to complete write transaction\n"); + DRM_ERROR("timeout waiting for SBI to complete write\n"); + return; + } + + if (I915_READ(SBI_CTL_STAT) & 
SBI_RESPONSE_FAIL) { + DRM_ERROR("error during SBI write of %x to reg %x\n", + value, reg); return; } } diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 9481ca9a3ae7..f7d431427115 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -65,6 +65,8 @@ int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, 1000 * adjusted_mode->crtc_htotal); } +#define VBLANK_EVASION_TIME_US 100 + /** * intel_pipe_update_start() - start update of a set of display registers * @crtc: the crtc of which the registers are going to be updated @@ -92,7 +94,8 @@ void intel_pipe_update_start(struct intel_crtc *crtc) vblank_start = DIV_ROUND_UP(vblank_start, 2); /* FIXME needs to be calibrated sensibly */ - min = vblank_start - intel_usecs_to_scanlines(adjusted_mode, 100); + min = vblank_start - intel_usecs_to_scanlines(adjusted_mode, + VBLANK_EVASION_TIME_US); max = vblank_start - 1; local_irq_disable(); @@ -158,6 +161,7 @@ void intel_pipe_update_end(struct intel_crtc *crtc, struct intel_flip_work *work int scanline_end = intel_get_crtc_scanline(crtc); u32 end_vbl_count = intel_crtc_get_vblank_counter(crtc); ktime_t end_vbl_time = ktime_get(); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); if (work) { work->flip_queued_vblank = end_vbl_count; @@ -183,6 +187,9 @@ void intel_pipe_update_end(struct intel_crtc *crtc, struct intel_flip_work *work local_irq_enable(); + if (intel_vgpu_active(dev_priv)) + return; + if (crtc->debug.start_vbl_count && crtc->debug.start_vbl_count != end_vbl_count) { DRM_ERROR("Atomic update failure on pipe %c (start=%u end=%u) time %lld us, min %d, max %d, scanline start %d, end %d\n", @@ -191,7 +198,12 @@ void intel_pipe_update_end(struct intel_crtc *crtc, struct intel_flip_work *work ktime_us_delta(end_vbl_time, crtc->debug.start_vbl_time), crtc->debug.min_vbl, crtc->debug.max_vbl, crtc->debug.scanline_start, scanline_end); - } + } else if 
(ktime_us_delta(end_vbl_time, crtc->debug.start_vbl_time) > + VBLANK_EVASION_TIME_US) + DRM_WARN("Atomic update on pipe (%c) took %lld us, max time under evasion is %u us\n", + pipe_name(pipe), + ktime_us_delta(end_vbl_time, crtc->debug.start_vbl_time), + VBLANK_EVASION_TIME_US); } static void @@ -205,7 +217,7 @@ skl_update_plane(struct drm_plane *drm_plane, struct drm_framebuffer *fb = plane_state->base.fb; enum plane_id plane_id = intel_plane->id; enum pipe pipe = intel_plane->pipe; - u32 plane_ctl; + u32 plane_ctl = plane_state->ctl; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; u32 surf_addr = plane_state->main.offset; unsigned int rotation = plane_state->base.rotation; @@ -218,26 +230,7 @@ skl_update_plane(struct drm_plane *drm_plane, uint32_t y = plane_state->main.y; uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; - - plane_ctl = PLANE_CTL_ENABLE | - PLANE_CTL_PIPE_GAMMA_ENABLE | - PLANE_CTL_PIPE_CSC_ENABLE; - - plane_ctl |= skl_plane_ctl_format(fb->format->format); - plane_ctl |= skl_plane_ctl_tiling(fb->modifier); - - plane_ctl |= skl_plane_ctl_rotation(rotation); - - if (key->flags) { - I915_WRITE(PLANE_KEYVAL(pipe, plane_id), key->min_value); - I915_WRITE(PLANE_KEYMAX(pipe, plane_id), key->max_value); - I915_WRITE(PLANE_KEYMSK(pipe, plane_id), key->channel_mask); - } - - if (key->flags & I915_SET_COLORKEY_DESTINATION) - plane_ctl |= PLANE_CTL_KEY_ENABLE_DESTINATION; - else if (key->flags & I915_SET_COLORKEY_SOURCE) - plane_ctl |= PLANE_CTL_KEY_ENABLE_SOURCE; + unsigned long irqflags; /* Sizes are 0 based */ src_w--; @@ -245,9 +238,24 @@ skl_update_plane(struct drm_plane *drm_plane, crtc_w--; crtc_h--; - I915_WRITE(PLANE_OFFSET(pipe, plane_id), (y << 16) | x); - I915_WRITE(PLANE_STRIDE(pipe, plane_id), stride); - I915_WRITE(PLANE_SIZE(pipe, plane_id), (src_h << 16) | src_w); + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); + + if 
(IS_GEMINILAKE(dev_priv)) { + I915_WRITE_FW(PLANE_COLOR_CTL(pipe, plane_id), + PLANE_COLOR_PIPE_GAMMA_ENABLE | + PLANE_COLOR_PIPE_CSC_ENABLE | + PLANE_COLOR_PLANE_GAMMA_DISABLE); + } + + if (key->flags) { + I915_WRITE_FW(PLANE_KEYVAL(pipe, plane_id), key->min_value); + I915_WRITE_FW(PLANE_KEYMAX(pipe, plane_id), key->max_value); + I915_WRITE_FW(PLANE_KEYMSK(pipe, plane_id), key->channel_mask); + } + + I915_WRITE_FW(PLANE_OFFSET(pipe, plane_id), (y << 16) | x); + I915_WRITE_FW(PLANE_STRIDE(pipe, plane_id), stride); + I915_WRITE_FW(PLANE_SIZE(pipe, plane_id), (src_h << 16) | src_w); /* program plane scaler */ if (plane_state->scaler_id >= 0) { @@ -256,22 +264,24 @@ skl_update_plane(struct drm_plane *drm_plane, scaler = &crtc_state->scaler_state.scalers[scaler_id]; - I915_WRITE(SKL_PS_CTRL(pipe, scaler_id), - PS_SCALER_EN | PS_PLANE_SEL(plane_id) | scaler->mode); - I915_WRITE(SKL_PS_PWR_GATE(pipe, scaler_id), 0); - I915_WRITE(SKL_PS_WIN_POS(pipe, scaler_id), (crtc_x << 16) | crtc_y); - I915_WRITE(SKL_PS_WIN_SZ(pipe, scaler_id), - ((crtc_w + 1) << 16)|(crtc_h + 1)); + I915_WRITE_FW(SKL_PS_CTRL(pipe, scaler_id), + PS_SCALER_EN | PS_PLANE_SEL(plane_id) | scaler->mode); + I915_WRITE_FW(SKL_PS_PWR_GATE(pipe, scaler_id), 0); + I915_WRITE_FW(SKL_PS_WIN_POS(pipe, scaler_id), (crtc_x << 16) | crtc_y); + I915_WRITE_FW(SKL_PS_WIN_SZ(pipe, scaler_id), + ((crtc_w + 1) << 16)|(crtc_h + 1)); - I915_WRITE(PLANE_POS(pipe, plane_id), 0); + I915_WRITE_FW(PLANE_POS(pipe, plane_id), 0); } else { - I915_WRITE(PLANE_POS(pipe, plane_id), (crtc_y << 16) | crtc_x); + I915_WRITE_FW(PLANE_POS(pipe, plane_id), (crtc_y << 16) | crtc_x); } - I915_WRITE(PLANE_CTL(pipe, plane_id), plane_ctl); - I915_WRITE(PLANE_SURF(pipe, plane_id), - intel_plane_ggtt_offset(plane_state) + surf_addr); - POSTING_READ(PLANE_SURF(pipe, plane_id)); + I915_WRITE_FW(PLANE_CTL(pipe, plane_id), plane_ctl); + I915_WRITE_FW(PLANE_SURF(pipe, plane_id), + intel_plane_ggtt_offset(plane_state) + surf_addr); + 
POSTING_READ_FW(PLANE_SURF(pipe, plane_id)); + + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } static void @@ -282,11 +292,16 @@ skl_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc) struct intel_plane *intel_plane = to_intel_plane(dplane); enum plane_id plane_id = intel_plane->id; enum pipe pipe = intel_plane->pipe; + unsigned long irqflags; + + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); + + I915_WRITE_FW(PLANE_CTL(pipe, plane_id), 0); - I915_WRITE(PLANE_CTL(pipe, plane_id), 0); + I915_WRITE_FW(PLANE_SURF(pipe, plane_id), 0); + POSTING_READ_FW(PLANE_SURF(pipe, plane_id)); - I915_WRITE(PLANE_SURF(pipe, plane_id), 0); - POSTING_READ(PLANE_SURF(pipe, plane_id)); + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } static void @@ -309,50 +324,34 @@ chv_update_csc(struct intel_plane *intel_plane, uint32_t format) * Cb and Cr apparently come in as signed already, so no * need for any offset. For Y we need to remove the offset. */ - I915_WRITE(SPCSCYGOFF(plane_id), SPCSC_OOFF(0) | SPCSC_IOFF(-64)); - I915_WRITE(SPCSCCBOFF(plane_id), SPCSC_OOFF(0) | SPCSC_IOFF(0)); - I915_WRITE(SPCSCCROFF(plane_id), SPCSC_OOFF(0) | SPCSC_IOFF(0)); - - I915_WRITE(SPCSCC01(plane_id), SPCSC_C1(4769) | SPCSC_C0(6537)); - I915_WRITE(SPCSCC23(plane_id), SPCSC_C1(-3330) | SPCSC_C0(0)); - I915_WRITE(SPCSCC45(plane_id), SPCSC_C1(-1605) | SPCSC_C0(4769)); - I915_WRITE(SPCSCC67(plane_id), SPCSC_C1(4769) | SPCSC_C0(0)); - I915_WRITE(SPCSCC8(plane_id), SPCSC_C0(8263)); - - I915_WRITE(SPCSCYGICLAMP(plane_id), SPCSC_IMAX(940) | SPCSC_IMIN(64)); - I915_WRITE(SPCSCCBICLAMP(plane_id), SPCSC_IMAX(448) | SPCSC_IMIN(-448)); - I915_WRITE(SPCSCCRICLAMP(plane_id), SPCSC_IMAX(448) | SPCSC_IMIN(-448)); - - I915_WRITE(SPCSCYGOCLAMP(plane_id), SPCSC_OMAX(1023) | SPCSC_OMIN(0)); - I915_WRITE(SPCSCCBOCLAMP(plane_id), SPCSC_OMAX(1023) | SPCSC_OMIN(0)); - I915_WRITE(SPCSCCROCLAMP(plane_id), SPCSC_OMAX(1023) | SPCSC_OMIN(0)); + I915_WRITE_FW(SPCSCYGOFF(plane_id), 
SPCSC_OOFF(0) | SPCSC_IOFF(-64)); + I915_WRITE_FW(SPCSCCBOFF(plane_id), SPCSC_OOFF(0) | SPCSC_IOFF(0)); + I915_WRITE_FW(SPCSCCROFF(plane_id), SPCSC_OOFF(0) | SPCSC_IOFF(0)); + + I915_WRITE_FW(SPCSCC01(plane_id), SPCSC_C1(4769) | SPCSC_C0(6537)); + I915_WRITE_FW(SPCSCC23(plane_id), SPCSC_C1(-3330) | SPCSC_C0(0)); + I915_WRITE_FW(SPCSCC45(plane_id), SPCSC_C1(-1605) | SPCSC_C0(4769)); + I915_WRITE_FW(SPCSCC67(plane_id), SPCSC_C1(4769) | SPCSC_C0(0)); + I915_WRITE_FW(SPCSCC8(plane_id), SPCSC_C0(8263)); + + I915_WRITE_FW(SPCSCYGICLAMP(plane_id), SPCSC_IMAX(940) | SPCSC_IMIN(64)); + I915_WRITE_FW(SPCSCCBICLAMP(plane_id), SPCSC_IMAX(448) | SPCSC_IMIN(-448)); + I915_WRITE_FW(SPCSCCRICLAMP(plane_id), SPCSC_IMAX(448) | SPCSC_IMIN(-448)); + + I915_WRITE_FW(SPCSCYGOCLAMP(plane_id), SPCSC_OMAX(1023) | SPCSC_OMIN(0)); + I915_WRITE_FW(SPCSCCBOCLAMP(plane_id), SPCSC_OMAX(1023) | SPCSC_OMIN(0)); + I915_WRITE_FW(SPCSCCROCLAMP(plane_id), SPCSC_OMAX(1023) | SPCSC_OMIN(0)); } -static void -vlv_update_plane(struct drm_plane *dplane, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { - struct drm_device *dev = dplane->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane = to_intel_plane(dplane); - struct drm_framebuffer *fb = plane_state->base.fb; - enum pipe pipe = intel_plane->pipe; - enum plane_id plane_id = intel_plane->id; - u32 sprctl; - u32 sprsurf_offset, linear_offset; + const struct drm_framebuffer *fb = plane_state->base.fb; unsigned int rotation = plane_state->base.rotation; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - int crtc_x = plane_state->base.dst.x1; - int crtc_y = plane_state->base.dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); - uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); - uint32_t x = plane_state->base.src.x1 >> 
16; - uint32_t y = plane_state->base.src.y1 >> 16; - uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; - uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; + u32 sprctl; - sprctl = SP_ENABLE; + sprctl = SP_ENABLE | SP_GAMMA_ENABLE; switch (fb->format->format) { case DRM_FORMAT_YUYV: @@ -389,20 +388,10 @@ vlv_update_plane(struct drm_plane *dplane, sprctl |= SP_FORMAT_RGBA8888; break; default: - /* - * If we get here one of the upper layers failed to filter - * out the unsupported plane formats - */ - BUG(); - break; + MISSING_CASE(fb->format->format); + return 0; } - /* - * Enable gamma to match primary/cursor plane behaviour. - * FIXME should be user controllable via propertiesa. - */ - sprctl |= SP_GAMMA_ENABLE; - if (fb->modifier == I915_FORMAT_MOD_X_TILED) sprctl |= SP_TILED; @@ -412,51 +401,68 @@ vlv_update_plane(struct drm_plane *dplane, if (rotation & DRM_REFLECT_X) sprctl |= SP_MIRROR; + if (key->flags & I915_SET_COLORKEY_SOURCE) + sprctl |= SP_SOURCE_KEY; + + return sprctl; +} + +static void +vlv_update_plane(struct drm_plane *dplane, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct drm_device *dev = dplane->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_plane *intel_plane = to_intel_plane(dplane); + struct drm_framebuffer *fb = plane_state->base.fb; + enum pipe pipe = intel_plane->pipe; + enum plane_id plane_id = intel_plane->id; + u32 sprctl = plane_state->ctl; + u32 sprsurf_offset = plane_state->main.offset; + u32 linear_offset; + const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; + int crtc_x = plane_state->base.dst.x1; + int crtc_y = plane_state->base.dst.y1; + uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); + uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); + uint32_t x = plane_state->main.x; + uint32_t y = plane_state->main.y; + unsigned long irqflags; + /* Sizes are 0 based */ - src_w--; - src_h--; crtc_w--; 
crtc_h--; - intel_add_fb_offsets(&x, &y, plane_state, 0); - sprsurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0); - - if (rotation & DRM_ROTATE_180) { - x += src_w; - y += src_h; - } else if (rotation & DRM_REFLECT_X) { - x += src_w; - } - linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); - if (key->flags) { - I915_WRITE(SPKEYMINVAL(pipe, plane_id), key->min_value); - I915_WRITE(SPKEYMAXVAL(pipe, plane_id), key->max_value); - I915_WRITE(SPKEYMSK(pipe, plane_id), key->channel_mask); - } - - if (key->flags & I915_SET_COLORKEY_SOURCE) - sprctl |= SP_SOURCE_KEY; + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_B) chv_update_csc(intel_plane, fb->format->format); - I915_WRITE(SPSTRIDE(pipe, plane_id), fb->pitches[0]); - I915_WRITE(SPPOS(pipe, plane_id), (crtc_y << 16) | crtc_x); + if (key->flags) { + I915_WRITE_FW(SPKEYMINVAL(pipe, plane_id), key->min_value); + I915_WRITE_FW(SPKEYMAXVAL(pipe, plane_id), key->max_value); + I915_WRITE_FW(SPKEYMSK(pipe, plane_id), key->channel_mask); + } + I915_WRITE_FW(SPSTRIDE(pipe, plane_id), fb->pitches[0]); + I915_WRITE_FW(SPPOS(pipe, plane_id), (crtc_y << 16) | crtc_x); if (fb->modifier == I915_FORMAT_MOD_X_TILED) - I915_WRITE(SPTILEOFF(pipe, plane_id), (y << 16) | x); + I915_WRITE_FW(SPTILEOFF(pipe, plane_id), (y << 16) | x); else - I915_WRITE(SPLINOFF(pipe, plane_id), linear_offset); + I915_WRITE_FW(SPLINOFF(pipe, plane_id), linear_offset); - I915_WRITE(SPCONSTALPHA(pipe, plane_id), 0); + I915_WRITE_FW(SPCONSTALPHA(pipe, plane_id), 0); - I915_WRITE(SPSIZE(pipe, plane_id), (crtc_h << 16) | crtc_w); - I915_WRITE(SPCNTR(pipe, plane_id), sprctl); - I915_WRITE(SPSURF(pipe, plane_id), - intel_plane_ggtt_offset(plane_state) + sprsurf_offset); - POSTING_READ(SPSURF(pipe, plane_id)); + I915_WRITE_FW(SPSIZE(pipe, plane_id), (crtc_h << 16) | crtc_w); + I915_WRITE_FW(SPCNTR(pipe, plane_id), sprctl); + I915_WRITE_FW(SPSURF(pipe, plane_id), + 
intel_plane_ggtt_offset(plane_state) + sprsurf_offset); + POSTING_READ_FW(SPSURF(pipe, plane_id)); + + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } static void @@ -467,37 +473,35 @@ vlv_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc) struct intel_plane *intel_plane = to_intel_plane(dplane); enum pipe pipe = intel_plane->pipe; enum plane_id plane_id = intel_plane->id; + unsigned long irqflags; + + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); + + I915_WRITE_FW(SPCNTR(pipe, plane_id), 0); - I915_WRITE(SPCNTR(pipe, plane_id), 0); + I915_WRITE_FW(SPSURF(pipe, plane_id), 0); + POSTING_READ_FW(SPSURF(pipe, plane_id)); - I915_WRITE(SPSURF(pipe, plane_id), 0); - POSTING_READ(SPSURF(pipe, plane_id)); + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } -static void -ivb_update_plane(struct drm_plane *plane, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { - struct drm_device *dev = plane->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane = to_intel_plane(plane); - struct drm_framebuffer *fb = plane_state->base.fb; - enum pipe pipe = intel_plane->pipe; - u32 sprctl, sprscale = 0; - u32 sprsurf_offset, linear_offset; + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); + const struct drm_framebuffer *fb = plane_state->base.fb; unsigned int rotation = plane_state->base.rotation; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - int crtc_x = plane_state->base.dst.x1; - int crtc_y = plane_state->base.dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); - uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); - uint32_t x = plane_state->base.src.x1 >> 16; - uint32_t y = plane_state->base.src.y1 >> 16; - uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; - 
uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; + u32 sprctl; - sprctl = SPRITE_ENABLE; + sprctl = SPRITE_ENABLE | SPRITE_GAMMA_ENABLE; + + if (IS_IVYBRIDGE(dev_priv)) + sprctl |= SPRITE_TRICKLE_FEED_DISABLE; + + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + sprctl |= SPRITE_PIPE_CSC_ENABLE; switch (fb->format->format) { case DRM_FORMAT_XBGR8888: @@ -519,28 +523,47 @@ ivb_update_plane(struct drm_plane *plane, sprctl |= SPRITE_FORMAT_YUV422 | SPRITE_YUV_ORDER_VYUY; break; default: - BUG(); + MISSING_CASE(fb->format->format); + return 0; } - /* - * Enable gamma to match primary/cursor plane behaviour. - * FIXME should be user controllable via propertiesa. - */ - sprctl |= SPRITE_GAMMA_ENABLE; - if (fb->modifier == I915_FORMAT_MOD_X_TILED) sprctl |= SPRITE_TILED; if (rotation & DRM_ROTATE_180) sprctl |= SPRITE_ROTATE_180; - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - sprctl &= ~SPRITE_TRICKLE_FEED_DISABLE; - else - sprctl |= SPRITE_TRICKLE_FEED_DISABLE; + if (key->flags & I915_SET_COLORKEY_DESTINATION) + sprctl |= SPRITE_DEST_KEY; + else if (key->flags & I915_SET_COLORKEY_SOURCE) + sprctl |= SPRITE_SOURCE_KEY; - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - sprctl |= SPRITE_PIPE_CSC_ENABLE; + return sprctl; +} + +static void +ivb_update_plane(struct drm_plane *plane, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct drm_device *dev = plane->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_plane *intel_plane = to_intel_plane(plane); + struct drm_framebuffer *fb = plane_state->base.fb; + enum pipe pipe = intel_plane->pipe; + u32 sprctl = plane_state->ctl, sprscale = 0; + u32 sprsurf_offset = plane_state->main.offset; + u32 linear_offset; + const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; + int crtc_x = plane_state->base.dst.x1; + int crtc_y = plane_state->base.dst.y1; + uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); + 
uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); + uint32_t x = plane_state->main.x; + uint32_t y = plane_state->main.y; + uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; + uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; + unsigned long irqflags; /* Sizes are 0 based */ src_w--; @@ -551,48 +574,37 @@ ivb_update_plane(struct drm_plane *plane, if (crtc_w != src_w || crtc_h != src_h) sprscale = SPRITE_SCALE_ENABLE | (src_w << 16) | src_h; - intel_add_fb_offsets(&x, &y, plane_state, 0); - sprsurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0); - - /* HSW+ does this automagically in hardware */ - if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv) && - rotation & DRM_ROTATE_180) { - x += src_w; - y += src_h; - } - linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); + if (key->flags) { - I915_WRITE(SPRKEYVAL(pipe), key->min_value); - I915_WRITE(SPRKEYMAX(pipe), key->max_value); - I915_WRITE(SPRKEYMSK(pipe), key->channel_mask); + I915_WRITE_FW(SPRKEYVAL(pipe), key->min_value); + I915_WRITE_FW(SPRKEYMAX(pipe), key->max_value); + I915_WRITE_FW(SPRKEYMSK(pipe), key->channel_mask); } - if (key->flags & I915_SET_COLORKEY_DESTINATION) - sprctl |= SPRITE_DEST_KEY; - else if (key->flags & I915_SET_COLORKEY_SOURCE) - sprctl |= SPRITE_SOURCE_KEY; - - I915_WRITE(SPRSTRIDE(pipe), fb->pitches[0]); - I915_WRITE(SPRPOS(pipe), (crtc_y << 16) | crtc_x); + I915_WRITE_FW(SPRSTRIDE(pipe), fb->pitches[0]); + I915_WRITE_FW(SPRPOS(pipe), (crtc_y << 16) | crtc_x); /* HSW consolidates SPRTILEOFF and SPRLINOFF into a single SPROFFSET * register */ if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - I915_WRITE(SPROFFSET(pipe), (y << 16) | x); + I915_WRITE_FW(SPROFFSET(pipe), (y << 16) | x); else if (fb->modifier == I915_FORMAT_MOD_X_TILED) - I915_WRITE(SPRTILEOFF(pipe), (y << 16) | x); + I915_WRITE_FW(SPRTILEOFF(pipe), (y << 16) | x); else - I915_WRITE(SPRLINOFF(pipe), 
linear_offset); + I915_WRITE_FW(SPRLINOFF(pipe), linear_offset); - I915_WRITE(SPRSIZE(pipe), (crtc_h << 16) | crtc_w); + I915_WRITE_FW(SPRSIZE(pipe), (crtc_h << 16) | crtc_w); if (intel_plane->can_scale) - I915_WRITE(SPRSCALE(pipe), sprscale); - I915_WRITE(SPRCTL(pipe), sprctl); - I915_WRITE(SPRSURF(pipe), - intel_plane_ggtt_offset(plane_state) + sprsurf_offset); - POSTING_READ(SPRSURF(pipe)); + I915_WRITE_FW(SPRSCALE(pipe), sprscale); + I915_WRITE_FW(SPRCTL(pipe), sprctl); + I915_WRITE_FW(SPRSURF(pipe), + intel_plane_ggtt_offset(plane_state) + sprsurf_offset); + POSTING_READ_FW(SPRSURF(pipe)); + + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } static void @@ -602,40 +614,35 @@ ivb_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc) struct drm_i915_private *dev_priv = to_i915(dev); struct intel_plane *intel_plane = to_intel_plane(plane); int pipe = intel_plane->pipe; + unsigned long irqflags; + + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - I915_WRITE(SPRCTL(pipe), 0); + I915_WRITE_FW(SPRCTL(pipe), 0); /* Can't leave the scaler enabled... 
*/ if (intel_plane->can_scale) - I915_WRITE(SPRSCALE(pipe), 0); + I915_WRITE_FW(SPRSCALE(pipe), 0); - I915_WRITE(SPRSURF(pipe), 0); - POSTING_READ(SPRSURF(pipe)); + I915_WRITE_FW(SPRSURF(pipe), 0); + POSTING_READ_FW(SPRSURF(pipe)); + + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } -static void -ilk_update_plane(struct drm_plane *plane, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 ilk_sprite_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { - struct drm_device *dev = plane->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane = to_intel_plane(plane); - struct drm_framebuffer *fb = plane_state->base.fb; - int pipe = intel_plane->pipe; - u32 dvscntr, dvsscale; - u32 dvssurf_offset, linear_offset; + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); + const struct drm_framebuffer *fb = plane_state->base.fb; unsigned int rotation = plane_state->base.rotation; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - int crtc_x = plane_state->base.dst.x1; - int crtc_y = plane_state->base.dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); - uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); - uint32_t x = plane_state->base.src.x1 >> 16; - uint32_t y = plane_state->base.src.y1 >> 16; - uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; - uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; + u32 dvscntr; - dvscntr = DVS_ENABLE; + dvscntr = DVS_ENABLE | DVS_GAMMA_ENABLE; + + if (IS_GEN6(dev_priv)) + dvscntr |= DVS_TRICKLE_FEED_DISABLE; switch (fb->format->format) { case DRM_FORMAT_XBGR8888: @@ -657,23 +664,47 @@ ilk_update_plane(struct drm_plane *plane, dvscntr |= DVS_FORMAT_YUV422 | DVS_YUV_ORDER_VYUY; break; default: - BUG(); + MISSING_CASE(fb->format->format); + return 0; } - /* - * Enable gamma to match primary/cursor plane 
behaviour. - * FIXME should be user controllable via propertiesa. - */ - dvscntr |= DVS_GAMMA_ENABLE; - if (fb->modifier == I915_FORMAT_MOD_X_TILED) dvscntr |= DVS_TILED; if (rotation & DRM_ROTATE_180) dvscntr |= DVS_ROTATE_180; - if (IS_GEN6(dev_priv)) - dvscntr |= DVS_TRICKLE_FEED_DISABLE; /* must disable */ + if (key->flags & I915_SET_COLORKEY_DESTINATION) + dvscntr |= DVS_DEST_KEY; + else if (key->flags & I915_SET_COLORKEY_SOURCE) + dvscntr |= DVS_SOURCE_KEY; + + return dvscntr; +} + +static void +ilk_update_plane(struct drm_plane *plane, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct drm_device *dev = plane->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_plane *intel_plane = to_intel_plane(plane); + struct drm_framebuffer *fb = plane_state->base.fb; + int pipe = intel_plane->pipe; + u32 dvscntr = plane_state->ctl, dvsscale = 0; + u32 dvssurf_offset = plane_state->main.offset; + u32 linear_offset; + const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; + int crtc_x = plane_state->base.dst.x1; + int crtc_y = plane_state->base.dst.y1; + uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); + uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); + uint32_t x = plane_state->main.x; + uint32_t y = plane_state->main.y; + uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; + uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; + unsigned long irqflags; /* Sizes are 0 based */ src_w--; @@ -681,45 +712,35 @@ ilk_update_plane(struct drm_plane *plane, crtc_w--; crtc_h--; - dvsscale = 0; if (crtc_w != src_w || crtc_h != src_h) dvsscale = DVS_SCALE_ENABLE | (src_w << 16) | src_h; - intel_add_fb_offsets(&x, &y, plane_state, 0); - dvssurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0); - - if (rotation & DRM_ROTATE_180) { - x += src_w; - y += src_h; - } - linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); + 
spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); + if (key->flags) { - I915_WRITE(DVSKEYVAL(pipe), key->min_value); - I915_WRITE(DVSKEYMAX(pipe), key->max_value); - I915_WRITE(DVSKEYMSK(pipe), key->channel_mask); + I915_WRITE_FW(DVSKEYVAL(pipe), key->min_value); + I915_WRITE_FW(DVSKEYMAX(pipe), key->max_value); + I915_WRITE_FW(DVSKEYMSK(pipe), key->channel_mask); } - if (key->flags & I915_SET_COLORKEY_DESTINATION) - dvscntr |= DVS_DEST_KEY; - else if (key->flags & I915_SET_COLORKEY_SOURCE) - dvscntr |= DVS_SOURCE_KEY; - - I915_WRITE(DVSSTRIDE(pipe), fb->pitches[0]); - I915_WRITE(DVSPOS(pipe), (crtc_y << 16) | crtc_x); + I915_WRITE_FW(DVSSTRIDE(pipe), fb->pitches[0]); + I915_WRITE_FW(DVSPOS(pipe), (crtc_y << 16) | crtc_x); if (fb->modifier == I915_FORMAT_MOD_X_TILED) - I915_WRITE(DVSTILEOFF(pipe), (y << 16) | x); + I915_WRITE_FW(DVSTILEOFF(pipe), (y << 16) | x); else - I915_WRITE(DVSLINOFF(pipe), linear_offset); - - I915_WRITE(DVSSIZE(pipe), (crtc_h << 16) | crtc_w); - I915_WRITE(DVSSCALE(pipe), dvsscale); - I915_WRITE(DVSCNTR(pipe), dvscntr); - I915_WRITE(DVSSURF(pipe), - intel_plane_ggtt_offset(plane_state) + dvssurf_offset); - POSTING_READ(DVSSURF(pipe)); + I915_WRITE_FW(DVSLINOFF(pipe), linear_offset); + + I915_WRITE_FW(DVSSIZE(pipe), (crtc_h << 16) | crtc_w); + I915_WRITE_FW(DVSSCALE(pipe), dvsscale); + I915_WRITE_FW(DVSCNTR(pipe), dvscntr); + I915_WRITE_FW(DVSSURF(pipe), + intel_plane_ggtt_offset(plane_state) + dvssurf_offset); + POSTING_READ_FW(DVSSURF(pipe)); + + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } static void @@ -729,13 +750,18 @@ ilk_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc) struct drm_i915_private *dev_priv = to_i915(dev); struct intel_plane *intel_plane = to_intel_plane(plane); int pipe = intel_plane->pipe; + unsigned long irqflags; + + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - I915_WRITE(DVSCNTR(pipe), 0); + I915_WRITE_FW(DVSCNTR(pipe), 0); /* Disable the scaler */ - I915_WRITE(DVSSCALE(pipe), 
0); + I915_WRITE_FW(DVSSCALE(pipe), 0); - I915_WRITE(DVSSURF(pipe), 0); - POSTING_READ(DVSSURF(pipe)); + I915_WRITE_FW(DVSSURF(pipe), 0); + POSTING_READ_FW(DVSSURF(pipe)); + + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } static int @@ -919,6 +945,26 @@ intel_check_sprite_plane(struct drm_plane *plane, ret = skl_check_plane_surface(state); if (ret) return ret; + + state->ctl = skl_plane_ctl(crtc_state, state); + } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { + ret = i9xx_check_plane_surface(state); + if (ret) + return ret; + + state->ctl = vlv_sprite_ctl(crtc_state, state); + } else if (INTEL_GEN(dev_priv) >= 7) { + ret = i9xx_check_plane_surface(state); + if (ret) + return ret; + + state->ctl = ivb_sprite_ctl(crtc_state, state); + } else { + ret = i9xx_check_plane_surface(state); + if (ret) + return ret; + + state->ctl = ilk_sprite_ctl(crtc_state, state); } return 0; diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index eb692e4ffe01..e077c2a9e694 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1315,8 +1315,10 @@ static void intel_tv_find_better_format(struct drm_connector *connector) * Currently this always returns CONNECTOR_STATUS_UNKNOWN, as we need to be sure * we have a pipe programmed in order to probe the TV. 
*/ -static enum drm_connector_status -intel_tv_detect(struct drm_connector *connector, bool force) +static int +intel_tv_detect(struct drm_connector *connector, + struct drm_modeset_acquire_ctx *ctx, + bool force) { struct drm_display_mode mode; struct intel_tv *intel_tv = intel_attached_tv(connector); @@ -1331,21 +1333,20 @@ intel_tv_detect(struct drm_connector *connector, bool force) if (force) { struct intel_load_detect_pipe tmp; - struct drm_modeset_acquire_ctx ctx; + int ret; - drm_modeset_acquire_init(&ctx, 0); + ret = intel_get_load_detect_pipe(connector, &mode, &tmp, ctx); + if (ret < 0) + return ret; - if (intel_get_load_detect_pipe(connector, &mode, &tmp, &ctx)) { + if (ret > 0) { type = intel_tv_detect_type(intel_tv, connector); - intel_release_load_detect_pipe(connector, &tmp, &ctx); + intel_release_load_detect_pipe(connector, &tmp, ctx); status = type < 0 ? connector_status_disconnected : connector_status_connected; } else status = connector_status_unknown; - - drm_modeset_drop_locks(&ctx); - drm_modeset_acquire_fini(&ctx); } else return connector->status; @@ -1516,7 +1517,6 @@ out: static const struct drm_connector_funcs intel_tv_connector_funcs = { .dpms = drm_atomic_helper_connector_dpms, - .detect = intel_tv_detect, .late_register = intel_connector_register, .early_unregister = intel_connector_unregister, .destroy = intel_tv_destroy, @@ -1528,6 +1528,7 @@ static const struct drm_connector_funcs intel_tv_connector_funcs = { }; static const struct drm_connector_helper_funcs intel_tv_connector_helper_funcs = { + .detect_ctx = intel_tv_detect, .mode_valid = intel_tv_mode_valid, .get_modes = intel_tv_get_modes, }; @@ -1621,6 +1622,7 @@ intel_tv_init(struct drm_i915_private *dev_priv) intel_connector_attach_encoder(intel_connector, intel_encoder); intel_encoder->type = INTEL_OUTPUT_TVOUT; + intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = PORT_NONE; intel_encoder->crtc_mask = (1 << 0) | (1 << 1); intel_encoder->cloneable = 0; 
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index c46bc8594f22..c117424f1f50 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -24,17 +24,346 @@ #include "i915_drv.h" #include "intel_uc.h" +#include <linux/firmware.h> + +/* Cleans up uC firmware by releasing the firmware GEM obj. + */ +static void __intel_uc_fw_fini(struct intel_uc_fw *uc_fw) +{ + struct drm_i915_gem_object *obj; + + obj = fetch_and_zero(&uc_fw->obj); + if (obj) + i915_gem_object_put(obj); + + uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; +} + +/* Reset GuC providing us with fresh state for both GuC and HuC. + */ +static int __intel_uc_reset_hw(struct drm_i915_private *dev_priv) +{ + int ret; + u32 guc_status; + + ret = intel_guc_reset(dev_priv); + if (ret) { + DRM_ERROR("GuC reset failed, ret = %d\n", ret); + return ret; + } + + guc_status = I915_READ(GUC_STATUS); + WARN(!(guc_status & GS_MIA_IN_RESET), + "GuC status: 0x%x, MIA core expected to be in reset\n", + guc_status); + + return ret; +} + +void intel_uc_sanitize_options(struct drm_i915_private *dev_priv) +{ + if (!HAS_GUC(dev_priv)) { + if (i915.enable_guc_loading > 0 || + i915.enable_guc_submission > 0) + DRM_INFO("Ignoring GuC options, no hardware\n"); + + i915.enable_guc_loading = 0; + i915.enable_guc_submission = 0; + return; + } + + /* A negative value means "use platform default" */ + if (i915.enable_guc_loading < 0) + i915.enable_guc_loading = HAS_GUC_UCODE(dev_priv); + + /* Verify firmware version */ + if (i915.enable_guc_loading) { + if (HAS_HUC_UCODE(dev_priv)) + intel_huc_select_fw(&dev_priv->huc); + + if (intel_guc_select_fw(&dev_priv->guc)) + i915.enable_guc_loading = 0; + } + + /* Can't enable guc submission without guc loaded */ + if (!i915.enable_guc_loading) + i915.enable_guc_submission = 0; + + /* A negative value means "use platform default" */ + if (i915.enable_guc_submission < 0) + i915.enable_guc_submission = HAS_GUC_SCHED(dev_priv); +} void 
intel_uc_init_early(struct drm_i915_private *dev_priv) { - mutex_init(&dev_priv->guc.send_mutex); + struct intel_guc *guc = &dev_priv->guc; + + mutex_init(&guc->send_mutex); + guc->send = intel_guc_send_mmio; +} + +static void fetch_uc_fw(struct drm_i915_private *dev_priv, + struct intel_uc_fw *uc_fw) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + struct drm_i915_gem_object *obj; + const struct firmware *fw = NULL; + struct uc_css_header *css; + size_t size; + int err; + + if (!uc_fw->path) + return; + + uc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING; + + DRM_DEBUG_DRIVER("before requesting firmware: uC fw fetch status %s\n", + intel_uc_fw_status_repr(uc_fw->fetch_status)); + + err = request_firmware(&fw, uc_fw->path, &pdev->dev); + if (err) + goto fail; + if (!fw) + goto fail; + + DRM_DEBUG_DRIVER("fetch uC fw from %s succeeded, fw %p\n", + uc_fw->path, fw); + + /* Check the size of the blob before examining buffer contents */ + if (fw->size < sizeof(struct uc_css_header)) { + DRM_NOTE("Firmware header is missing\n"); + goto fail; + } + + css = (struct uc_css_header *)fw->data; + + /* Firmware bits always start from header */ + uc_fw->header_offset = 0; + uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw - + css->key_size_dw - css->exponent_size_dw) * sizeof(u32); + + if (uc_fw->header_size != sizeof(struct uc_css_header)) { + DRM_NOTE("CSS header definition mismatch\n"); + goto fail; + } + + /* then, uCode */ + uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size; + uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); + + /* now RSA */ + if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) { + DRM_NOTE("RSA key size is bad\n"); + goto fail; + } + uc_fw->rsa_offset = uc_fw->ucode_offset + uc_fw->ucode_size; + uc_fw->rsa_size = css->key_size_dw * sizeof(u32); + + /* At least, it should have header, uCode and RSA. Size of all three. 
*/ + size = uc_fw->header_size + uc_fw->ucode_size + uc_fw->rsa_size; + if (fw->size < size) { + DRM_NOTE("Missing firmware components\n"); + goto fail; + } + + /* + * The GuC firmware image has the version number embedded at a + * well-known offset within the firmware blob; note that major / minor + * version are TWO bytes each (i.e. u16), although all pointers and + * offsets are defined in terms of bytes (u8). + */ + switch (uc_fw->type) { + case INTEL_UC_FW_TYPE_GUC: + /* Header and uCode will be loaded to WOPCM. Size of the two. */ + size = uc_fw->header_size + uc_fw->ucode_size; + + /* Top 32k of WOPCM is reserved (8K stack + 24k RC6 context). */ + if (size > intel_guc_wopcm_size(dev_priv)) { + DRM_ERROR("Firmware is too large to fit in WOPCM\n"); + goto fail; + } + uc_fw->major_ver_found = css->guc.sw_version >> 16; + uc_fw->minor_ver_found = css->guc.sw_version & 0xFFFF; + break; + + case INTEL_UC_FW_TYPE_HUC: + uc_fw->major_ver_found = css->huc.sw_version >> 16; + uc_fw->minor_ver_found = css->huc.sw_version & 0xFFFF; + break; + + default: + DRM_ERROR("Unknown firmware type %d\n", uc_fw->type); + err = -ENOEXEC; + goto fail; + } + + if (uc_fw->major_ver_wanted == 0 && uc_fw->minor_ver_wanted == 0) { + DRM_NOTE("Skipping %s firmware version check\n", + intel_uc_fw_type_repr(uc_fw->type)); + } else if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || + uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { + DRM_NOTE("%s firmware version %d.%d, required %d.%d\n", + intel_uc_fw_type_repr(uc_fw->type), + uc_fw->major_ver_found, uc_fw->minor_ver_found, + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); + err = -ENOEXEC; + goto fail; + } + + DRM_DEBUG_DRIVER("firmware version %d.%d OK (minimum %d.%d)\n", + uc_fw->major_ver_found, uc_fw->minor_ver_found, + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); + + obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto fail; + } + + uc_fw->obj = obj; 
+ uc_fw->size = fw->size; + + DRM_DEBUG_DRIVER("uC fw fetch status SUCCESS, obj %p\n", + uc_fw->obj); + + release_firmware(fw); + uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS; + return; + +fail: + DRM_WARN("Failed to fetch valid uC firmware from %s (error %d)\n", + uc_fw->path, err); + DRM_DEBUG_DRIVER("uC fw fetch status FAIL; err %d, fw %p, obj %p\n", + err, fw, uc_fw->obj); + + release_firmware(fw); /* OK even if fw is NULL */ + uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL; +} + +void intel_uc_init_fw(struct drm_i915_private *dev_priv) +{ + fetch_uc_fw(dev_priv, &dev_priv->huc.fw); + fetch_uc_fw(dev_priv, &dev_priv->guc.fw); +} + +void intel_uc_fini_fw(struct drm_i915_private *dev_priv) +{ + __intel_uc_fw_fini(&dev_priv->guc.fw); + __intel_uc_fw_fini(&dev_priv->huc.fw); +} + +int intel_uc_init_hw(struct drm_i915_private *dev_priv) +{ + int ret, attempts; + + if (!i915.enable_guc_loading) + return 0; + + gen9_reset_guc_interrupts(dev_priv); + + /* We need to notify the guc whenever we change the GGTT */ + i915_ggtt_enable_guc(dev_priv); + + if (i915.enable_guc_submission) { + /* + * This is stuff we need to have available at fw load time + * if we are planning to enable submission later + */ + ret = i915_guc_submission_init(dev_priv); + if (ret) + goto err_guc; + } + + /* WaEnableuKernelHeaderValidFix:skl */ + /* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */ + if (IS_GEN9(dev_priv)) + attempts = 3; + else + attempts = 1; + + while (attempts--) { + /* + * Always reset the GuC just before (re)loading, so + * that the state and timing are fairly predictable + */ + ret = __intel_uc_reset_hw(dev_priv); + if (ret) + goto err_submission; + + intel_huc_init_hw(&dev_priv->huc); + ret = intel_guc_init_hw(&dev_priv->guc); + if (ret == 0 || ret != -EAGAIN) + break; + + DRM_DEBUG_DRIVER("GuC fw load failed: %d; will reset and " + "retry %d more time(s)\n", ret, attempts); + } + + /* Did we succeded or run out of retries? 
*/ + if (ret) + goto err_submission; + + intel_guc_auth_huc(dev_priv); + if (i915.enable_guc_submission) { + if (i915.guc_log_level >= 0) + gen9_enable_guc_interrupts(dev_priv); + + ret = i915_guc_submission_enable(dev_priv); + if (ret) + goto err_interrupts; + } + + return 0; + + /* + * We've failed to load the firmware :( + * + * Decide whether to disable GuC submission and fall back to + * execlist mode, and whether to hide the error by returning + * zero or to return -EIO, which the caller will treat as a + * nonfatal error (i.e. it doesn't prevent driver load, but + * marks the GPU as wedged until reset). + */ +err_interrupts: + gen9_disable_guc_interrupts(dev_priv); +err_submission: + if (i915.enable_guc_submission) + i915_guc_submission_fini(dev_priv); +err_guc: + i915_ggtt_disable_guc(dev_priv); + + DRM_ERROR("GuC init failed\n"); + if (i915.enable_guc_loading > 1 || i915.enable_guc_submission > 1) + ret = -EIO; + else + ret = 0; + + if (i915.enable_guc_submission) { + i915.enable_guc_submission = 0; + DRM_NOTE("Falling back from GuC submission to execlist mode\n"); + } + + return ret; +} + +void intel_uc_fini_hw(struct drm_i915_private *dev_priv) +{ + if (!i915.enable_guc_loading) + return; + + if (i915.enable_guc_submission) { + i915_guc_submission_disable(dev_priv); + gen9_disable_guc_interrupts(dev_priv); + i915_guc_submission_fini(dev_priv); + } + i915_ggtt_disable_guc(dev_priv); } /* * Read GuC command/status register (SOFT_SCRATCH_0) * Return true if it contains a response rather than a command */ -static bool intel_guc_recv(struct intel_guc *guc, u32 *status) +static bool guc_recv(struct intel_guc *guc, u32 *status) { struct drm_i915_private *dev_priv = guc_to_i915(guc); @@ -43,7 +372,10 @@ static bool intel_guc_recv(struct intel_guc *guc, u32 *status) return INTEL_GUC_RECV_IS_RESPONSE(val); } -int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) +/* + * This function implements the MMIO based host to GuC interface. 
+ */ +int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) { struct drm_i915_private *dev_priv = guc_to_i915(guc); u32 status; @@ -54,7 +386,7 @@ int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) return -EINVAL; mutex_lock(&guc->send_mutex); - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_BLITTER); dev_priv->guc.action_count += 1; dev_priv->guc.action_cmd = action[0]; @@ -71,9 +403,9 @@ int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) * up to that length of time, then switch to a slower sleep-wait loop. * No inte_guc_send command should ever take longer than 10ms. */ - ret = wait_for_us(intel_guc_recv(guc, &status), 10); + ret = wait_for_us(guc_recv(guc, &status), 10); if (ret) - ret = wait_for(intel_guc_recv(guc, &status), 10); + ret = wait_for(guc_recv(guc, &status), 10); if (status != INTEL_GUC_STATUS_SUCCESS) { /* * Either the GuC explicitly returned an error (which @@ -92,7 +424,7 @@ int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) } dev_priv->guc.action_status = status; - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_BLITTER); mutex_unlock(&guc->send_mutex); return ret; @@ -113,4 +445,3 @@ int intel_guc_sample_forcewake(struct intel_guc *guc) return intel_guc_send(guc, action, ARRAY_SIZE(action)); } - diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index d74f4d3ad8dc..4b7f73aeddac 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -34,7 +34,9 @@ struct drm_i915_gem_request; /* * This structure primarily describes the GEM object shared with the GuC. - * The GEM object is held for the entire lifetime of our interaction with + * The specs sometimes refer to this object as a "GuC context", but we use + * the term "client" to avoid confusion with hardware contexts. 
This + * GEM object is held for the entire lifetime of our interaction with * the GuC, being allocated before the GuC is loaded with its firmware. * Because there's no way to update the address used by the GuC after * initialisation, the shared object must stay pinned into the GGTT as @@ -44,7 +46,7 @@ struct drm_i915_gem_request; * * The single GEM object described here is actually made up of several * separate areas, as far as the GuC is concerned. The first page (kept - * kmap'd) includes the "process decriptor" which holds sequence data for + * kmap'd) includes the "process descriptor" which holds sequence data for * the doorbell, and one cacheline which actually *is* the doorbell; a * write to this will "ring the doorbell" (i.e. send an interrupt to the * GuC). The subsequent pages of the client object constitute the work @@ -72,13 +74,12 @@ struct i915_guc_client { uint32_t engines; /* bitmap of (host) engine ids */ uint32_t priority; - uint32_t ctx_index; + u32 stage_id; uint32_t proc_desc_offset; - uint32_t doorbell_offset; - uint32_t doorbell_cookie; - uint16_t doorbell_id; - uint16_t padding[3]; /* Maintain alignment */ + u16 doorbell_id; + unsigned long doorbell_offset; + u32 doorbell_cookie; spinlock_t wq_lock; uint32_t wq_offset; @@ -100,11 +101,40 @@ enum intel_uc_fw_status { INTEL_UC_FIRMWARE_SUCCESS }; +/* User-friendly representation of an enum */ +static inline +const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) +{ + switch (status) { + case INTEL_UC_FIRMWARE_FAIL: + return "FAIL"; + case INTEL_UC_FIRMWARE_NONE: + return "NONE"; + case INTEL_UC_FIRMWARE_PENDING: + return "PENDING"; + case INTEL_UC_FIRMWARE_SUCCESS: + return "SUCCESS"; + } + return "<invalid>"; +} + enum intel_uc_fw_type { INTEL_UC_FW_TYPE_GUC, INTEL_UC_FW_TYPE_HUC }; +/* User-friendly representation of an enum */ +static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type) +{ + switch (type) { + case INTEL_UC_FW_TYPE_GUC: + return "GuC"; + case 
INTEL_UC_FW_TYPE_HUC: + return "HuC"; + } + return "uC"; +} + /* * This structure encapsulates all the data needed during the process * of fetching, caching, and loading the firmware image into the GuC. @@ -121,7 +151,7 @@ struct intel_uc_fw { uint16_t major_ver_found; uint16_t minor_ver_found; - enum intel_uc_fw_type fw; + enum intel_uc_fw_type type; uint32_t header_size; uint32_t header_offset; uint32_t rsa_size; @@ -133,11 +163,13 @@ struct intel_uc_fw { struct intel_guc_log { uint32_t flags; struct i915_vma *vma; - void *buf_addr; - struct workqueue_struct *flush_wq; - struct work_struct flush_work; - struct rchan *relay_chan; - + /* The runtime stuff gets created only when GuC logging gets enabled */ + struct { + void *buf_addr; + struct workqueue_struct *flush_wq; + struct work_struct flush_work; + struct rchan *relay_chan; + } runtime; /* logging related stats */ u32 capture_miss_count; u32 flush_interrupt_count; @@ -154,12 +186,13 @@ struct intel_guc { bool interrupts_enabled; struct i915_vma *ads_vma; - struct i915_vma *ctx_pool_vma; - struct ida ctx_ids; + struct i915_vma *stage_desc_pool; + void *stage_desc_pool_vaddr; + struct ida stage_ids; struct i915_guc_client *execbuf_client; - DECLARE_BITMAP(doorbell_bitmap, GUC_MAX_DOORBELLS); + DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS); uint32_t db_cacheline; /* Cyclic counter mod pagesize */ /* Action status & statistics */ @@ -174,6 +207,9 @@ struct intel_guc { /* To serialize the intel_guc_send actions */ struct mutex send_mutex; + + /* GuC's FW specific send function */ + int (*send)(struct intel_guc *guc, const u32 *data, u32 len); }; struct intel_huc { @@ -184,19 +220,24 @@ struct intel_huc { }; /* intel_uc.c */ +void intel_uc_sanitize_options(struct drm_i915_private *dev_priv); void intel_uc_init_early(struct drm_i915_private *dev_priv); -int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len); +void intel_uc_init_fw(struct drm_i915_private *dev_priv); +void intel_uc_fini_fw(struct 
drm_i915_private *dev_priv); +int intel_uc_init_hw(struct drm_i915_private *dev_priv); +void intel_uc_fini_hw(struct drm_i915_private *dev_priv); int intel_guc_sample_forcewake(struct intel_guc *guc); +int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); +static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) +{ + return guc->send(guc, action, len); +} /* intel_guc_loader.c */ -extern void intel_guc_init(struct drm_i915_private *dev_priv); -extern int intel_guc_setup(struct drm_i915_private *dev_priv); -extern void intel_guc_fini(struct drm_i915_private *dev_priv); -extern const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status); -extern int intel_guc_suspend(struct drm_i915_private *dev_priv); -extern int intel_guc_resume(struct drm_i915_private *dev_priv); -void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, - struct intel_uc_fw *uc_fw); +int intel_guc_select_fw(struct intel_guc *guc); +int intel_guc_init_hw(struct intel_guc *guc); +int intel_guc_suspend(struct drm_i915_private *dev_priv); +int intel_guc_resume(struct drm_i915_private *dev_priv); u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv); /* i915_guc_submission.c */ @@ -209,10 +250,11 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv); struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); /* intel_guc_log.c */ -void intel_guc_log_create(struct intel_guc *guc); +int intel_guc_log_create(struct intel_guc *guc); +void intel_guc_log_destroy(struct intel_guc *guc); +int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val); void i915_guc_log_register(struct drm_i915_private *dev_priv); void i915_guc_log_unregister(struct drm_i915_private *dev_priv); -int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val); static inline u32 guc_ggtt_offset(struct i915_vma *vma) { @@ -223,9 +265,8 @@ static inline u32 guc_ggtt_offset(struct i915_vma *vma) } /* intel_huc.c */ 
-void intel_huc_init(struct drm_i915_private *dev_priv); -void intel_huc_fini(struct drm_i915_private *dev_priv); -int intel_huc_load(struct drm_i915_private *dev_priv); +void intel_huc_select_fw(struct intel_huc *huc); +int intel_huc_init_hw(struct intel_huc *huc); void intel_guc_auth_huc(struct drm_i915_private *dev_priv); #endif diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index b7ff592b14f5..6d1ea26b2493 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -25,6 +25,7 @@ #include "intel_drv.h" #include "i915_vgpu.h" +#include <asm/iosf_mbi.h> #include <linux/pm_runtime.h> #define FORCEWAKE_ACK_TIMEOUT_MS 50 @@ -51,10 +52,10 @@ intel_uncore_forcewake_domain_to_str(const enum forcewake_domain_id id) } static inline void -fw_domain_reset(const struct intel_uncore_forcewake_domain *d) +fw_domain_reset(struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d) { - WARN_ON(!i915_mmio_reg_valid(d->reg_set)); - __raw_i915_write32(d->i915, d->reg_set, d->val_reset); + __raw_i915_write32(i915, d->reg_set, i915->uncore.fw_reset); } static inline void @@ -68,9 +69,10 @@ fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d) } static inline void -fw_domain_wait_ack_clear(const struct intel_uncore_forcewake_domain *d) +fw_domain_wait_ack_clear(const struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d) { - if (wait_for_atomic((__raw_i915_read32(d->i915, d->reg_ack) & + if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & FORCEWAKE_KERNEL) == 0, FORCEWAKE_ACK_TIMEOUT_MS)) DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n", @@ -78,15 +80,17 @@ fw_domain_wait_ack_clear(const struct intel_uncore_forcewake_domain *d) } static inline void -fw_domain_get(const struct intel_uncore_forcewake_domain *d) +fw_domain_get(struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d) { - __raw_i915_write32(d->i915, 
d->reg_set, d->val_set); + __raw_i915_write32(i915, d->reg_set, i915->uncore.fw_set); } static inline void -fw_domain_wait_ack(const struct intel_uncore_forcewake_domain *d) +fw_domain_wait_ack(const struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d) { - if (wait_for_atomic((__raw_i915_read32(d->i915, d->reg_ack) & + if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & FORCEWAKE_KERNEL), FORCEWAKE_ACK_TIMEOUT_MS)) DRM_ERROR("%s: timed out waiting for forcewake ack request.\n", @@ -94,72 +98,59 @@ fw_domain_wait_ack(const struct intel_uncore_forcewake_domain *d) } static inline void -fw_domain_put(const struct intel_uncore_forcewake_domain *d) +fw_domain_put(const struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d) { - __raw_i915_write32(d->i915, d->reg_set, d->val_clear); -} - -static inline void -fw_domain_posting_read(const struct intel_uncore_forcewake_domain *d) -{ - /* something from same cacheline, but not from the set register */ - if (i915_mmio_reg_valid(d->reg_post)) - __raw_posting_read(d->i915, d->reg_post); + __raw_i915_write32(i915, d->reg_set, i915->uncore.fw_clear); } static void -fw_domains_get(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains) +fw_domains_get(struct drm_i915_private *i915, enum forcewake_domains fw_domains) { struct intel_uncore_forcewake_domain *d; + unsigned int tmp; + + GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains); - for_each_fw_domain_masked(d, fw_domains, dev_priv) { - fw_domain_wait_ack_clear(d); - fw_domain_get(d); + for_each_fw_domain_masked(d, fw_domains, i915, tmp) { + fw_domain_wait_ack_clear(i915, d); + fw_domain_get(i915, d); } - for_each_fw_domain_masked(d, fw_domains, dev_priv) - fw_domain_wait_ack(d); + for_each_fw_domain_masked(d, fw_domains, i915, tmp) + fw_domain_wait_ack(i915, d); - dev_priv->uncore.fw_domains_active |= fw_domains; + i915->uncore.fw_domains_active |= fw_domains; } static void -fw_domains_put(struct 
drm_i915_private *dev_priv, enum forcewake_domains fw_domains) +fw_domains_put(struct drm_i915_private *i915, enum forcewake_domains fw_domains) { struct intel_uncore_forcewake_domain *d; + unsigned int tmp; - for_each_fw_domain_masked(d, fw_domains, dev_priv) { - fw_domain_put(d); - fw_domain_posting_read(d); - } + GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains); - dev_priv->uncore.fw_domains_active &= ~fw_domains; -} + for_each_fw_domain_masked(d, fw_domains, i915, tmp) + fw_domain_put(i915, d); -static void -fw_domains_posting_read(struct drm_i915_private *dev_priv) -{ - struct intel_uncore_forcewake_domain *d; - - /* No need to do for all, just do for first found */ - for_each_fw_domain(d, dev_priv) { - fw_domain_posting_read(d); - break; - } + i915->uncore.fw_domains_active &= ~fw_domains; } static void -fw_domains_reset(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains) +fw_domains_reset(struct drm_i915_private *i915, + enum forcewake_domains fw_domains) { struct intel_uncore_forcewake_domain *d; + unsigned int tmp; - if (dev_priv->uncore.fw_domains == 0) + if (!fw_domains) return; - for_each_fw_domain_masked(d, fw_domains, dev_priv) - fw_domain_reset(d); + GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains); - fw_domains_posting_read(dev_priv); + for_each_fw_domain_masked(d, fw_domains, i915, tmp) + fw_domain_reset(i915, d); } static void __gen6_gt_wait_for_thread_c0(struct drm_i915_private *dev_priv) @@ -235,7 +226,8 @@ intel_uncore_fw_release_timer(struct hrtimer *timer) { struct intel_uncore_forcewake_domain *domain = container_of(timer, struct intel_uncore_forcewake_domain, timer); - struct drm_i915_private *dev_priv = domain->i915; + struct drm_i915_private *dev_priv = + container_of(domain, struct drm_i915_private, uncore.fw_domain[domain->id]); unsigned long irqflags; assert_rpm_device_not_suspended(dev_priv); @@ -252,8 +244,8 @@ intel_uncore_fw_release_timer(struct hrtimer *timer) return HRTIMER_NORESTART; } -void 
intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv, - bool restore) +static void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv, + bool restore) { unsigned long irqflags; struct intel_uncore_forcewake_domain *domain; @@ -265,9 +257,11 @@ void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv, * timers are run before holding. */ while (1) { + unsigned int tmp; + active_domains = 0; - for_each_fw_domain(domain, dev_priv) { + for_each_fw_domain(domain, dev_priv, tmp) { if (hrtimer_cancel(&domain->timer) == 0) continue; @@ -276,7 +270,7 @@ void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv, spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - for_each_fw_domain(domain, dev_priv) { + for_each_fw_domain(domain, dev_priv, tmp) { if (hrtimer_active(&domain->timer)) active_domains |= domain->mask; } @@ -299,7 +293,7 @@ void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv, if (fw) dev_priv->uncore.funcs.force_wake_put(dev_priv, fw); - fw_domains_reset(dev_priv, FORCEWAKE_ALL); + fw_domains_reset(dev_priv, dev_priv->uncore.fw_domains); if (restore) { /* If reset with a user forcewake, try to restore */ if (fw) @@ -429,10 +423,18 @@ static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, intel_uncore_forcewake_reset(dev_priv, restore_forcewake); } -void intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, - bool restore_forcewake) +void intel_uncore_suspend(struct drm_i915_private *dev_priv) +{ + iosf_mbi_unregister_pmic_bus_access_notifier( + &dev_priv->uncore.pmic_bus_access_nb); + intel_uncore_forcewake_reset(dev_priv, false); +} + +void intel_uncore_resume_early(struct drm_i915_private *dev_priv) { - __intel_uncore_early_sanitize(dev_priv, restore_forcewake); + __intel_uncore_early_sanitize(dev_priv, true); + iosf_mbi_register_pmic_bus_access_notifier( + &dev_priv->uncore.pmic_bus_access_nb); i915_check_and_clear_faults(dev_priv); } @@ -448,13 +450,13 @@ static void 
__intel_uncore_forcewake_get(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains) { struct intel_uncore_forcewake_domain *domain; + unsigned int tmp; fw_domains &= dev_priv->uncore.fw_domains; - for_each_fw_domain_masked(domain, fw_domains, dev_priv) { + for_each_fw_domain_masked(domain, fw_domains, dev_priv, tmp) if (domain->wake_count++) fw_domains &= ~domain->mask; - } if (fw_domains) dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains); @@ -499,7 +501,7 @@ void intel_uncore_forcewake_get(struct drm_i915_private *dev_priv, void intel_uncore_forcewake_get__locked(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains) { - assert_spin_locked(&dev_priv->uncore.lock); + lockdep_assert_held(&dev_priv->uncore.lock); if (!dev_priv->uncore.funcs.force_wake_get) return; @@ -511,10 +513,11 @@ static void __intel_uncore_forcewake_put(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains) { struct intel_uncore_forcewake_domain *domain; + unsigned int tmp; fw_domains &= dev_priv->uncore.fw_domains; - for_each_fw_domain_masked(domain, fw_domains, dev_priv) { + for_each_fw_domain_masked(domain, fw_domains, dev_priv, tmp) { if (WARN_ON(domain->wake_count == 0)) continue; @@ -557,7 +560,7 @@ void intel_uncore_forcewake_put(struct drm_i915_private *dev_priv, void intel_uncore_forcewake_put__locked(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains) { - assert_spin_locked(&dev_priv->uncore.lock); + lockdep_assert_held(&dev_priv->uncore.lock); if (!dev_priv->uncore.funcs.force_wake_put) return; @@ -635,33 +638,6 @@ find_fw_domain(struct drm_i915_private *dev_priv, u32 offset) return entry->domains; } -static void -intel_fw_table_check(struct drm_i915_private *dev_priv) -{ - const struct intel_forcewake_range *ranges; - unsigned int num_ranges; - s32 prev; - unsigned int i; - - if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG)) - return; - - ranges = dev_priv->uncore.fw_domains_table; - if (!ranges) - return; - - num_ranges 
= dev_priv->uncore.fw_domains_table_entries; - - for (i = 0, prev = -1; i < num_ranges; i++, ranges++) { - WARN_ON_ONCE(IS_GEN9(dev_priv) && - (prev + 1) != (s32)ranges->start); - WARN_ON_ONCE(prev >= (s32)ranges->start); - prev = ranges->start; - WARN_ON_ONCE(prev >= (s32)ranges->end); - prev = ranges->end; - } -} - #define GEN_FW_RANGE(s, e, d) \ { .start = (s), .end = (e), .domains = (d) } @@ -700,23 +676,6 @@ static const i915_reg_t gen8_shadowed_regs[] = { /* TODO: Other registers are not yet used */ }; -static void intel_shadow_table_check(void) -{ - const i915_reg_t *reg = gen8_shadowed_regs; - s32 prev; - u32 offset; - unsigned int i; - - if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG)) - return; - - for (i = 0, prev = -1; i < ARRAY_SIZE(gen8_shadowed_regs); i++, reg++) { - offset = i915_mmio_reg_offset(*reg); - WARN_ON_ONCE(prev >= (s32)offset); - prev = offset; - } -} - static int mmio_reg_cmp(u32 key, const i915_reg_t *reg) { u32 offset = i915_mmio_reg_offset(*reg); @@ -963,8 +922,11 @@ static noinline void ___force_wake_auto(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains) { struct intel_uncore_forcewake_domain *domain; + unsigned int tmp; + + GEM_BUG_ON(fw_domains & ~dev_priv->uncore.fw_domains); - for_each_fw_domain_masked(domain, fw_domains, dev_priv) + for_each_fw_domain_masked(domain, fw_domains, dev_priv, tmp) fw_domain_arm_timer(domain); dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains); @@ -984,29 +946,19 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv, ___force_wake_auto(dev_priv, fw_domains); } -#define __gen6_read(x) \ +#define __gen_read(func, x) \ static u##x \ -gen6_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \ +func##_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \ enum forcewake_domains fw_engine; \ GEN6_READ_HEADER(x); \ - fw_engine = __gen6_reg_read_fw_domains(offset); \ - if (fw_engine) \ - __force_wake_auto(dev_priv, fw_engine); \ 
- val = __raw_i915_read##x(dev_priv, reg); \ - GEN6_READ_FOOTER; \ -} - -#define __fwtable_read(x) \ -static u##x \ -fwtable_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \ - enum forcewake_domains fw_engine; \ - GEN6_READ_HEADER(x); \ - fw_engine = __fwtable_reg_read_fw_domains(offset); \ + fw_engine = __##func##_reg_read_fw_domains(offset); \ if (fw_engine) \ __force_wake_auto(dev_priv, fw_engine); \ val = __raw_i915_read##x(dev_priv, reg); \ GEN6_READ_FOOTER; \ } +#define __gen6_read(x) __gen_read(gen6, x) +#define __fwtable_read(x) __gen_read(fwtable, x) #define __gen9_decoupled_read(x) \ static u##x \ @@ -1044,34 +996,6 @@ __gen6_read(64) #undef GEN6_READ_FOOTER #undef GEN6_READ_HEADER -#define VGPU_READ_HEADER(x) \ - unsigned long irqflags; \ - u##x val = 0; \ - assert_rpm_device_not_suspended(dev_priv); \ - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags) - -#define VGPU_READ_FOOTER \ - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \ - trace_i915_reg_rw(false, reg, val, sizeof(val), trace); \ - return val - -#define __vgpu_read(x) \ -static u##x \ -vgpu_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \ - VGPU_READ_HEADER(x); \ - val = __raw_i915_read##x(dev_priv, reg); \ - VGPU_READ_FOOTER; \ -} - -__vgpu_read(8) -__vgpu_read(16) -__vgpu_read(32) -__vgpu_read(64) - -#undef __vgpu_read -#undef VGPU_READ_FOOTER -#undef VGPU_READ_HEADER - #define GEN2_WRITE_HEADER \ trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \ assert_rpm_wakelock_held(dev_priv); \ @@ -1135,29 +1059,19 @@ gen6_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool GEN6_WRITE_FOOTER; \ } -#define __gen8_write(x) \ +#define __gen_write(func, x) \ static void \ -gen8_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \ +func##_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \ enum forcewake_domains fw_engine; \ GEN6_WRITE_HEADER; \ - 
fw_engine = __gen8_reg_write_fw_domains(offset); \ - if (fw_engine) \ - __force_wake_auto(dev_priv, fw_engine); \ - __raw_i915_write##x(dev_priv, reg, val); \ - GEN6_WRITE_FOOTER; \ -} - -#define __fwtable_write(x) \ -static void \ -fwtable_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \ - enum forcewake_domains fw_engine; \ - GEN6_WRITE_HEADER; \ - fw_engine = __fwtable_reg_write_fw_domains(offset); \ + fw_engine = __##func##_reg_write_fw_domains(offset); \ if (fw_engine) \ __force_wake_auto(dev_priv, fw_engine); \ __raw_i915_write##x(dev_priv, reg, val); \ GEN6_WRITE_FOOTER; \ } +#define __gen8_write(x) __gen_write(gen8, x) +#define __fwtable_write(x) __gen_write(fwtable, x) #define __gen9_decoupled_write(x) \ static void \ @@ -1194,31 +1108,6 @@ __gen6_write(32) #undef GEN6_WRITE_FOOTER #undef GEN6_WRITE_HEADER -#define VGPU_WRITE_HEADER \ - unsigned long irqflags; \ - trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \ - assert_rpm_device_not_suspended(dev_priv); \ - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags) - -#define VGPU_WRITE_FOOTER \ - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags) - -#define __vgpu_write(x) \ -static void vgpu_write##x(struct drm_i915_private *dev_priv, \ - i915_reg_t reg, u##x val, bool trace) { \ - VGPU_WRITE_HEADER; \ - __raw_i915_write##x(dev_priv, reg, val); \ - VGPU_WRITE_FOOTER; \ -} - -__vgpu_write(8) -__vgpu_write(16) -__vgpu_write(32) - -#undef __vgpu_write -#undef VGPU_WRITE_FOOTER -#undef VGPU_WRITE_HEADER - #define ASSIGN_WRITE_MMIO_VFUNCS(x) \ do { \ dev_priv->uncore.funcs.mmio_writeb = x##_write8; \ @@ -1249,48 +1138,45 @@ static void fw_domain_init(struct drm_i915_private *dev_priv, WARN_ON(d->wake_count); + WARN_ON(!i915_mmio_reg_valid(reg_set)); + WARN_ON(!i915_mmio_reg_valid(reg_ack)); + d->wake_count = 0; d->reg_set = reg_set; d->reg_ack = reg_ack; - if (IS_GEN6(dev_priv)) { - d->val_reset = 0; - d->val_set = FORCEWAKE_KERNEL; - d->val_clear = 0; - } else { 
- /* WaRsClearFWBitsAtReset:bdw,skl */ - d->val_reset = _MASKED_BIT_DISABLE(0xffff); - d->val_set = _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL); - d->val_clear = _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL); - } - - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - d->reg_post = FORCEWAKE_ACK_VLV; - else if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv) || IS_GEN8(dev_priv)) - d->reg_post = ECOBUS; - - d->i915 = dev_priv; d->id = domain_id; BUILD_BUG_ON(FORCEWAKE_RENDER != (1 << FW_DOMAIN_ID_RENDER)); BUILD_BUG_ON(FORCEWAKE_BLITTER != (1 << FW_DOMAIN_ID_BLITTER)); BUILD_BUG_ON(FORCEWAKE_MEDIA != (1 << FW_DOMAIN_ID_MEDIA)); - d->mask = 1 << domain_id; + d->mask = BIT(domain_id); hrtimer_init(&d->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); d->timer.function = intel_uncore_fw_release_timer; - dev_priv->uncore.fw_domains |= (1 << domain_id); + dev_priv->uncore.fw_domains |= BIT(domain_id); - fw_domain_reset(d); + fw_domain_reset(dev_priv, d); } static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) { - if (INTEL_INFO(dev_priv)->gen <= 5) + if (INTEL_GEN(dev_priv) <= 5 || intel_vgpu_active(dev_priv)) return; + if (IS_GEN6(dev_priv)) { + dev_priv->uncore.fw_reset = 0; + dev_priv->uncore.fw_set = FORCEWAKE_KERNEL; + dev_priv->uncore.fw_clear = 0; + } else { + /* WaRsClearFWBitsAtReset:bdw,skl */ + dev_priv->uncore.fw_reset = _MASKED_BIT_DISABLE(0xffff); + dev_priv->uncore.fw_set = _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL); + dev_priv->uncore.fw_clear = _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL); + } + if (IS_GEN9(dev_priv)) { dev_priv->uncore.funcs.force_wake_get = fw_domains_get; dev_priv->uncore.funcs.force_wake_put = fw_domains_put; @@ -1354,9 +1240,9 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) FORCEWAKE_MT, FORCEWAKE_MT_ACK); spin_lock_irq(&dev_priv->uncore.lock); - fw_domains_get_with_thread_status(dev_priv, FORCEWAKE_ALL); + fw_domains_get_with_thread_status(dev_priv, FORCEWAKE_RENDER); ecobus = __raw_i915_read32(dev_priv, ECOBUS); - 
fw_domains_put_with_fifo(dev_priv, FORCEWAKE_ALL); + fw_domains_put_with_fifo(dev_priv, FORCEWAKE_RENDER); spin_unlock_irq(&dev_priv->uncore.lock); if (!(ecobus & FORCEWAKE_MT_ENABLE)) { @@ -1385,6 +1271,32 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) dev_priv->uncore.fw_domains_table_entries = ARRAY_SIZE((d)); \ } +static int i915_pmic_bus_access_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct drm_i915_private *dev_priv = container_of(nb, + struct drm_i915_private, uncore.pmic_bus_access_nb); + + switch (action) { + case MBI_PMIC_BUS_ACCESS_BEGIN: + /* + * forcewake all now to make sure that we don't need to do a + * forcewake later which on systems where this notifier gets + * called requires the punit to access to the shared pmic i2c + * bus, which will be busy after this notification, leading to: + * "render: timed out waiting for forcewake ack request." + * errors. + */ + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + break; + case MBI_PMIC_BUS_ACCESS_END: + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + break; + } + + return NOTIFY_OK; +} + void intel_uncore_init(struct drm_i915_private *dev_priv) { i915_check_vgpu(dev_priv); @@ -1394,23 +1306,25 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) __intel_uncore_early_sanitize(dev_priv, false); dev_priv->uncore.unclaimed_mmio_check = 1; + dev_priv->uncore.pmic_bus_access_nb.notifier_call = + i915_pmic_bus_access_notifier; - switch (INTEL_INFO(dev_priv)->gen) { - default: - case 9: - ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges); - ASSIGN_WRITE_MMIO_VFUNCS(fwtable); - ASSIGN_READ_MMIO_VFUNCS(fwtable); - if (HAS_DECOUPLED_MMIO(dev_priv)) { - dev_priv->uncore.funcs.mmio_readl = - gen9_decoupled_read32; - dev_priv->uncore.funcs.mmio_readq = - gen9_decoupled_read64; - dev_priv->uncore.funcs.mmio_writel = - gen9_decoupled_write32; + if (IS_GEN(dev_priv, 2, 4) || intel_vgpu_active(dev_priv)) { + ASSIGN_WRITE_MMIO_VFUNCS(gen2); 
+ ASSIGN_READ_MMIO_VFUNCS(gen2); + } else if (IS_GEN5(dev_priv)) { + ASSIGN_WRITE_MMIO_VFUNCS(gen5); + ASSIGN_READ_MMIO_VFUNCS(gen5); + } else if (IS_GEN(dev_priv, 6, 7)) { + ASSIGN_WRITE_MMIO_VFUNCS(gen6); + + if (IS_VALLEYVIEW(dev_priv)) { + ASSIGN_FW_DOMAINS_TABLE(__vlv_fw_ranges); + ASSIGN_READ_MMIO_VFUNCS(fwtable); + } else { + ASSIGN_READ_MMIO_VFUNCS(gen6); } - break; - case 8: + } else if (IS_GEN8(dev_priv)) { if (IS_CHERRYVIEW(dev_priv)) { ASSIGN_FW_DOMAINS_TABLE(__chv_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(fwtable); @@ -1420,38 +1334,22 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) ASSIGN_WRITE_MMIO_VFUNCS(gen8); ASSIGN_READ_MMIO_VFUNCS(gen6); } - break; - case 7: - case 6: - ASSIGN_WRITE_MMIO_VFUNCS(gen6); - - if (IS_VALLEYVIEW(dev_priv)) { - ASSIGN_FW_DOMAINS_TABLE(__vlv_fw_ranges); - ASSIGN_READ_MMIO_VFUNCS(fwtable); - } else { - ASSIGN_READ_MMIO_VFUNCS(gen6); + } else { + ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(fwtable); + ASSIGN_READ_MMIO_VFUNCS(fwtable); + if (HAS_DECOUPLED_MMIO(dev_priv)) { + dev_priv->uncore.funcs.mmio_readl = + gen9_decoupled_read32; + dev_priv->uncore.funcs.mmio_readq = + gen9_decoupled_read64; + dev_priv->uncore.funcs.mmio_writel = + gen9_decoupled_write32; } - break; - case 5: - ASSIGN_WRITE_MMIO_VFUNCS(gen5); - ASSIGN_READ_MMIO_VFUNCS(gen5); - break; - case 4: - case 3: - case 2: - ASSIGN_WRITE_MMIO_VFUNCS(gen2); - ASSIGN_READ_MMIO_VFUNCS(gen2); - break; } - intel_fw_table_check(dev_priv); - if (INTEL_GEN(dev_priv) >= 8) - intel_shadow_table_check(); - - if (intel_vgpu_active(dev_priv)) { - ASSIGN_WRITE_MMIO_VFUNCS(vgpu); - ASSIGN_READ_MMIO_VFUNCS(vgpu); - } + iosf_mbi_register_pmic_bus_access_notifier( + &dev_priv->uncore.pmic_bus_access_nb); i915_check_and_clear_faults(dev_priv); } @@ -1460,6 +1358,9 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) void intel_uncore_fini(struct drm_i915_private *dev_priv) { + iosf_mbi_unregister_pmic_bus_access_notifier( + 
&dev_priv->uncore.pmic_bus_access_nb); + /* Paranoia: make sure we have disabled everything before we exit. */ intel_uncore_sanitize(dev_priv); intel_uncore_forcewake_reset(dev_priv, false); @@ -1970,3 +1871,7 @@ intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, return fw_domains; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_uncore.c" +#endif diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c new file mode 100644 index 000000000000..4e681fc13be4 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c @@ -0,0 +1,135 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include "huge_gem_object.h" + +static void huge_free_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + unsigned long nreal = obj->scratch / PAGE_SIZE; + struct scatterlist *sg; + + for (sg = pages->sgl; sg && nreal--; sg = __sg_next(sg)) + __free_page(sg_page(sg)); + + sg_free_table(pages); + kfree(pages); +} + +static struct sg_table * +huge_get_pages(struct drm_i915_gem_object *obj) +{ +#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) + const unsigned long nreal = obj->scratch / PAGE_SIZE; + const unsigned long npages = obj->base.size / PAGE_SIZE; + struct scatterlist *sg, *src, *end; + struct sg_table *pages; + unsigned long n; + + pages = kmalloc(sizeof(*pages), GFP); + if (!pages) + return ERR_PTR(-ENOMEM); + + if (sg_alloc_table(pages, npages, GFP)) { + kfree(pages); + return ERR_PTR(-ENOMEM); + } + + sg = pages->sgl; + for (n = 0; n < nreal; n++) { + struct page *page; + + page = alloc_page(GFP | __GFP_HIGHMEM); + if (!page) { + sg_mark_end(sg); + goto err; + } + + sg_set_page(sg, page, PAGE_SIZE, 0); + sg = __sg_next(sg); + } + if (nreal < npages) { + for (end = sg, src = pages->sgl; sg; sg = __sg_next(sg)) { + sg_set_page(sg, sg_page(src), PAGE_SIZE, 0); + src = __sg_next(src); + if (src == end) + src = pages->sgl; + } + } + + if (i915_gem_gtt_prepare_pages(obj, pages)) + goto err; + + return pages; + +err: + huge_free_pages(obj, pages); + return ERR_PTR(-ENOMEM); +#undef GFP +} + +static void huge_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + i915_gem_gtt_finish_pages(obj, pages); + huge_free_pages(obj, pages); + + obj->mm.dirty = false; +} + +static const struct drm_i915_gem_object_ops huge_ops = { + .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | + I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = huge_get_pages, + .put_pages = huge_put_pages, +}; + +struct drm_i915_gem_object * +huge_gem_object(struct drm_i915_private *i915, + phys_addr_t phys_size, + dma_addr_t dma_size) +{ + struct 
drm_i915_gem_object *obj; + + GEM_BUG_ON(!phys_size || phys_size > dma_size); + GEM_BUG_ON(!IS_ALIGNED(phys_size, PAGE_SIZE)); + GEM_BUG_ON(!IS_ALIGNED(dma_size, I915_GTT_PAGE_SIZE)); + + if (overflows_type(dma_size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(i915); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, dma_size); + i915_gem_object_init(obj, &huge_ops); + + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; + obj->scratch = phys_size; + + return obj; +} diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/selftests/huge_gem_object.h new file mode 100644 index 000000000000..a6133a9e8029 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.h @@ -0,0 +1,45 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __HUGE_GEM_OBJECT_H +#define __HUGE_GEM_OBJECT_H + +struct drm_i915_gem_object * +huge_gem_object(struct drm_i915_private *i915, + phys_addr_t phys_size, + dma_addr_t dma_size); + +static inline phys_addr_t +huge_gem_object_phys_size(struct drm_i915_gem_object *obj) +{ + return obj->scratch; +} + +static inline dma_addr_t +huge_gem_object_dma_size(struct drm_i915_gem_object *obj) +{ + return obj->base.size; +} + +#endif /* !__HUGE_GEM_OBJECT_H */ diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c new file mode 100644 index 000000000000..f08d0179b3df --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -0,0 +1,385 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/prime_numbers.h> + +#include "../i915_selftest.h" +#include "i915_random.h" + +static int cpu_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + unsigned int needs_clflush; + struct page *page; + typeof(v) *map; + int err; + + err = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); + if (err) + return err; + + page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + map = kmap_atomic(page); + if (needs_clflush & CLFLUSH_BEFORE) + clflush(map+offset_in_page(offset) / sizeof(*map)); + map[offset_in_page(offset) / sizeof(*map)] = v; + if (needs_clflush & CLFLUSH_AFTER) + clflush(map+offset_in_page(offset) / sizeof(*map)); + kunmap_atomic(map); + + i915_gem_obj_finish_shmem_access(obj); + return 0; +} + +static int cpu_get(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 *v) +{ + unsigned int needs_clflush; + struct page *page; + typeof(v) map; + int err; + + err = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); + if (err) + return err; + + page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + map = kmap_atomic(page); + if (needs_clflush & CLFLUSH_BEFORE) + clflush(map+offset_in_page(offset) / sizeof(*map)); + *v = map[offset_in_page(offset) / sizeof(*map)]; + kunmap_atomic(map); + + i915_gem_obj_finish_shmem_access(obj); + return 0; +} + +static int gtt_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + struct i915_vma *vma; + typeof(v) *map; + int err; + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + map = i915_vma_pin_iomap(vma); + 
i915_vma_unpin(vma); + if (IS_ERR(map)) + return PTR_ERR(map); + + map[offset / sizeof(*map)] = v; + i915_vma_unpin_iomap(vma); + + return 0; +} + +static int gtt_get(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 *v) +{ + struct i915_vma *vma; + typeof(v) map; + int err; + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + map = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(map)) + return PTR_ERR(map); + + *v = map[offset / sizeof(*map)]; + i915_vma_unpin_iomap(vma); + + return 0; +} + +static int wc_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + typeof(v) *map; + int err; + + /* XXX GTT write followed by WC write go missing */ + i915_gem_object_flush_gtt_write_domain(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return err; + + map = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(map)) + return PTR_ERR(map); + + map[offset / sizeof(*map)] = v; + i915_gem_object_unpin_map(obj); + + return 0; +} + +static int wc_get(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 *v) +{ + typeof(v) map; + int err; + + /* XXX WC write followed by GTT write go missing */ + i915_gem_object_flush_gtt_write_domain(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + return err; + + map = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(map)) + return PTR_ERR(map); + + *v = map[offset / sizeof(*map)]; + i915_gem_object_unpin_map(obj); + + return 0; +} + +static int gpu_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct drm_i915_gem_request *rq; + struct i915_vma *vma; + u32 *cs; + int err; + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 
0); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + rq = i915_gem_request_alloc(i915->engine[RCS], i915->kernel_context); + if (IS_ERR(rq)) { + i915_vma_unpin(vma); + return PTR_ERR(rq); + } + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + __i915_add_request(rq, false); + i915_vma_unpin(vma); + return PTR_ERR(cs); + } + + if (INTEL_GEN(i915) >= 8) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; + *cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset); + *cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset); + *cs++ = v; + } else if (INTEL_GEN(i915) >= 4) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; + *cs++ = 0; + *cs++ = i915_ggtt_offset(vma) + offset; + *cs++ = v; + } else { + *cs++ = MI_STORE_DWORD_IMM | 1 << 22; + *cs++ = i915_ggtt_offset(vma) + offset; + *cs++ = v; + *cs++ = MI_NOOP; + } + intel_ring_advance(rq, cs); + + i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unpin(vma); + + reservation_object_lock(obj->resv, NULL); + reservation_object_add_excl_fence(obj->resv, &rq->fence); + reservation_object_unlock(obj->resv); + + __i915_add_request(rq, true); + + return 0; +} + +static bool always_valid(struct drm_i915_private *i915) +{ + return true; +} + +static bool needs_mi_store_dword(struct drm_i915_private *i915) +{ + return igt_can_mi_store_dword_imm(i915); +} + +static const struct igt_coherency_mode { + const char *name; + int (*set)(struct drm_i915_gem_object *, unsigned long offset, u32 v); + int (*get)(struct drm_i915_gem_object *, unsigned long offset, u32 *v); + bool (*valid)(struct drm_i915_private *i915); +} igt_coherency_mode[] = { + { "cpu", cpu_set, cpu_get, always_valid }, + { "gtt", gtt_set, gtt_get, always_valid }, + { "wc", wc_set, wc_get, always_valid }, + { "gpu", gpu_set, NULL, needs_mi_store_dword }, + { }, +}; + +static int igt_gem_coherency(void *arg) +{ + const unsigned int ncachelines = PAGE_SIZE/64; + I915_RND_STATE(prng); + struct drm_i915_private *i915 = arg; + const struct igt_coherency_mode *read, *write, 
*over; + struct drm_i915_gem_object *obj; + unsigned long count, n; + u32 *offsets, *values; + int err = 0; + + /* We repeatedly write, overwrite and read from a sequence of + * cachelines in order to try and detect incoherency (unflushed writes + * from either the CPU or GPU). Each setter/getter uses our cache + * domain API which should prevent incoherency. + */ + + offsets = kmalloc_array(ncachelines, 2*sizeof(u32), GFP_KERNEL); + if (!offsets) + return -ENOMEM; + for (count = 0; count < ncachelines; count++) + offsets[count] = count * 64 + 4 * (count % 16); + + values = offsets + ncachelines; + + mutex_lock(&i915->drm.struct_mutex); + for (over = igt_coherency_mode; over->name; over++) { + if (!over->set) + continue; + + if (!over->valid(i915)) + continue; + + for (write = igt_coherency_mode; write->name; write++) { + if (!write->set) + continue; + + if (!write->valid(i915)) + continue; + + for (read = igt_coherency_mode; read->name; read++) { + if (!read->get) + continue; + + if (!read->valid(i915)) + continue; + + for_each_prime_number_from(count, 1, ncachelines) { + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto unlock; + } + + i915_random_reorder(offsets, ncachelines, &prng); + for (n = 0; n < count; n++) + values[n] = prandom_u32_state(&prng); + + for (n = 0; n < count; n++) { + err = over->set(obj, offsets[n], ~values[n]); + if (err) { + pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n", + n, count, over->name, err); + goto put_object; + } + } + + for (n = 0; n < count; n++) { + err = write->set(obj, offsets[n], values[n]); + if (err) { + pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n", + n, count, write->name, err); + goto put_object; + } + } + + for (n = 0; n < count; n++) { + u32 found; + + err = read->get(obj, offsets[n], &found); + if (err) { + pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n", + n, count, read->name, err); + goto 
put_object; + } + + if (found != values[n]) { + pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n", + n, count, over->name, + write->name, values[n], + read->name, found, + ~values[n], offsets[n]); + err = -EINVAL; + goto put_object; + } + } + + __i915_gem_object_release_unless_active(obj); + } + } + } + } +unlock: + mutex_unlock(&i915->drm.struct_mutex); + kfree(offsets); + return err; + +put_object: + __i915_gem_object_release_unless_active(obj); + goto unlock; +} + +int i915_gem_coherency_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gem_coherency), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c new file mode 100644 index 000000000000..1afb8b06e3e1 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -0,0 +1,463 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +#include "mock_drm.h" +#include "huge_gem_object.h" + +#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) + +static struct i915_vma * +gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value) +{ + struct drm_i915_gem_object *obj; + const int gen = INTEL_GEN(vma->vm->i915); + unsigned long n, size; + u32 *cmd; + int err; + + GEM_BUG_ON(!igt_can_mi_store_dword_imm(vma->vm->i915)); + + size = (4 * count + 1) * sizeof(u32); + size = round_up(size, PAGE_SIZE); + obj = i915_gem_object_create_internal(vma->vm->i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size); + offset += vma->node.start; + + for (n = 0; n < count; n++) { + if (gen >= 8) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4; + *cmd++ = lower_32_bits(offset); + *cmd++ = upper_32_bits(offset); + *cmd++ = value; + } else if (gen >= 4) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4 | + (gen < 6 ? 
1 << 22 : 0); + *cmd++ = 0; + *cmd++ = offset; + *cmd++ = value; + } else { + *cmd++ = MI_STORE_DWORD_IMM | 1 << 22; + *cmd++ = offset; + *cmd++ = value; + } + offset += PAGE_SIZE; + } + *cmd = MI_BATCH_BUFFER_END; + i915_gem_object_unpin_map(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + goto err; + + vma = i915_vma_instance(obj, vma->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static unsigned long real_page_count(struct drm_i915_gem_object *obj) +{ + return huge_gem_object_phys_size(obj) >> PAGE_SHIFT; +} + +static unsigned long fake_page_count(struct drm_i915_gem_object *obj) +{ + return huge_gem_object_dma_size(obj) >> PAGE_SHIFT; +} + +static int gpu_fill(struct drm_i915_gem_object *obj, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + unsigned int dw) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_address_space *vm = + ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct drm_i915_gem_request *rq; + struct i915_vma *vma; + struct i915_vma *batch; + unsigned int flags; + int err; + + GEM_BUG_ON(obj->base.size > vm->total); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + return err; + + err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER); + if (err) + return err; + + /* Within the GTT the huge objects maps every page onto + * its 1024 real pages (using phys_pfn = dma_pfn % 1024). + * We set the nth dword within the page using the nth + * mapping via the GTT - this should exercise the GTT mapping + * whilst checking that each context provides a unique view + * into the object. 
+ */ + batch = gpu_fill_dw(vma, + (dw * real_page_count(obj)) << PAGE_SHIFT | + (dw * sizeof(u32)), + real_page_count(obj), + dw); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err_vma; + } + + rq = i915_gem_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_batch; + } + + err = engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto err_request; + + err = i915_switch_context(rq); + if (err) + goto err_request; + + flags = 0; + if (INTEL_GEN(vm->i915) <= 5) + flags |= I915_DISPATCH_SECURE; + + err = engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + flags); + if (err) + goto err_request; + + i915_vma_move_to_active(batch, rq, 0); + i915_gem_object_set_active_reference(batch->obj); + i915_vma_unpin(batch); + i915_vma_close(batch); + + i915_vma_move_to_active(vma, rq, 0); + i915_vma_unpin(vma); + + reservation_object_lock(obj->resv, NULL); + reservation_object_add_excl_fence(obj->resv, &rq->fence); + reservation_object_unlock(obj->resv); + + __i915_add_request(rq, true); + + return 0; + +err_request: + __i915_add_request(rq, false); +err_batch: + i915_vma_unpin(batch); +err_vma: + i915_vma_unpin(vma); + return err; +} + +static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) +{ + const bool has_llc = HAS_LLC(to_i915(obj->base.dev)); + unsigned int n, m, need_flush; + int err; + + err = i915_gem_obj_prepare_shmem_write(obj, &need_flush); + if (err) + return err; + + for (n = 0; n < real_page_count(obj); n++) { + u32 *map; + + map = kmap_atomic(i915_gem_object_get_page(obj, n)); + for (m = 0; m < DW_PER_PAGE; m++) + map[m] = value; + if (!has_llc) + drm_clflush_virt_range(map, PAGE_SIZE); + kunmap_atomic(map); + } + + i915_gem_obj_finish_shmem_access(obj); + obj->base.read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU; + obj->base.write_domain = 0; + return 0; +} + +static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max) +{ + unsigned int n, m, needs_flush; + int err; + + err = 
i915_gem_obj_prepare_shmem_read(obj, &needs_flush); + if (err) + return err; + + for (n = 0; n < real_page_count(obj); n++) { + u32 *map; + + map = kmap_atomic(i915_gem_object_get_page(obj, n)); + if (needs_flush & CLFLUSH_BEFORE) + drm_clflush_virt_range(map, PAGE_SIZE); + + for (m = 0; m < max; m++) { + if (map[m] != m) { + pr_err("Invalid value at page %d, offset %d: found %x expected %x\n", + n, m, map[m], m); + err = -EINVAL; + goto out_unmap; + } + } + + for (; m < DW_PER_PAGE; m++) { + if (map[m] != 0xdeadbeef) { + pr_err("Invalid value at page %d, offset %d: found %x expected %x\n", + n, m, map[m], 0xdeadbeef); + err = -EINVAL; + goto out_unmap; + } + } + +out_unmap: + kunmap_atomic(map); + if (err) + break; + } + + i915_gem_obj_finish_shmem_access(obj); + return err; +} + +static struct drm_i915_gem_object * +create_test_object(struct i915_gem_context *ctx, + struct drm_file *file, + struct list_head *objects) +{ + struct drm_i915_gem_object *obj; + struct i915_address_space *vm = + ctx->ppgtt ? 
&ctx->ppgtt->base : &ctx->i915->ggtt.base; + u64 size; + u32 handle; + int err; + + size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); + size = round_down(size, DW_PER_PAGE * PAGE_SIZE); + + obj = huge_gem_object(ctx->i915, DW_PER_PAGE * PAGE_SIZE, size); + if (IS_ERR(obj)) + return obj; + + /* tie the handle to the drm_file for easy reaping */ + err = drm_gem_handle_create(file, &obj->base, &handle); + i915_gem_object_put(obj); + if (err) + return ERR_PTR(err); + + err = cpu_fill(obj, 0xdeadbeef); + if (err) { + pr_err("Failed to fill object with cpu, err=%d\n", + err); + return ERR_PTR(err); + } + + list_add_tail(&obj->st_link, objects); + return obj; +} + +static unsigned long max_dwords(struct drm_i915_gem_object *obj) +{ + unsigned long npages = fake_page_count(obj); + + GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE)); + return npages / DW_PER_PAGE; +} + +static int igt_ctx_exec(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct drm_file *file; + IGT_TIMEOUT(end_time); + LIST_HEAD(objects); + unsigned long ncontexts, ndwords, dw; + bool first_shared_gtt = true; + int err; + + /* Create a few different contexts (with different mm) and write + * through each ctx/mm using the GPU making sure those writes end + * up in the expected pages of our obj. 
+ */ + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&i915->drm.struct_mutex); + + ncontexts = 0; + ndwords = 0; + dw = 0; + while (!time_after(jiffies, end_time)) { + struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + unsigned int id; + + if (first_shared_gtt) { + ctx = __create_hw_context(i915, file->driver_priv); + first_shared_gtt = false; + } else { + ctx = i915_gem_create_context(i915, file->driver_priv); + } + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_unlock; + } + + for_each_engine(engine, i915, id) { + if (dw == 0) { + obj = create_test_object(ctx, file, &objects); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_unlock; + } + } + + err = gpu_fill(obj, ctx, engine, dw); + if (err) { + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + ndwords, dw, max_dwords(obj), + engine->name, ctx->hw_id, + yesno(!!ctx->ppgtt), err); + goto out_unlock; + } + + if (++dw == max_dwords(obj)) + dw = 0; + ndwords++; + } + ncontexts++; + } + pr_info("Submitted %lu contexts (across %u engines), filling %lu dwords\n", + ncontexts, INTEL_INFO(i915)->num_rings, ndwords); + + dw = 0; + list_for_each_entry(obj, &objects, st_link) { + unsigned int rem = + min_t(unsigned int, ndwords - dw, max_dwords(obj)); + + err = cpu_check(obj, rem); + if (err) + break; + + dw += rem; + } + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + + mock_file_free(i915, file); + return err; +} + +static int fake_aliasing_ppgtt_enable(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + int err; + + err = i915_gem_init_aliasing_ppgtt(i915); + if (err) + return err; + + list_for_each_entry(obj, &i915->mm.bound_list, global_link) { + struct i915_vma *vma; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) + continue; + + vma->flags &= ~I915_VMA_LOCAL_BIND; + } + + return 0; +} + +static void fake_aliasing_ppgtt_disable(struct drm_i915_private 
*i915) +{ + i915_gem_fini_aliasing_ppgtt(i915); +} + +int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_ctx_exec), + }; + bool fake_alias = false; + int err; + + /* Install a fake aliasing gtt for exercise */ + if (USES_PPGTT(dev_priv) && !dev_priv->mm.aliasing_ppgtt) { + mutex_lock(&dev_priv->drm.struct_mutex); + err = fake_aliasing_ppgtt_enable(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); + if (err) + return err; + + GEM_BUG_ON(!dev_priv->mm.aliasing_ppgtt); + fake_alias = true; + } + + err = i915_subtests(tests, dev_priv); + + if (fake_alias) { + mutex_lock(&dev_priv->drm.struct_mutex); + fake_aliasing_ppgtt_disable(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); + } + + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c new file mode 100644 index 000000000000..817bef74bbcb --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c @@ -0,0 +1,303 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +#include "mock_gem_device.h" +#include "mock_dmabuf.h" + +static int igt_dmabuf_export(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + i915_gem_object_put(obj); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%d\n", + (int)PTR_ERR(dmabuf)); + return PTR_ERR(dmabuf); + } + + dma_buf_put(dmabuf); + return 0; +} + +static int igt_dmabuf_import_self(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct drm_gem_object *import; + struct dma_buf *dmabuf; + int err; + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%d\n", + (int)PTR_ERR(dmabuf)); + err = PTR_ERR(dmabuf); + goto out; + } + + import = i915_gem_prime_import(&i915->drm, dmabuf); + if (IS_ERR(import)) { + pr_err("i915_gem_prime_import failed with err=%d\n", + (int)PTR_ERR(import)); + err = PTR_ERR(import); + goto out_dmabuf; + } + + if (import != &obj->base) { + pr_err("i915_gem_prime_import created a new object!\n"); + err = -EINVAL; + goto out_import; + } + + err = 0; +out_import: + i915_gem_object_put(to_intel_bo(import)); +out_dmabuf: + dma_buf_put(dmabuf); +out: + i915_gem_object_put(obj); + return err; +} + +static int igt_dmabuf_import(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct 
dma_buf *dmabuf; + void *obj_map, *dma_map; + u32 pattern[] = { 0, 0xaa, 0xcc, 0x55, 0xff }; + int err, i; + + dmabuf = mock_dmabuf(1); + if (IS_ERR(dmabuf)) + return PTR_ERR(dmabuf); + + obj = to_intel_bo(i915_gem_prime_import(&i915->drm, dmabuf)); + if (IS_ERR(obj)) { + pr_err("i915_gem_prime_import failed with err=%d\n", + (int)PTR_ERR(obj)); + err = PTR_ERR(obj); + goto out_dmabuf; + } + + if (obj->base.dev != &i915->drm) { + pr_err("i915_gem_prime_import created a non-i915 object!\n"); + err = -EINVAL; + goto out_obj; + } + + if (obj->base.size != PAGE_SIZE) { + pr_err("i915_gem_prime_import is wrong size found %lld, expected %ld\n", + (long long)obj->base.size, PAGE_SIZE); + err = -EINVAL; + goto out_obj; + } + + dma_map = dma_buf_vmap(dmabuf); + if (!dma_map) { + pr_err("dma_buf_vmap failed\n"); + err = -ENOMEM; + goto out_obj; + } + + if (0) { /* Can not yet map dmabuf */ + obj_map = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(obj_map)) { + err = PTR_ERR(obj_map); + pr_err("i915_gem_object_pin_map failed with err=%d\n", err); + goto out_dma_map; + } + + for (i = 0; i < ARRAY_SIZE(pattern); i++) { + memset(dma_map, pattern[i], PAGE_SIZE); + if (memchr_inv(obj_map, pattern[i], PAGE_SIZE)) { + err = -EINVAL; + pr_err("imported vmap not all set to %x!\n", pattern[i]); + i915_gem_object_unpin_map(obj); + goto out_dma_map; + } + } + + for (i = 0; i < ARRAY_SIZE(pattern); i++) { + memset(obj_map, pattern[i], PAGE_SIZE); + if (memchr_inv(dma_map, pattern[i], PAGE_SIZE)) { + err = -EINVAL; + pr_err("exported vmap not all set to %x!\n", pattern[i]); + i915_gem_object_unpin_map(obj); + goto out_dma_map; + } + } + + i915_gem_object_unpin_map(obj); + } + + err = 0; +out_dma_map: + dma_buf_vunmap(dmabuf, dma_map); +out_obj: + i915_gem_object_put(obj); +out_dmabuf: + dma_buf_put(dmabuf); + return err; +} + +static int igt_dmabuf_import_ownership(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + 
void *ptr; + int err; + + dmabuf = mock_dmabuf(1); + if (IS_ERR(dmabuf)) + return PTR_ERR(dmabuf); + + ptr = dma_buf_vmap(dmabuf); + if (!ptr) { + pr_err("dma_buf_vmap failed\n"); + err = -ENOMEM; + goto err_dmabuf; + } + + memset(ptr, 0xc5, PAGE_SIZE); + dma_buf_vunmap(dmabuf, ptr); + + obj = to_intel_bo(i915_gem_prime_import(&i915->drm, dmabuf)); + if (IS_ERR(obj)) { + pr_err("i915_gem_prime_import failed with err=%d\n", + (int)PTR_ERR(obj)); + err = PTR_ERR(obj); + goto err_dmabuf; + } + + dma_buf_put(dmabuf); + + err = i915_gem_object_pin_pages(obj); + if (err) { + pr_err("i915_gem_object_pin_pages failed with err=%d\n", err); + goto out_obj; + } + + err = 0; + i915_gem_object_unpin_pages(obj); +out_obj: + i915_gem_object_put(obj); + return err; + +err_dmabuf: + dma_buf_put(dmabuf); + return err; +} + +static int igt_dmabuf_export_vmap(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + void *ptr; + int err; + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%d\n", + (int)PTR_ERR(dmabuf)); + err = PTR_ERR(dmabuf); + goto err_obj; + } + i915_gem_object_put(obj); + + ptr = dma_buf_vmap(dmabuf); + if (IS_ERR(ptr)) { + err = PTR_ERR(ptr); + pr_err("dma_buf_vmap failed with err=%d\n", err); + goto out; + } + + if (memchr_inv(ptr, 0, dmabuf->size)) { + pr_err("Exported object not initialiased to zero!\n"); + err = -EINVAL; + goto out; + } + + memset(ptr, 0xc5, dmabuf->size); + + err = 0; + dma_buf_vunmap(dmabuf, ptr); +out: + dma_buf_put(dmabuf); + return err; + +err_obj: + i915_gem_object_put(obj); + return err; +} + +int i915_gem_dmabuf_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_dmabuf_export), + SUBTEST(igt_dmabuf_import_self), + SUBTEST(igt_dmabuf_import), + 
SUBTEST(igt_dmabuf_import_ownership), + SUBTEST(igt_dmabuf_export_vmap), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915); + + drm_dev_unref(&i915->drm); + return err; +} + +int i915_gem_dmabuf_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_dmabuf_export), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c new file mode 100644 index 000000000000..14e9c2fbc4e6 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -0,0 +1,350 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include "../i915_selftest.h" + +#include "mock_gem_device.h" + +static int populate_ggtt(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + u64 size; + + for (size = 0; + size + I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + size += I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) + return PTR_ERR(vma); + } + + if (!list_empty(&i915->mm.unbound_list)) { + size = 0; + list_for_each_entry(obj, &i915->mm.unbound_list, global_link) + size++; + + pr_err("Found %lld objects unbound!\n", size); + return -EINVAL; + } + + if (list_empty(&i915->ggtt.base.inactive_list)) { + pr_err("No objects on the GGTT inactive list!\n"); + return -EINVAL; + } + + return 0; +} + +static void unpin_ggtt(struct drm_i915_private *i915) +{ + struct i915_vma *vma; + + list_for_each_entry(vma, &i915->ggtt.base.inactive_list, vm_link) + i915_vma_unpin(vma); +} + +static void cleanup_objects(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj, *on; + + list_for_each_entry_safe(obj, on, &i915->mm.unbound_list, global_link) + i915_gem_object_put(obj); + + list_for_each_entry_safe(obj, on, &i915->mm.bound_list, global_link) + i915_gem_object_put(obj); + + mutex_unlock(&i915->drm.struct_mutex); + + i915_gem_drain_freed_objects(i915); + + mutex_lock(&i915->drm.struct_mutex); +} + +static int igt_evict_something(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; + int err; + + /* Fill the GGTT with pinned objects and try to evict one. 
*/ + + err = populate_ggtt(i915); + if (err) + goto cleanup; + + /* Everything is pinned, nothing should happen */ + err = i915_gem_evict_something(&ggtt->base, + I915_GTT_PAGE_SIZE, 0, 0, + 0, U64_MAX, + 0); + if (err != -ENOSPC) { + pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n", + err); + goto cleanup; + } + + unpin_ggtt(i915); + + /* Everything is unpinned, we should be able to evict something */ + err = i915_gem_evict_something(&ggtt->base, + I915_GTT_PAGE_SIZE, 0, 0, + 0, U64_MAX, + 0); + if (err) { + pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n", + err); + goto cleanup; + } + +cleanup: + cleanup_objects(i915); + return err; +} + +static int igt_overcommit(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + /* Fill the GGTT with pinned objects and then try to pin one more. + * We expect it to fail. + */ + + err = populate_ggtt(i915); + if (err) + goto cleanup; + + obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto cleanup; + } + + list_move(&obj->global_link, &i915->mm.unbound_list); + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + if (!IS_ERR(vma) || PTR_ERR(vma) != -ENOSPC) { + pr_err("Failed to evict+insert, i915_gem_object_ggtt_pin returned err=%d\n", (int)PTR_ERR(vma)); + err = -EINVAL; + goto cleanup; + } + +cleanup: + cleanup_objects(i915); + return err; +} + +static int igt_evict_for_vma(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; + struct drm_mm_node target = { + .start = 0, + .size = 4096, + }; + int err; + + /* Fill the GGTT with pinned objects and try to evict a range. 
*/ + + err = populate_ggtt(i915); + if (err) + goto cleanup; + + /* Everything is pinned, nothing should happen */ + err = i915_gem_evict_for_node(&ggtt->base, &target, 0); + if (err != -ENOSPC) { + pr_err("i915_gem_evict_for_node on a full GGTT returned err=%d\n", + err); + goto cleanup; + } + + unpin_ggtt(i915); + + /* Everything is unpinned, we should be able to evict the node */ + err = i915_gem_evict_for_node(&ggtt->base, &target, 0); + if (err) { + pr_err("i915_gem_evict_for_node returned err=%d\n", + err); + goto cleanup; + } + +cleanup: + cleanup_objects(i915); + return err; +} + +static void mock_color_adjust(const struct drm_mm_node *node, + unsigned long color, + u64 *start, + u64 *end) +{ +} + +static int igt_evict_for_cache_color(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; + const unsigned long flags = PIN_OFFSET_FIXED; + struct drm_mm_node target = { + .start = I915_GTT_PAGE_SIZE * 2, + .size = I915_GTT_PAGE_SIZE, + .color = I915_CACHE_LLC, + }; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + /* Currently the use of color_adjust is limited to cache domains within + * the ggtt, and so the presence of mm.color_adjust is assumed to be + * i915_gtt_color_adjust throughout our driver, so using a mock color + * adjust will work just fine for our purposes. 
+ */ + ggtt->base.mm.color_adjust = mock_color_adjust; + + obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto cleanup; + } + i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, + I915_GTT_PAGE_SIZE | flags); + if (IS_ERR(vma)) { + pr_err("[0]i915_gem_object_ggtt_pin failed\n"); + err = PTR_ERR(vma); + goto cleanup; + } + + obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto cleanup; + } + i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); + + /* Neighbouring; same colour - should fit */ + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, + (I915_GTT_PAGE_SIZE * 2) | flags); + if (IS_ERR(vma)) { + pr_err("[1]i915_gem_object_ggtt_pin failed\n"); + err = PTR_ERR(vma); + goto cleanup; + } + + i915_vma_unpin(vma); + + /* Remove just the second vma */ + err = i915_gem_evict_for_node(&ggtt->base, &target, 0); + if (err) { + pr_err("[0]i915_gem_evict_for_node returned err=%d\n", err); + goto cleanup; + } + + /* Attempt to remove the first *pinned* vma, by removing the (empty) + * neighbour -- this should fail. + */ + target.color = I915_CACHE_L3_LLC; + + err = i915_gem_evict_for_node(&ggtt->base, &target, 0); + if (!err) { + pr_err("[1]i915_gem_evict_for_node returned err=%d\n", err); + err = -EINVAL; + goto cleanup; + } + + err = 0; + +cleanup: + unpin_ggtt(i915); + cleanup_objects(i915); + ggtt->base.mm.color_adjust = NULL; + return err; +} + +static int igt_evict_vm(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; + int err; + + /* Fill the GGTT with pinned objects and try to evict everything. 
*/ + + err = populate_ggtt(i915); + if (err) + goto cleanup; + + /* Everything is pinned, nothing should happen */ + err = i915_gem_evict_vm(&ggtt->base, false); + if (err) { + pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", + err); + goto cleanup; + } + + unpin_ggtt(i915); + + err = i915_gem_evict_vm(&ggtt->base, false); + if (err) { + pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", + err); + goto cleanup; + } + +cleanup: + cleanup_objects(i915); + return err; +} + +int i915_gem_evict_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_evict_something), + SUBTEST(igt_evict_for_vma), + SUBTEST(igt_evict_for_cache_color), + SUBTEST(igt_evict_vm), + SUBTEST(igt_overcommit), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + err = i915_subtests(tests, i915); + mutex_unlock(&i915->drm.struct_mutex); + + drm_dev_unref(&i915->drm); + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c new file mode 100644 index 000000000000..50710e3f1caa --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -0,0 +1,1562 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/list_sort.h> +#include <linux/prime_numbers.h> + +#include "../i915_selftest.h" +#include "i915_random.h" + +#include "mock_context.h" +#include "mock_drm.h" +#include "mock_gem_device.h" + +static void fake_free_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + sg_free_table(pages); + kfree(pages); +} + +static struct sg_table * +fake_get_pages(struct drm_i915_gem_object *obj) +{ +#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) +#define PFN_BIAS 0x1000 + struct sg_table *pages; + struct scatterlist *sg; + typeof(obj->base.size) rem; + + pages = kmalloc(sizeof(*pages), GFP); + if (!pages) + return ERR_PTR(-ENOMEM); + + rem = round_up(obj->base.size, BIT(31)) >> 31; + if (sg_alloc_table(pages, rem, GFP)) { + kfree(pages); + return ERR_PTR(-ENOMEM); + } + + rem = obj->base.size; + for (sg = pages->sgl; sg; sg = sg_next(sg)) { + unsigned long len = min_t(typeof(rem), rem, BIT(31)); + + GEM_BUG_ON(!len); + sg_set_page(sg, pfn_to_page(PFN_BIAS), len, 0); + sg_dma_address(sg) = page_to_phys(sg_page(sg)); + sg_dma_len(sg) = len; + + rem -= len; + } + GEM_BUG_ON(rem); + + obj->mm.madv = I915_MADV_DONTNEED; + return pages; +#undef GFP +} + +static void fake_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + fake_free_pages(obj, pages); + obj->mm.dirty = false; + obj->mm.madv = I915_MADV_WILLNEED; +} + +static const struct drm_i915_gem_object_ops fake_ops = { + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages 
= fake_get_pages, + .put_pages = fake_put_pages, +}; + +static struct drm_i915_gem_object * +fake_dma_object(struct drm_i915_private *i915, u64 size) +{ + struct drm_i915_gem_object *obj; + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(i915); + if (!obj) + goto err; + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &fake_ops); + + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->cache_level = I915_CACHE_NONE; + + /* Preallocate the "backing storage" */ + if (i915_gem_object_pin_pages(obj)) + goto err_obj; + + i915_gem_object_unpin_pages(obj); + return obj; + +err_obj: + i915_gem_object_put(obj); +err: + return ERR_PTR(-ENOMEM); +} + +static int igt_ppgtt_alloc(void *arg) +{ + struct drm_i915_private *dev_priv = arg; + struct i915_hw_ppgtt *ppgtt; + u64 size, last; + int err; + + /* Allocate a ppggt and try to fill the entire range */ + + if (!USES_PPGTT(dev_priv)) + return 0; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return -ENOMEM; + + mutex_lock(&dev_priv->drm.struct_mutex); + err = __hw_ppgtt_init(ppgtt, dev_priv); + if (err) + goto err_ppgtt; + + if (!ppgtt->base.allocate_va_range) + goto err_ppgtt_cleanup; + + /* Check we can allocate the entire range */ + for (size = 4096; + size <= ppgtt->base.total; + size <<= 2) { + err = ppgtt->base.allocate_va_range(&ppgtt->base, 0, size); + if (err) { + if (err == -ENOMEM) { + pr_info("[1] Ran out of memory for va_range [0 + %llx] [bit %d]\n", + size, ilog2(size)); + err = 0; /* virtual space too large! 
*/ + } + goto err_ppgtt_cleanup; + } + + ppgtt->base.clear_range(&ppgtt->base, 0, size); + } + + /* Check we can incrementally allocate the entire range */ + for (last = 0, size = 4096; + size <= ppgtt->base.total; + last = size, size <<= 2) { + err = ppgtt->base.allocate_va_range(&ppgtt->base, + last, size - last); + if (err) { + if (err == -ENOMEM) { + pr_info("[2] Ran out of memory for va_range [%llx + %llx] [bit %d]\n", + last, size - last, ilog2(size)); + err = 0; /* virtual space too large! */ + } + goto err_ppgtt_cleanup; + } + } + +err_ppgtt_cleanup: + ppgtt->base.cleanup(&ppgtt->base); +err_ppgtt: + mutex_unlock(&dev_priv->drm.struct_mutex); + kfree(ppgtt); + return err; +} + +static int lowlevel_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + I915_RND_STATE(seed_prng); + unsigned int size; + + /* Keep creating larger objects until one cannot fit into the hole */ + for (size = 12; (hole_end - hole_start) >> size; size++) { + I915_RND_SUBSTATE(prng, seed_prng); + struct drm_i915_gem_object *obj; + unsigned int *order, count, n; + u64 hole_size; + + hole_size = (hole_end - hole_start) >> size; + if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32)) + hole_size = KMALLOC_MAX_SIZE / sizeof(u32); + count = hole_size; + do { + count >>= 1; + order = i915_random_order(count, &prng); + } while (!order && count); + if (!order) + break; + + GEM_BUG_ON(count * BIT_ULL(size) > vm->total); + GEM_BUG_ON(hole_start + count * BIT_ULL(size) > hole_end); + + /* Ignore allocation failures (i.e. don't report them as + * a test failure) as we are purposefully allocating very + * large objects without checking that we have sufficient + * memory. We expect to hit -ENOMEM. 
+ */ + + obj = fake_dma_object(i915, BIT_ULL(size)); + if (IS_ERR(obj)) { + kfree(order); + break; + } + + GEM_BUG_ON(obj->base.size != BIT_ULL(size)); + + if (i915_gem_object_pin_pages(obj)) { + i915_gem_object_put(obj); + kfree(order); + break; + } + + for (n = 0; n < count; n++) { + u64 addr = hole_start + order[n] * BIT_ULL(size); + + GEM_BUG_ON(addr + BIT_ULL(size) > vm->total); + + if (igt_timeout(end_time, + "%s timed out before %d/%d\n", + __func__, n, count)) { + hole_end = hole_start; /* quit */ + break; + } + + if (vm->allocate_va_range && + vm->allocate_va_range(vm, addr, BIT_ULL(size))) + break; + + vm->insert_entries(vm, obj->mm.pages, addr, + I915_CACHE_NONE, 0); + } + count = n; + + i915_random_reorder(order, count, &prng); + for (n = 0; n < count; n++) { + u64 addr = hole_start + order[n] * BIT_ULL(size); + + GEM_BUG_ON(addr + BIT_ULL(size) > vm->total); + vm->clear_range(vm, addr, BIT_ULL(size)); + } + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + + kfree(order); + } + + return 0; +} + +static void close_object_list(struct list_head *objects, + struct i915_address_space *vm) +{ + struct drm_i915_gem_object *obj, *on; + int ignored; + + list_for_each_entry_safe(obj, on, objects, st_link) { + struct i915_vma *vma; + + vma = i915_vma_instance(obj, vm, NULL); + if (!IS_ERR(vma)) + ignored = i915_vma_unbind(vma); + /* Only ppgtt vma may be closed before the object is freed */ + if (!IS_ERR(vma) && !i915_vma_is_ggtt(vma)) + i915_vma_close(vma); + + list_del(&obj->st_link); + i915_gem_object_put(obj); + } +} + +static int fill_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + const u64 hole_size = hole_end - hole_start; + struct drm_i915_gem_object *obj; + const unsigned long max_pages = + min_t(u64, ULONG_MAX - 1, hole_size/2 >> PAGE_SHIFT); + const unsigned long max_step = max(int_sqrt(max_pages), 2UL); + unsigned long npages, prime, flags; + struct 
i915_vma *vma; + LIST_HEAD(objects); + int err; + + /* Try binding many VMA working inwards from either edge */ + + flags = PIN_OFFSET_FIXED | PIN_USER; + if (i915_is_ggtt(vm)) + flags |= PIN_GLOBAL; + + for_each_prime_number_from(prime, 2, max_step) { + for (npages = 1; npages <= max_pages; npages *= prime) { + const u64 full_size = npages << PAGE_SHIFT; + const struct { + const char *name; + u64 offset; + int step; + } phases[] = { + { "top-down", hole_end, -1, }, + { "bottom-up", hole_start, 1, }, + { } + }, *p; + + obj = fake_dma_object(i915, full_size); + if (IS_ERR(obj)) + break; + + list_add(&obj->st_link, &objects); + + /* Align differing sized objects against the edges, and + * check we don't walk off into the void when binding + * them into the GTT. + */ + for (p = phases; p->name; p++) { + u64 offset; + + offset = p->offset; + list_for_each_entry(obj, &objects, st_link) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + continue; + + if (p->step < 0) { + if (offset < hole_start + obj->base.size) + break; + offset -= obj->base.size; + } + + err = i915_vma_pin(vma, 0, 0, offset | flags); + if (err) { + pr_err("%s(%s) pin (forward) failed with err=%d on size=%lu pages (prime=%lu), offset=%llx\n", + __func__, p->name, err, npages, prime, offset); + goto err; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, offset | flags)) { + pr_err("%s(%s) (forward) insert failed: vma.node=%llx + %llx [allocated? 
%d], expected offset %llx\n", + __func__, p->name, vma->node.start, vma->node.size, drm_mm_node_allocated(&vma->node), + offset); + err = -EINVAL; + goto err; + } + + i915_vma_unpin(vma); + + if (p->step > 0) { + if (offset + obj->base.size > hole_end) + break; + offset += obj->base.size; + } + } + + offset = p->offset; + list_for_each_entry(obj, &objects, st_link) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + continue; + + if (p->step < 0) { + if (offset < hole_start + obj->base.size) + break; + offset -= obj->base.size; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, offset | flags)) { + pr_err("%s(%s) (forward) moved vma.node=%llx + %llx, expected offset %llx\n", + __func__, p->name, vma->node.start, vma->node.size, + offset); + err = -EINVAL; + goto err; + } + + err = i915_vma_unbind(vma); + if (err) { + pr_err("%s(%s) (forward) unbind of vma.node=%llx + %llx failed with err=%d\n", + __func__, p->name, vma->node.start, vma->node.size, + err); + goto err; + } + + if (p->step > 0) { + if (offset + obj->base.size > hole_end) + break; + offset += obj->base.size; + } + } + + offset = p->offset; + list_for_each_entry_reverse(obj, &objects, st_link) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + continue; + + if (p->step < 0) { + if (offset < hole_start + obj->base.size) + break; + offset -= obj->base.size; + } + + err = i915_vma_pin(vma, 0, 0, offset | flags); + if (err) { + pr_err("%s(%s) pin (backward) failed with err=%d on size=%lu pages (prime=%lu), offset=%llx\n", + __func__, p->name, err, npages, prime, offset); + goto err; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, offset | flags)) { + pr_err("%s(%s) (backward) insert failed: vma.node=%llx + %llx [allocated? 
%d], expected offset %llx\n", + __func__, p->name, vma->node.start, vma->node.size, drm_mm_node_allocated(&vma->node), + offset); + err = -EINVAL; + goto err; + } + + i915_vma_unpin(vma); + + if (p->step > 0) { + if (offset + obj->base.size > hole_end) + break; + offset += obj->base.size; + } + } + + offset = p->offset; + list_for_each_entry_reverse(obj, &objects, st_link) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + continue; + + if (p->step < 0) { + if (offset < hole_start + obj->base.size) + break; + offset -= obj->base.size; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, offset | flags)) { + pr_err("%s(%s) (backward) moved vma.node=%llx + %llx [allocated? %d], expected offset %llx\n", + __func__, p->name, vma->node.start, vma->node.size, drm_mm_node_allocated(&vma->node), + offset); + err = -EINVAL; + goto err; + } + + err = i915_vma_unbind(vma); + if (err) { + pr_err("%s(%s) (backward) unbind of vma.node=%llx + %llx failed with err=%d\n", + __func__, p->name, vma->node.start, vma->node.size, + err); + goto err; + } + + if (p->step > 0) { + if (offset + obj->base.size > hole_end) + break; + offset += obj->base.size; + } + } + } + + if (igt_timeout(end_time, "%s timed out (npages=%lu, prime=%lu)\n", + __func__, npages, prime)) { + err = -EINTR; + goto err; + } + } + + close_object_list(&objects, vm); + } + + return 0; + +err: + close_object_list(&objects, vm); + return err; +} + +static int walk_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + const u64 hole_size = hole_end - hole_start; + const unsigned long max_pages = + min_t(u64, ULONG_MAX - 1, hole_size >> PAGE_SHIFT); + unsigned long flags; + u64 size; + + /* Try binding a single VMA in different positions within the hole */ + + flags = PIN_OFFSET_FIXED | PIN_USER; + if (i915_is_ggtt(vm)) + flags |= PIN_GLOBAL; + + for_each_prime_number_from(size, 1, max_pages) { + 
struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u64 addr; + int err = 0; + + obj = fake_dma_object(i915, size << PAGE_SHIFT); + if (IS_ERR(obj)) + break; + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_put; + } + + for (addr = hole_start; + addr + obj->base.size < hole_end; + addr += obj->base.size) { + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) { + pr_err("%s bind failed at %llx + %llx [hole %llx- %llx] with err=%d\n", + __func__, addr, vma->size, + hole_start, hole_end, err); + goto err_close; + } + i915_vma_unpin(vma); + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, addr | flags)) { + pr_err("%s incorrect at %llx + %llx\n", + __func__, addr, vma->size); + err = -EINVAL; + goto err_close; + } + + err = i915_vma_unbind(vma); + if (err) { + pr_err("%s unbind failed at %llx + %llx with err=%d\n", + __func__, addr, vma->size, err); + goto err_close; + } + + GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); + + if (igt_timeout(end_time, + "%s timed out at %llx\n", + __func__, addr)) { + err = -EINTR; + goto err_close; + } + } + +err_close: + if (!i915_vma_is_ggtt(vma)) + i915_vma_close(vma); +err_put: + i915_gem_object_put(obj); + if (err) + return err; + } + + return 0; +} + +static int pot_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned long flags; + unsigned int pot; + int err = 0; + + flags = PIN_OFFSET_FIXED | PIN_USER; + if (i915_is_ggtt(vm)) + flags |= PIN_GLOBAL; + + obj = i915_gem_object_create_internal(i915, 2 * I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + /* Insert a pair of pages across every pot boundary within the hole */ + for (pot = fls64(hole_end - 1) - 1; + pot > ilog2(2 * 
I915_GTT_PAGE_SIZE); + pot--) { + u64 step = BIT_ULL(pot); + u64 addr; + + for (addr = round_up(hole_start + I915_GTT_PAGE_SIZE, step) - I915_GTT_PAGE_SIZE; + addr <= round_down(hole_end - 2*I915_GTT_PAGE_SIZE, step) - I915_GTT_PAGE_SIZE; + addr += step) { + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) { + pr_err("%s failed to pin object at %llx in hole [%llx - %llx], with err=%d\n", + __func__, + addr, + hole_start, hole_end, + err); + goto err; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, addr | flags)) { + pr_err("%s incorrect at %llx + %llx\n", + __func__, addr, vma->size); + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + err = -EINVAL; + goto err; + } + + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + GEM_BUG_ON(err); + } + + if (igt_timeout(end_time, + "%s timed out after %d/%d\n", + __func__, pot, fls64(hole_end - 1) - 1)) { + err = -EINTR; + goto err; + } + } + +err: + if (!i915_vma_is_ggtt(vma)) + i915_vma_close(vma); +err_obj: + i915_gem_object_put(obj); + return err; +} + +static int drunk_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + I915_RND_STATE(prng); + unsigned int size; + unsigned long flags; + + flags = PIN_OFFSET_FIXED | PIN_USER; + if (i915_is_ggtt(vm)) + flags |= PIN_GLOBAL; + + /* Keep creating larger objects until one cannot fit into the hole */ + for (size = 12; (hole_end - hole_start) >> size; size++) { + struct drm_i915_gem_object *obj; + unsigned int *order, count, n; + struct i915_vma *vma; + u64 hole_size; + int err; + + hole_size = (hole_end - hole_start) >> size; + if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32)) + hole_size = KMALLOC_MAX_SIZE / sizeof(u32); + count = hole_size; + do { + count >>= 1; + order = i915_random_order(count, &prng); + } while (!order && count); + if (!order) + break; + + /* Ignore allocation failures (i.e. 
don't report them as + * a test failure) as we are purposefully allocating very + * large objects without checking that we have sufficient + * memory. We expect to hit -ENOMEM. + */ + + obj = fake_dma_object(i915, BIT_ULL(size)); + if (IS_ERR(obj)) { + kfree(order); + break; + } + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + GEM_BUG_ON(vma->size != BIT_ULL(size)); + + for (n = 0; n < count; n++) { + u64 addr = hole_start + order[n] * BIT_ULL(size); + + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) { + pr_err("%s failed to pin object at %llx + %llx in hole [%llx - %llx], with err=%d\n", + __func__, + addr, BIT_ULL(size), + hole_start, hole_end, + err); + goto err; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, addr | flags)) { + pr_err("%s incorrect at %llx + %llx\n", + __func__, addr, BIT_ULL(size)); + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + err = -EINVAL; + goto err; + } + + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + GEM_BUG_ON(err); + + if (igt_timeout(end_time, + "%s timed out after %d/%d\n", + __func__, n, count)) { + err = -EINTR; + goto err; + } + } + +err: + if (!i915_vma_is_ggtt(vma)) + i915_vma_close(vma); +err_obj: + i915_gem_object_put(obj); + kfree(order); + if (err) + return err; + } + + return 0; +} + +static int __shrink_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + struct drm_i915_gem_object *obj; + unsigned long flags = PIN_OFFSET_FIXED | PIN_USER; + unsigned int order = 12; + LIST_HEAD(objects); + int err = 0; + u64 addr; + + /* Keep creating larger objects until one cannot fit into the hole */ + for (addr = hole_start; addr < hole_end; ) { + struct i915_vma *vma; + u64 size = BIT_ULL(order++); + + size = min(size, hole_end - addr); + obj = fake_dma_object(i915, size); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + break; + } + + 
list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + break; + } + + GEM_BUG_ON(vma->size != size); + + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) { + pr_err("%s failed to pin object at %llx + %llx in hole [%llx - %llx], with err=%d\n", + __func__, addr, size, hole_start, hole_end, err); + break; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, addr | flags)) { + pr_err("%s incorrect at %llx + %llx\n", + __func__, addr, size); + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + err = -EINVAL; + break; + } + + i915_vma_unpin(vma); + addr += size; + + if (igt_timeout(end_time, + "%s timed out at ofset %llx [%llx - %llx]\n", + __func__, addr, hole_start, hole_end)) { + err = -EINTR; + break; + } + } + + close_object_list(&objects, vm); + return err; +} + +static int shrink_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + unsigned long prime; + int err; + + vm->fault_attr.probability = 999; + atomic_set(&vm->fault_attr.times, -1); + + for_each_prime_number_from(prime, 0, ULONG_MAX - 1) { + vm->fault_attr.interval = prime; + err = __shrink_hole(i915, vm, hole_start, hole_end, end_time); + if (err) + break; + } + + memset(&vm->fault_attr, 0, sizeof(vm->fault_attr)); + + return err; +} + +static int exercise_ppgtt(struct drm_i915_private *dev_priv, + int (*func)(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time)) +{ + struct drm_file *file; + struct i915_hw_ppgtt *ppgtt; + IGT_TIMEOUT(end_time); + int err; + + if (!USES_FULL_PPGTT(dev_priv)) + return 0; + + file = mock_file(dev_priv); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&dev_priv->drm.struct_mutex); + ppgtt = i915_ppgtt_create(dev_priv, file->driver_priv, "mock"); + if (IS_ERR(ppgtt)) { + err = PTR_ERR(ppgtt); + goto out_unlock; + 
} + GEM_BUG_ON(offset_in_page(ppgtt->base.total)); + GEM_BUG_ON(ppgtt->base.closed); + + err = func(dev_priv, &ppgtt->base, 0, ppgtt->base.total, end_time); + + i915_ppgtt_close(&ppgtt->base); + i915_ppgtt_put(ppgtt); +out_unlock: + mutex_unlock(&dev_priv->drm.struct_mutex); + + mock_file_free(dev_priv, file); + return err; +} + +static int igt_ppgtt_fill(void *arg) +{ + return exercise_ppgtt(arg, fill_hole); +} + +static int igt_ppgtt_walk(void *arg) +{ + return exercise_ppgtt(arg, walk_hole); +} + +static int igt_ppgtt_pot(void *arg) +{ + return exercise_ppgtt(arg, pot_hole); +} + +static int igt_ppgtt_drunk(void *arg) +{ + return exercise_ppgtt(arg, drunk_hole); +} + +static int igt_ppgtt_lowlevel(void *arg) +{ + return exercise_ppgtt(arg, lowlevel_hole); +} + +static int igt_ppgtt_shrink(void *arg) +{ + return exercise_ppgtt(arg, shrink_hole); +} + +static int sort_holes(void *priv, struct list_head *A, struct list_head *B) +{ + struct drm_mm_node *a = list_entry(A, typeof(*a), hole_stack); + struct drm_mm_node *b = list_entry(B, typeof(*b), hole_stack); + + if (a->start < b->start) + return -1; + else + return 1; +} + +static int exercise_ggtt(struct drm_i915_private *i915, + int (*func)(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time)) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + u64 hole_start, hole_end, last = 0; + struct drm_mm_node *node; + IGT_TIMEOUT(end_time); + int err; + + mutex_lock(&i915->drm.struct_mutex); +restart: + list_sort(NULL, &ggtt->base.mm.hole_stack, sort_holes); + drm_mm_for_each_hole(node, &ggtt->base.mm, hole_start, hole_end) { + if (hole_start < last) + continue; + + if (ggtt->base.mm.color_adjust) + ggtt->base.mm.color_adjust(node, 0, + &hole_start, &hole_end); + if (hole_start >= hole_end) + continue; + + err = func(i915, &ggtt->base, hole_start, hole_end, end_time); + if (err) + break; + + /* As we have manipulated the drm_mm, the list may be corrupt */ + 
last = hole_end; + goto restart; + } + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} + +static int igt_ggtt_fill(void *arg) +{ + return exercise_ggtt(arg, fill_hole); +} + +static int igt_ggtt_walk(void *arg) +{ + return exercise_ggtt(arg, walk_hole); +} + +static int igt_ggtt_pot(void *arg) +{ + return exercise_ggtt(arg, pot_hole); +} + +static int igt_ggtt_drunk(void *arg) +{ + return exercise_ggtt(arg, drunk_hole); +} + +static int igt_ggtt_lowlevel(void *arg) +{ + return exercise_ggtt(arg, lowlevel_hole); +} + +static int igt_ggtt_page(void *arg) +{ + const unsigned int count = PAGE_SIZE/sizeof(u32); + I915_RND_STATE(prng); + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; + struct drm_i915_gem_object *obj; + struct drm_mm_node tmp; + unsigned int *order, n; + int err; + + mutex_lock(&i915->drm.struct_mutex); + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_unlock; + } + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_free; + + memset(&tmp, 0, sizeof(tmp)); + err = drm_mm_insert_node_in_range(&ggtt->base.mm, &tmp, + 1024 * PAGE_SIZE, 0, + I915_COLOR_UNEVICTABLE, + 0, ggtt->mappable_end, + DRM_MM_INSERT_LOW); + if (err) + goto out_unpin; + + order = i915_random_order(count, &prng); + if (!order) { + err = -ENOMEM; + goto out_remove; + } + + for (n = 0; n < count; n++) { + u64 offset = tmp.start + order[n] * PAGE_SIZE; + u32 __iomem *vaddr; + + ggtt->base.insert_page(&ggtt->base, + i915_gem_object_get_dma_address(obj, 0), + offset, I915_CACHE_NONE, 0); + + vaddr = io_mapping_map_atomic_wc(&ggtt->mappable, offset); + iowrite32(n, vaddr + n); + io_mapping_unmap_atomic(vaddr); + + wmb(); + ggtt->base.clear_range(&ggtt->base, offset, PAGE_SIZE); + } + + i915_random_reorder(order, count, &prng); + for (n = 0; n < count; n++) { + u64 offset = tmp.start + order[n] * PAGE_SIZE; + u32 __iomem *vaddr; + u32 val; + + ggtt->base.insert_page(&ggtt->base, + 
i915_gem_object_get_dma_address(obj, 0), + offset, I915_CACHE_NONE, 0); + + vaddr = io_mapping_map_atomic_wc(&ggtt->mappable, offset); + val = ioread32(vaddr + n); + io_mapping_unmap_atomic(vaddr); + + ggtt->base.clear_range(&ggtt->base, offset, PAGE_SIZE); + + if (val != n) { + pr_err("insert page failed: found %d, expected %d\n", + val, n); + err = -EINVAL; + break; + } + } + + kfree(order); +out_remove: + drm_mm_remove_node(&tmp); +out_unpin: + i915_gem_object_unpin_pages(obj); +out_free: + i915_gem_object_put(obj); +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static void track_vma_bind(struct i915_vma *vma) +{ + struct drm_i915_gem_object *obj = vma->obj; + + obj->bind_count++; /* track for eviction later */ + __i915_gem_object_pin_pages(obj); + + vma->pages = obj->mm.pages; + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); +} + +static int exercise_mock(struct drm_i915_private *i915, + int (*func)(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time)) +{ + struct i915_gem_context *ctx; + struct i915_hw_ppgtt *ppgtt; + IGT_TIMEOUT(end_time); + int err; + + ctx = mock_context(i915, "mock"); + if (!ctx) + return -ENOMEM; + + ppgtt = ctx->ppgtt; + GEM_BUG_ON(!ppgtt); + + err = func(i915, &ppgtt->base, 0, ppgtt->base.total, end_time); + + mock_context_close(ctx); + return err; +} + +static int igt_mock_fill(void *arg) +{ + return exercise_mock(arg, fill_hole); +} + +static int igt_mock_walk(void *arg) +{ + return exercise_mock(arg, walk_hole); +} + +static int igt_mock_pot(void *arg) +{ + return exercise_mock(arg, pot_hole); +} + +static int igt_mock_drunk(void *arg) +{ + return exercise_mock(arg, drunk_hole); +} + +static int igt_gtt_reserve(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj, *on; + LIST_HEAD(objects); + u64 total; + int err; + + /* i915_gem_gtt_reserve() tries to reserve the precise range + * for the node, 
and evicts if it has to. So our test checks that + * it can give us the requsted space and prevent overlaps. + */ + + /* Start by filling the GGTT */ + for (total = 0; + total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + total += 2*I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, 2*PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + goto out; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_gem_gtt_reserve(&i915->ggtt.base, &vma->node, + obj->base.size, + total, + obj->cache_level, + 0); + if (err) { + pr_err("i915_gem_gtt_reserve (pass 1) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + if (vma->node.start != total || + vma->node.size != 2*I915_GTT_PAGE_SIZE) { + pr_err("i915_gem_gtt_reserve (pass 1) placement failed, found (%llx + %llx), expected (%llx + %lx)\n", + vma->node.start, vma->node.size, + total, 2*I915_GTT_PAGE_SIZE); + err = -EINVAL; + goto out; + } + } + + /* Now we start forcing evictions */ + for (total = I915_GTT_PAGE_SIZE; + total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + total += 2*I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, 2*PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + goto out; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_gem_gtt_reserve(&i915->ggtt.base, &vma->node, + obj->base.size, + total, + obj->cache_level, + 0); + if (err) { + 
pr_err("i915_gem_gtt_reserve (pass 2) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + if (vma->node.start != total || + vma->node.size != 2*I915_GTT_PAGE_SIZE) { + pr_err("i915_gem_gtt_reserve (pass 2) placement failed, found (%llx + %llx), expected (%llx + %lx)\n", + vma->node.start, vma->node.size, + total, 2*I915_GTT_PAGE_SIZE); + err = -EINVAL; + goto out; + } + } + + /* And then try at random */ + list_for_each_entry_safe(obj, on, &objects, st_link) { + struct i915_vma *vma; + u64 offset; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_vma_unbind(vma); + if (err) { + pr_err("i915_vma_unbind failed with err=%d!\n", err); + goto out; + } + + offset = random_offset(0, i915->ggtt.base.total, + 2*I915_GTT_PAGE_SIZE, + I915_GTT_MIN_ALIGNMENT); + + err = i915_gem_gtt_reserve(&i915->ggtt.base, &vma->node, + obj->base.size, + offset, + obj->cache_level, + 0); + if (err) { + pr_err("i915_gem_gtt_reserve (pass 3) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + if (vma->node.start != offset || + vma->node.size != 2*I915_GTT_PAGE_SIZE) { + pr_err("i915_gem_gtt_reserve (pass 3) placement failed, found (%llx + %llx), expected (%llx + %lx)\n", + vma->node.start, vma->node.size, + offset, 2*I915_GTT_PAGE_SIZE); + err = -EINVAL; + goto out; + } + } + +out: + list_for_each_entry_safe(obj, on, &objects, st_link) { + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + return err; +} + +static int igt_gtt_insert(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj, *on; + struct drm_mm_node tmp = {}; + const struct invalid_insert { + u64 size; + u64 alignment; + u64 start, end; + } invalid_insert[] = { + { + 
i915->ggtt.base.total + I915_GTT_PAGE_SIZE, 0, + 0, i915->ggtt.base.total, + }, + { + 2*I915_GTT_PAGE_SIZE, 0, + 0, I915_GTT_PAGE_SIZE, + }, + { + -(u64)I915_GTT_PAGE_SIZE, 0, + 0, 4*I915_GTT_PAGE_SIZE, + }, + { + -(u64)2*I915_GTT_PAGE_SIZE, 2*I915_GTT_PAGE_SIZE, + 0, 4*I915_GTT_PAGE_SIZE, + }, + { + I915_GTT_PAGE_SIZE, I915_GTT_MIN_ALIGNMENT << 1, + I915_GTT_MIN_ALIGNMENT, I915_GTT_MIN_ALIGNMENT << 1, + }, + {} + }, *ii; + LIST_HEAD(objects); + u64 total; + int err; + + /* i915_gem_gtt_insert() tries to allocate some free space in the GTT + * to the node, evicting if required. + */ + + /* Check a couple of obviously invalid requests */ + for (ii = invalid_insert; ii->size; ii++) { + err = i915_gem_gtt_insert(&i915->ggtt.base, &tmp, + ii->size, ii->alignment, + I915_COLOR_UNEVICTABLE, + ii->start, ii->end, + 0); + if (err != -ENOSPC) { + pr_err("Invalid i915_gem_gtt_insert(.size=%llx, .alignment=%llx, .start=%llx, .end=%llx) succeeded (err=%d)\n", + ii->size, ii->alignment, ii->start, ii->end, + err); + return -EINVAL; + } + } + + /* Start by filling the GGTT */ + for (total = 0; + total + I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + total += I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + goto out; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_gem_gtt_insert(&i915->ggtt.base, &vma->node, + obj->base.size, 0, obj->cache_level, + 0, i915->ggtt.base.total, + 0); + if (err == -ENOSPC) { + /* maxed out the GGTT space */ + i915_gem_object_put(obj); + break; + } + if (err) { + pr_err("i915_gem_gtt_insert (pass 1) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + 
__i915_vma_pin(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + } + + list_for_each_entry(obj, &objects, st_link) { + struct i915_vma *vma; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + if (!drm_mm_node_allocated(&vma->node)) { + pr_err("VMA was unexpectedly evicted!\n"); + err = -EINVAL; + goto out; + } + + __i915_vma_unpin(vma); + } + + /* If we then reinsert, we should find the same hole */ + list_for_each_entry_safe(obj, on, &objects, st_link) { + struct i915_vma *vma; + u64 offset; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + offset = vma->node.start; + + err = i915_vma_unbind(vma); + if (err) { + pr_err("i915_vma_unbind failed with err=%d!\n", err); + goto out; + } + + err = i915_gem_gtt_insert(&i915->ggtt.base, &vma->node, + obj->base.size, 0, obj->cache_level, + 0, i915->ggtt.base.total, + 0); + if (err) { + pr_err("i915_gem_gtt_insert (pass 2) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + if (vma->node.start != offset) { + pr_err("i915_gem_gtt_insert did not return node to its previous location (the only hole), expected address %llx, found %llx\n", + offset, vma->node.start); + err = -EINVAL; + goto out; + } + } + + /* And then force evictions */ + for (total = 0; + total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + total += 2*I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, 2*I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + goto out; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = 
PTR_ERR(vma); + goto out; + } + + err = i915_gem_gtt_insert(&i915->ggtt.base, &vma->node, + obj->base.size, 0, obj->cache_level, + 0, i915->ggtt.base.total, + 0); + if (err) { + pr_err("i915_gem_gtt_insert (pass 3) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + } + +out: + list_for_each_entry_safe(obj, on, &objects, st_link) { + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + return err; +} + +int i915_gem_gtt_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_mock_drunk), + SUBTEST(igt_mock_walk), + SUBTEST(igt_mock_pot), + SUBTEST(igt_mock_fill), + SUBTEST(igt_gtt_reserve), + SUBTEST(igt_gtt_insert), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + err = i915_subtests(tests, i915); + mutex_unlock(&i915->drm.struct_mutex); + + drm_dev_unref(&i915->drm); + return err; +} + +int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_ppgtt_alloc), + SUBTEST(igt_ppgtt_lowlevel), + SUBTEST(igt_ppgtt_drunk), + SUBTEST(igt_ppgtt_walk), + SUBTEST(igt_ppgtt_pot), + SUBTEST(igt_ppgtt_fill), + SUBTEST(igt_ppgtt_shrink), + SUBTEST(igt_ggtt_lowlevel), + SUBTEST(igt_ggtt_drunk), + SUBTEST(igt_ggtt_walk), + SUBTEST(igt_ggtt_pot), + SUBTEST(igt_ggtt_fill), + SUBTEST(igt_ggtt_page), + }; + + GEM_BUG_ON(offset_in_page(i915->ggtt.base.total)); + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c new file mode 100644 index 000000000000..67d82bf1407f --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -0,0 +1,600 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person 
obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +#include "mock_gem_device.h" +#include "huge_gem_object.h" + +static int igt_gem_object(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + int err = -ENOMEM; + + /* Basic test to ensure we can create an object */ + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + pr_err("i915_gem_object_create failed, err=%d\n", err); + goto out; + } + + err = 0; + i915_gem_object_put(obj); +out: + return err; +} + +static int igt_phys_object(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + int err; + + /* Create an object and bind it to a contiguous set of physical pages, + * i.e. exercise the i915_gem_object_phys API. 
+ */ + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + pr_err("i915_gem_object_create failed, err=%d\n", err); + goto out; + } + + mutex_lock(&i915->drm.struct_mutex); + err = i915_gem_object_attach_phys(obj, PAGE_SIZE); + mutex_unlock(&i915->drm.struct_mutex); + if (err) { + pr_err("i915_gem_object_attach_phys failed, err=%d\n", err); + goto out_obj; + } + + if (obj->ops != &i915_gem_phys_ops) { + pr_err("i915_gem_object_attach_phys did not create a phys object\n"); + err = -EINVAL; + goto out_obj; + } + + if (!atomic_read(&obj->mm.pages_pin_count)) { + pr_err("i915_gem_object_attach_phys did not pin its phys pages\n"); + err = -EINVAL; + goto out_obj; + } + + /* Make the object dirty so that put_pages must do copy back the data */ + mutex_lock(&i915->drm.struct_mutex); + err = i915_gem_object_set_to_gtt_domain(obj, true); + mutex_unlock(&i915->drm.struct_mutex); + if (err) { + pr_err("i915_gem_object_set_to_gtt_domain failed with err=%d\n", + err); + goto out_obj; + } + +out_obj: + i915_gem_object_put(obj); +out: + return err; +} + +static int igt_gem_huge(void *arg) +{ + const unsigned int nreal = 509; /* just to be awkward */ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + unsigned int n; + int err; + + /* Basic sanitycheck of our huge fake object allocation */ + + obj = huge_gem_object(i915, + nreal * PAGE_SIZE, + i915->ggtt.base.total + PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) { + pr_err("Failed to allocate %u pages (%lu total), err=%d\n", + nreal, obj->base.size / PAGE_SIZE, err); + goto out; + } + + for (n = 0; n < obj->base.size / PAGE_SIZE; n++) { + if (i915_gem_object_get_page(obj, n) != + i915_gem_object_get_page(obj, n % nreal)) { + pr_err("Page lookup mismatch at index %u [%u]\n", + n, n % nreal); + err = -EINVAL; + goto out_unpin; + } + } + +out_unpin: + i915_gem_object_unpin_pages(obj); +out: + 
i915_gem_object_put(obj); + return err; +} + +struct tile { + unsigned int width; + unsigned int height; + unsigned int stride; + unsigned int size; + unsigned int tiling; + unsigned int swizzle; +}; + +static u64 swizzle_bit(unsigned int bit, u64 offset) +{ + return (offset & BIT_ULL(bit)) >> (bit - 6); +} + +static u64 tiled_offset(const struct tile *tile, u64 v) +{ + u64 x, y; + + if (tile->tiling == I915_TILING_NONE) + return v; + + y = div64_u64_rem(v, tile->stride, &x); + v = div64_u64_rem(y, tile->height, &y) * tile->stride * tile->height; + + if (tile->tiling == I915_TILING_X) { + v += y * tile->width; + v += div64_u64_rem(x, tile->width, &x) << tile->size; + v += x; + } else { + const unsigned int ytile_span = 16; + const unsigned int ytile_height = 32 * ytile_span; + + v += y * ytile_span; + v += div64_u64_rem(x, ytile_span, &x) * ytile_height; + v += x; + } + + switch (tile->swizzle) { + case I915_BIT_6_SWIZZLE_9: + v ^= swizzle_bit(9, v); + break; + case I915_BIT_6_SWIZZLE_9_10: + v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v); + break; + case I915_BIT_6_SWIZZLE_9_11: + v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v); + break; + case I915_BIT_6_SWIZZLE_9_10_11: + v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v); + break; + } + + return v; +} + +static int check_partial_mapping(struct drm_i915_gem_object *obj, + const struct tile *tile, + unsigned long end_time) +{ + const unsigned int nreal = obj->scratch / PAGE_SIZE; + const unsigned long npages = obj->base.size / PAGE_SIZE; + struct i915_vma *vma; + unsigned long page; + int err; + + if (igt_timeout(end_time, + "%s: timed out before tiling=%d stride=%d\n", + __func__, tile->tiling, tile->stride)) + return -EINTR; + + err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride); + if (err) + return err; + + GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling); + GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride); + + for_each_prime_number_from(page, 1, npages) { + struct 
i915_ggtt_view view = + compute_partial_view(obj, page, MIN_CHUNK_PAGES); + u32 __iomem *io; + struct page *p; + unsigned int n; + u64 offset; + u32 *cpu; + + GEM_BUG_ON(view.partial.size > nreal); + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) { + pr_err("Failed to pin partial view: offset=%lu\n", + page); + return PTR_ERR(vma); + } + + n = page - view.partial.offset; + GEM_BUG_ON(n >= view.partial.size); + + io = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(io)) { + pr_err("Failed to iomap partial view: offset=%lu\n", + page); + return PTR_ERR(io); + } + + err = i915_vma_get_fence(vma); + if (err) { + pr_err("Failed to get fence for partial view: offset=%lu\n", + page); + i915_vma_unpin_iomap(vma); + return err; + } + + iowrite32(page, io + n * PAGE_SIZE/sizeof(*io)); + i915_vma_unpin_iomap(vma); + + offset = tiled_offset(tile, page << PAGE_SHIFT); + if (offset >= obj->base.size) + continue; + + i915_gem_object_flush_gtt_write_domain(obj); + + p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + cpu = kmap(p) + offset_in_page(offset); + drm_clflush_virt_range(cpu, sizeof(*cpu)); + if (*cpu != (u32)page) { + pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n", + page, n, + view.partial.offset, + view.partial.size, + vma->size >> PAGE_SHIFT, + tile_row_pages(obj), + vma->fence ? 
vma->fence->id : -1, tile->tiling, tile->stride, + offset >> PAGE_SHIFT, + (unsigned int)offset_in_page(offset), + offset, + (u32)page, *cpu); + err = -EINVAL; + } + *cpu = 0; + drm_clflush_virt_range(cpu, sizeof(*cpu)); + kunmap(p); + if (err) + return err; + } + + return 0; +} + +static int igt_partial_tiling(void *arg) +{ + const unsigned int nreal = 1 << 12; /* largest tile row x2 */ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + int tiling; + int err; + + /* We want to check the page mapping and fencing of a large object + * mmapped through the GTT. The object we create is larger than can + * possibly be mmaped as a whole, and so we must use partial GGTT vma. + * We then check that a write through each partial GGTT vma ends up + * in the right set of pages within the object, and with the expected + * tiling, which we verify by manual swizzling. + */ + + obj = huge_gem_object(i915, + nreal << PAGE_SHIFT, + (1 + next_prime_number(i915->ggtt.base.total >> PAGE_SHIFT)) << PAGE_SHIFT); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) { + pr_err("Failed to allocate %u pages (%lu total), err=%d\n", + nreal, obj->base.size / PAGE_SIZE, err); + goto out; + } + + mutex_lock(&i915->drm.struct_mutex); + + if (1) { + IGT_TIMEOUT(end); + struct tile tile; + + tile.height = 1; + tile.width = 1; + tile.size = 0; + tile.stride = 0; + tile.swizzle = I915_BIT_6_SWIZZLE_NONE; + tile.tiling = I915_TILING_NONE; + + err = check_partial_mapping(obj, &tile, end); + if (err && err != -EINTR) + goto out_unlock; + } + + for (tiling = I915_TILING_X; tiling <= I915_TILING_Y; tiling++) { + IGT_TIMEOUT(end); + unsigned int max_pitch; + unsigned int pitch; + struct tile tile; + + tile.tiling = tiling; + switch (tiling) { + case I915_TILING_X: + tile.swizzle = i915->mm.bit_6_swizzle_x; + break; + case I915_TILING_Y: + tile.swizzle = i915->mm.bit_6_swizzle_y; + break; + } + + if (tile.swizzle == 
I915_BIT_6_SWIZZLE_UNKNOWN || + tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17) + continue; + + if (INTEL_GEN(i915) <= 2) { + tile.height = 16; + tile.width = 128; + tile.size = 11; + } else if (tile.tiling == I915_TILING_Y && + HAS_128_BYTE_Y_TILING(i915)) { + tile.height = 32; + tile.width = 128; + tile.size = 12; + } else { + tile.height = 8; + tile.width = 512; + tile.size = 12; + } + + if (INTEL_GEN(i915) < 4) + max_pitch = 8192 / tile.width; + else if (INTEL_GEN(i915) < 7) + max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width; + else + max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width; + + for (pitch = max_pitch; pitch; pitch >>= 1) { + tile.stride = tile.width * pitch; + err = check_partial_mapping(obj, &tile, end); + if (err == -EINTR) + goto next_tiling; + if (err) + goto out_unlock; + + if (pitch > 2 && INTEL_GEN(i915) >= 4) { + tile.stride = tile.width * (pitch - 1); + err = check_partial_mapping(obj, &tile, end); + if (err == -EINTR) + goto next_tiling; + if (err) + goto out_unlock; + } + + if (pitch < max_pitch && INTEL_GEN(i915) >= 4) { + tile.stride = tile.width * (pitch + 1); + err = check_partial_mapping(obj, &tile, end); + if (err == -EINTR) + goto next_tiling; + if (err) + goto out_unlock; + } + } + + if (INTEL_GEN(i915) >= 4) { + for_each_prime_number(pitch, max_pitch) { + tile.stride = tile.width * pitch; + err = check_partial_mapping(obj, &tile, end); + if (err == -EINTR) + goto next_tiling; + if (err) + goto out_unlock; + } + } + +next_tiling: ; + } + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + i915_gem_object_unpin_pages(obj); +out: + i915_gem_object_put(obj); + return err; +} + +static int make_obj_busy(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct drm_i915_gem_request *rq; + struct i915_vma *vma; + int err; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + 
return err; + + rq = i915_gem_request_alloc(i915->engine[RCS], i915->kernel_context); + if (IS_ERR(rq)) { + i915_vma_unpin(vma); + return PTR_ERR(rq); + } + + i915_vma_move_to_active(vma, rq, 0); + i915_add_request(rq); + + i915_gem_object_set_active_reference(obj); + i915_vma_unpin(vma); + return 0; +} + +/* + * Create a temporary internal object of @size bytes, try to allocate an mmap + * offset for it and drop the object again. Returns true only if the offset + * allocation returned exactly @expected (0 or a negative errno); returns + * false if the object itself could not be created. + */ +static bool assert_mmap_offset(struct drm_i915_private *i915, + unsigned long size, + int expected) +{ + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return false; /* an errno converted to bool would read as "passed" */ + + err = i915_gem_object_create_mmap_offset(obj); + i915_gem_object_put(obj); + + return err == expected; +} + +static int igt_mmap_offset_exhaustion(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm; + struct drm_i915_gem_object *obj; + struct drm_mm_node resv, *hole; + u64 hole_start, hole_end; + int loop, err; + + /* Trim the device mmap space to only a page */ + memset(&resv, 0, sizeof(resv)); + drm_mm_for_each_hole(hole, mm, hole_start, hole_end) { + resv.start = hole_start; + resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */ + err = drm_mm_reserve_node(mm, &resv); + if (err) { + pr_err("Failed to trim VMA manager, err=%d\n", err); + return err; + } + break; + } + + /* Just fits! 
*/ + if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) { + pr_err("Unable to insert object into single page hole\n"); + err = -EINVAL; + goto out; + } + + /* Too large */ + if (!assert_mmap_offset(i915, 2*PAGE_SIZE, -ENOSPC)) { + pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n"); + err = -EINVAL; + goto out; + } + + /* Fill the hole, further allocation attempts should then fail */ + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_create_mmap_offset(obj); + if (err) { + pr_err("Unable to insert object into reclaimed hole\n"); + goto err_obj; + } + + if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) { + pr_err("Unexpectedly succeeded in inserting object into no holes!\n"); + err = -EINVAL; + goto err_obj; + } + + i915_gem_object_put(obj); + + /* Now fill with busy dead objects that we expect to reap */ + for (loop = 0; loop < 3; loop++) { + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + mutex_lock(&i915->drm.struct_mutex); + err = make_obj_busy(obj); + mutex_unlock(&i915->drm.struct_mutex); + if (err) { + pr_err("[loop %d] Failed to busy the object\n", loop); + goto err_obj; + } + + GEM_BUG_ON(!i915_gem_object_is_active(obj)); + err = i915_gem_object_create_mmap_offset(obj); + if (err) { + pr_err("[loop %d] i915_gem_object_create_mmap_offset failed with err=%d\n", + loop, err); + goto out; + } + } + +out: + drm_mm_remove_node(&resv); + return err; +err_obj: + i915_gem_object_put(obj); + goto out; +} + +int i915_gem_object_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gem_object), + SUBTEST(igt_phys_object), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915); + + drm_dev_unref(&i915->drm); + return err; +} + +int 
i915_gem_object_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gem_huge), + SUBTEST(igt_partial_tiling), + SUBTEST(igt_mmap_offset_exhaustion), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c new file mode 100644 index 000000000000..98b7aac41eec --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -0,0 +1,882 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include <linux/prime_numbers.h> + +#include "../i915_selftest.h" + +#include "mock_context.h" +#include "mock_gem_device.h" + +static int igt_add_request(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request; + int err = -ENOMEM; + + /* Basic preliminary test to create a request and let it loose! */ + + mutex_lock(&i915->drm.struct_mutex); + request = mock_request(i915->engine[RCS], + i915->kernel_context, + HZ / 10); + if (!request) + goto out_unlock; + + i915_add_request(request); + + err = 0; +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int igt_wait_request(void *arg) +{ + const long T = HZ / 4; + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request; + int err = -EINVAL; + + /* Submit a request, then wait upon it */ + + mutex_lock(&i915->drm.struct_mutex); + request = mock_request(i915->engine[RCS], i915->kernel_context, T); + if (!request) { + err = -ENOMEM; + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, 0) != -ETIME) { + pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n"); + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, T) != -ETIME) { + pr_err("request wait succeeded (expected timeout before submit!)\n"); + goto out_unlock; + } + + if (i915_gem_request_completed(request)) { + pr_err("request completed before submit!!\n"); + goto out_unlock; + } + + i915_add_request(request); + + if (i915_wait_request(request, I915_WAIT_LOCKED, 0) != -ETIME) { + pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n"); + goto out_unlock; + } + + if (i915_gem_request_completed(request)) { + pr_err("request completed immediately!\n"); + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, T / 2) != -ETIME) { + pr_err("request wait succeeded (expected timeout!)\n"); + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, 
T) == -ETIME) { + pr_err("request wait timed out!\n"); + goto out_unlock; + } + + if (!i915_gem_request_completed(request)) { + pr_err("request not complete after waiting!\n"); + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, T) == -ETIME) { + pr_err("request wait timed out when already complete!\n"); + goto out_unlock; + } + + err = 0; +out_unlock: + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int igt_fence_wait(void *arg) +{ + const long T = HZ / 4; + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request; + int err = -EINVAL; + + /* Submit a request, treat it as a fence and wait upon it */ + + mutex_lock(&i915->drm.struct_mutex); + request = mock_request(i915->engine[RCS], i915->kernel_context, T); + if (!request) { + err = -ENOMEM; + goto out_locked; + } + mutex_unlock(&i915->drm.struct_mutex); /* safe as we are single user */ + + if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) { + pr_err("fence wait success before submit (expected timeout)!\n"); + goto out_device; + } + + mutex_lock(&i915->drm.struct_mutex); + i915_add_request(request); + mutex_unlock(&i915->drm.struct_mutex); + + if (dma_fence_is_signaled(&request->fence)) { + pr_err("fence signaled immediately!\n"); + goto out_device; + } + + if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) { + pr_err("fence wait success after submit (expected timeout)!\n"); + goto out_device; + } + + if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { + pr_err("fence wait timed out (expected success)!\n"); + goto out_device; + } + + if (!dma_fence_is_signaled(&request->fence)) { + pr_err("fence unsignaled after waiting!\n"); + goto out_device; + } + + if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { + pr_err("fence wait timed out when complete (expected success)!\n"); + goto out_device; + } + + err = 0; +out_device: + mutex_lock(&i915->drm.struct_mutex); +out_locked: + 
mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int igt_request_rewind(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request, *vip; + struct i915_gem_context *ctx[2]; + int err = -EINVAL; + + mutex_lock(&i915->drm.struct_mutex); + ctx[0] = mock_context(i915, "A"); + request = mock_request(i915->engine[RCS], ctx[0], 2 * HZ); + if (!request) { + err = -ENOMEM; + goto err_context_0; + } + + i915_gem_request_get(request); + i915_add_request(request); + + ctx[1] = mock_context(i915, "B"); + vip = mock_request(i915->engine[RCS], ctx[1], 0); + if (!vip) { + err = -ENOMEM; + goto err_context_1; + } + + /* Simulate preemption by manual reordering */ + if (!mock_cancel_request(request)) { + pr_err("failed to cancel request (already executed)!\n"); + i915_add_request(vip); + goto err_context_1; + } + i915_gem_request_get(vip); + i915_add_request(vip); + request->engine->submit_request(request); + + mutex_unlock(&i915->drm.struct_mutex); + + if (i915_wait_request(vip, 0, HZ) == -ETIME) { + pr_err("timed out waiting for high priority request, vip.seqno=%d, current seqno=%d\n", + vip->global_seqno, intel_engine_get_seqno(i915->engine[RCS])); + goto err; + } + + if (i915_gem_request_completed(request)) { + pr_err("low priority request already completed\n"); + goto err; + } + + err = 0; +err: + i915_gem_request_put(vip); + mutex_lock(&i915->drm.struct_mutex); +err_context_1: + mock_context_close(ctx[1]); + i915_gem_request_put(request); +err_context_0: + mock_context_close(ctx[0]); + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +int i915_gem_request_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_add_request), + SUBTEST(igt_wait_request), + SUBTEST(igt_fence_wait), + SUBTEST(igt_request_rewind), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = 
i915_subtests(tests, i915); + drm_dev_unref(&i915->drm); + + return err; +} + +struct live_test { + struct drm_i915_private *i915; + const char *func; + const char *name; + + unsigned int reset_count; +}; + +static int begin_live_test(struct live_test *t, + struct drm_i915_private *i915, + const char *func, + const char *name) +{ + int err; + + t->i915 = i915; + t->func = func; + t->name = name; + + err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + if (err) { + pr_err("%s(%s): failed to idle before, with err=%d!", + func, name, err); + return err; + } + + i915->gpu_error.missed_irq_rings = 0; + t->reset_count = i915_reset_count(&i915->gpu_error); + + return 0; +} + +static int end_live_test(struct live_test *t) +{ + struct drm_i915_private *i915 = t->i915; + + i915_gem_retire_requests(i915); + + if (wait_for(intel_engines_are_idle(i915), 10)) { + pr_err("%s(%s): GPU not idle\n", t->func, t->name); + return -EIO; + } + + if (t->reset_count != i915_reset_count(&i915->gpu_error)) { + pr_err("%s(%s): GPU was reset %d times!\n", + t->func, t->name, + i915_reset_count(&i915->gpu_error) - t->reset_count); + return -EIO; + } + + if (i915->gpu_error.missed_irq_rings) { + pr_err("%s(%s): Missed interrupts on engines %lx\n", + t->func, t->name, i915->gpu_error.missed_irq_rings); + return -EIO; + } + + return 0; +} + +static int live_nop_request(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct live_test t; + unsigned int id; + int err; + + /* Submit various sized batches of empty requests, to each engine + * (individually), and wait for the batch to complete. We can check + * the overhead of submitting requests to the hardware. 
+ */ + + mutex_lock(&i915->drm.struct_mutex); + + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + struct drm_i915_gem_request *request; + unsigned long n, prime; + ktime_t times[2] = {}; + + err = begin_live_test(&t, i915, __func__, engine->name); + if (err) + goto out_unlock; + + for_each_prime_number_from(prime, 1, 8192) { + times[1] = ktime_get_raw(); + + for (n = 0; n < prime; n++) { + request = i915_gem_request_alloc(engine, + i915->kernel_context); + if (IS_ERR(request)) { + err = PTR_ERR(request); + goto out_unlock; + } + + /* This space is left intentionally blank. + * + * We do not actually want to perform any + * action with this request, we just want + * to measure the latency in allocation + * and submission of our breadcrumbs - + * ensuring that the bare request is sufficient + * for the system to work (i.e. proper HEAD + * tracking of the rings, interrupt handling, + * etc). It also gives us the lowest bounds + * for latency. + */ + + i915_add_request(request); + } + i915_wait_request(request, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + + times[1] = ktime_sub(ktime_get_raw(), times[1]); + if (prime == 1) + times[0] = times[1]; + + if (__igt_timeout(end_time, NULL)) + break; + } + + err = end_live_test(&t); + if (err) + goto out_unlock; + + pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n", + engine->name, + ktime_to_ns(times[0]), + prime, div64_u64(ktime_to_ns(times[1]), prime)); + } + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static struct i915_vma *empty_batch(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *cmd; + int err; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + *cmd = MI_BATCH_BUFFER_END; + i915_gem_object_unpin_map(obj); + + err = 
i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + goto err; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL); + if (err) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static struct drm_i915_gem_request * +empty_request(struct intel_engine_cs *engine, + struct i915_vma *batch) +{ + struct drm_i915_gem_request *request; + int err; + + request = i915_gem_request_alloc(engine, + engine->i915->kernel_context); + if (IS_ERR(request)) + return request; + + err = engine->emit_flush(request, EMIT_INVALIDATE); + if (err) + goto out_request; + + err = i915_switch_context(request); + if (err) + goto out_request; + + err = engine->emit_bb_start(request, + batch->node.start, + batch->node.size, + I915_DISPATCH_SECURE); + if (err) + goto out_request; + +out_request: + __i915_add_request(request, err == 0); + return err ? ERR_PTR(err) : request; +} + +static int live_empty_request(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct live_test t; + struct i915_vma *batch; + unsigned int id; + int err = 0; + + /* Submit various sized batches of empty requests, to each engine + * (individually), and wait for the batch to complete. We can check + * the overhead of submitting requests to the hardware. 
+ */ + + mutex_lock(&i915->drm.struct_mutex); + + batch = empty_batch(i915); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_unlock; + } + + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + struct drm_i915_gem_request *request; + unsigned long n, prime; + ktime_t times[2] = {}; + + err = begin_live_test(&t, i915, __func__, engine->name); + if (err) + goto out_batch; + + /* Warmup / preload */ + request = empty_request(engine, batch); + if (IS_ERR(request)) { + err = PTR_ERR(request); + goto out_batch; + } + i915_wait_request(request, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + + for_each_prime_number_from(prime, 1, 8192) { + times[1] = ktime_get_raw(); + + for (n = 0; n < prime; n++) { + request = empty_request(engine, batch); + if (IS_ERR(request)) { + err = PTR_ERR(request); + goto out_batch; + } + } + i915_wait_request(request, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + + times[1] = ktime_sub(ktime_get_raw(), times[1]); + if (prime == 1) + times[0] = times[1]; + + if (__igt_timeout(end_time, NULL)) + break; + } + + err = end_live_test(&t); + if (err) + goto out_batch; + + pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n", + engine->name, + ktime_to_ns(times[0]), + prime, div64_u64(ktime_to_ns(times[1]), prime)); + } + +out_batch: + i915_vma_unpin(batch); + i915_vma_put(batch); +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static struct i915_vma *recursive_batch(struct drm_i915_private *i915) +{ + struct i915_gem_context *ctx = i915->kernel_context; + struct i915_address_space *vm = ctx->ppgtt ? 
&ctx->ppgtt->base : &i915->ggtt.base; + struct drm_i915_gem_object *obj; + const int gen = INTEL_GEN(i915); + struct i915_vma *vma; + u32 *cmd; + int err; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err; + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + goto err; + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + if (gen >= 8) { + *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; + *cmd++ = lower_32_bits(vma->node.start); + *cmd++ = upper_32_bits(vma->node.start); + } else if (gen >= 6) { + *cmd++ = MI_BATCH_BUFFER_START | 1 << 8; + *cmd++ = lower_32_bits(vma->node.start); + } else if (gen >= 4) { + *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cmd++ = lower_32_bits(vma->node.start); + } else { + *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | 1; + *cmd++ = lower_32_bits(vma->node.start); + } + *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */ + + wmb(); + i915_gem_object_unpin_map(obj); + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static int recursive_batch_resolve(struct i915_vma *batch) +{ + u32 *cmd; + + cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); + if (IS_ERR(cmd)) + return PTR_ERR(cmd); + + *cmd = MI_BATCH_BUFFER_END; + wmb(); + + i915_gem_object_unpin_map(batch->obj); + + return 0; +} + +static int live_all_engines(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct drm_i915_gem_request *request[I915_NUM_ENGINES]; + struct i915_vma *batch; + struct live_test t; + unsigned int id; + int err; + + /* Check we can submit requests to all engines simultaneously. 
We + * send a recursive batch to each engine - checking that we don't + * block doing so, and that they don't complete too soon. + */ + + mutex_lock(&i915->drm.struct_mutex); + + err = begin_live_test(&t, i915, __func__, ""); + if (err) + goto out_unlock; + + batch = recursive_batch(i915); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + pr_err("%s: Unable to create batch, err=%d\n", __func__, err); + goto out_unlock; + } + + for_each_engine(engine, i915, id) { + request[id] = i915_gem_request_alloc(engine, + i915->kernel_context); + if (IS_ERR(request[id])) { + err = PTR_ERR(request[id]); + pr_err("%s: Request allocation failed with err=%d\n", + __func__, err); + goto out_request; + } + + err = engine->emit_flush(request[id], EMIT_INVALIDATE); + GEM_BUG_ON(err); + + err = i915_switch_context(request[id]); + GEM_BUG_ON(err); + + err = engine->emit_bb_start(request[id], + batch->node.start, + batch->node.size, + 0); + GEM_BUG_ON(err); + request[id]->batch = batch; + + if (!i915_gem_object_has_active_reference(batch->obj)) { + i915_gem_object_get(batch->obj); + i915_gem_object_set_active_reference(batch->obj); + } + + i915_vma_move_to_active(batch, request[id], 0); + i915_gem_request_get(request[id]); + i915_add_request(request[id]); + } + + for_each_engine(engine, i915, id) { + if (i915_gem_request_completed(request[id])) { + pr_err("%s(%s): request completed too early!\n", + __func__, engine->name); + err = -EINVAL; + goto out_request; + } + } + + err = recursive_batch_resolve(batch); + if (err) { + pr_err("%s: failed to resolve batch, err=%d\n", __func__, err); + goto out_request; + } + + for_each_engine(engine, i915, id) { + long timeout; + + timeout = i915_wait_request(request[id], + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) { + err = timeout; + pr_err("%s: error waiting for request on %s, err=%d\n", + __func__, engine->name, err); + goto out_request; + } + + GEM_BUG_ON(!i915_gem_request_completed(request[id])); + 
i915_gem_request_put(request[id]); + request[id] = NULL; + } + + err = end_live_test(&t); + +out_request: + for_each_engine(engine, i915, id) + if (request[id]) + i915_gem_request_put(request[id]); + i915_vma_unpin(batch); + i915_vma_put(batch); +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int live_sequential_engines(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request[I915_NUM_ENGINES] = {}; + struct drm_i915_gem_request *prev = NULL; + struct intel_engine_cs *engine; + struct live_test t; + unsigned int id; + int err; + + /* Check we can submit requests to all engines sequentially, such + * that each successive request waits for the earlier ones. This + * tests that we don't execute requests out of order, even though + * they are running on independent engines. + */ + + mutex_lock(&i915->drm.struct_mutex); + + err = begin_live_test(&t, i915, __func__, ""); + if (err) + goto out_unlock; + + for_each_engine(engine, i915, id) { + struct i915_vma *batch; + + batch = recursive_batch(i915); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + pr_err("%s: Unable to create batch for %s, err=%d\n", + __func__, engine->name, err); + /* Requests already submitted on earlier engines are + * still spinning on their batches; out_request ends + * those batches and drops the references we hold. + */ + goto out_request; + } + + request[id] = i915_gem_request_alloc(engine, + i915->kernel_context); + if (IS_ERR(request[id])) { + err = PTR_ERR(request[id]); + request[id] = NULL; /* out_request stops at the first NULL slot */ + pr_err("%s: Request allocation failed for %s with err=%d\n", + __func__, engine->name, err); + goto out_request; + } + + if (prev) { + err = i915_gem_request_await_dma_fence(request[id], + &prev->fence); + if (err) { + i915_add_request(request[id]); + /* We hold no reference on this request and + * it has no batch attached yet, so hide it + * from the out_request cleanup. + */ + request[id] = NULL; + pr_err("%s: Request await failed for %s with err=%d\n", + __func__, engine->name, err); + goto out_request; + } + } + + err = engine->emit_flush(request[id], EMIT_INVALIDATE); + GEM_BUG_ON(err); + + err = i915_switch_context(request[id]); + GEM_BUG_ON(err); + + err = engine->emit_bb_start(request[id], + batch->node.start, + batch->node.size, + 0); + GEM_BUG_ON(err); + 
request[id]->batch = batch; + + i915_vma_move_to_active(batch, request[id], 0); + i915_gem_object_set_active_reference(batch->obj); + i915_vma_get(batch); + + i915_gem_request_get(request[id]); + i915_add_request(request[id]); + + prev = request[id]; + } + + for_each_engine(engine, i915, id) { + long timeout; + + if (i915_gem_request_completed(request[id])) { + pr_err("%s(%s): request completed too early!\n", + __func__, engine->name); + err = -EINVAL; + goto out_request; + } + + err = recursive_batch_resolve(request[id]->batch); + if (err) { + pr_err("%s: failed to resolve batch, err=%d\n", + __func__, err); + goto out_request; + } + + timeout = i915_wait_request(request[id], + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) { + err = timeout; + pr_err("%s: error waiting for request on %s, err=%d\n", + __func__, engine->name, err); + goto out_request; + } + + GEM_BUG_ON(!i915_gem_request_completed(request[id])); + } + + err = end_live_test(&t); + +out_request: + for_each_engine(engine, i915, id) { + u32 *cmd; + + if (!request[id]) + break; + + cmd = i915_gem_object_pin_map(request[id]->batch->obj, + I915_MAP_WC); + if (!IS_ERR(cmd)) { + *cmd = MI_BATCH_BUFFER_END; + wmb(); + i915_gem_object_unpin_map(request[id]->batch->obj); + } + + i915_vma_put(request[id]->batch); + i915_gem_request_put(request[id]); + } +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +int i915_gem_request_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_nop_request), + SUBTEST(live_all_engines), + SUBTEST(live_sequential_engines), + SUBTEST(live_empty_request), + }; + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h new file mode 100644 index 000000000000..18b174d855ca --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -0,0 +1,19 @@ +/* List each unit test as 
selftest(name, function) + * + * The name is used as both an enum and expanded as subtest__name to create + * a module parameter. It must be unique and legal for a C identifier. + * + * The function should be of type int function(struct drm_i915_private *). + * It may be conditionally compiled: #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST). + * + * Tests are executed in order by igt/drv_selftest + */ +selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ +selftest(uncore, intel_uncore_live_selftests) +selftest(requests, i915_gem_request_live_selftests) +selftest(objects, i915_gem_object_live_selftests) +selftest(dmabuf, i915_gem_dmabuf_live_selftests) +selftest(coherency, i915_gem_coherency_live_selftests) +selftest(gtt, i915_gem_gtt_live_selftests) +selftest(contexts, i915_gem_context_live_selftests) +selftest(hangcheck, intel_hangcheck_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h new file mode 100644 index 000000000000..be9a9ebf5692 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -0,0 +1,20 @@ +/* List each unit test as selftest(name, function) + * + * The name is used as both an enum and expanded as subtest__name to create + * a module parameter. It must be unique and legal for a C identifier. + * + * The function should be of type int function(void). It may be conditionally + * compiled using #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST). 
+ * + * Tests are executed in order by igt/drv_selftest + */ +selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */ +selftest(scatterlist, scatterlist_mock_selftests) +selftest(uncore, intel_uncore_mock_selftests) +selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) +selftest(requests, i915_gem_request_mock_selftests) +selftest(objects, i915_gem_object_mock_selftests) +selftest(dmabuf, i915_gem_dmabuf_mock_selftests) +selftest(vma, i915_vma_mock_selftests) +selftest(evict, i915_gem_evict_mock_selftests) +selftest(gtt, i915_gem_gtt_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c b/drivers/gpu/drm/i915/selftests/i915_random.c new file mode 100644 index 000000000000..c17c83c30637 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_random.c @@ -0,0 +1,63 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include <linux/bitops.h> +#include <linux/kernel.h> +#include <linux/random.h> +#include <linux/slab.h> +#include <linux/types.h> + +#include "i915_random.h" + +/* Scale the next 32-bit value drawn from @state into the range [0, ep_ro). */ +static inline u32 i915_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state) +{ + return upper_32_bits((u64)prandom_u32_state(state) * ep_ro); +} + +/* + * Shuffle the @count entries of @order in place, drawing from @state. + * + * This is a Fisher-Yates shuffle: the last unplaced slot is swapped with a + * slot chosen from those not yet placed, so that every permutation has an + * equal share of the possible draw sequences. Swapping each slot with a slot + * chosen from the whole array (as done previously) favours some permutations + * over others. A @count of 0 or 1 leaves @order untouched. + */ +void i915_random_reorder(unsigned int *order, unsigned int count, + struct rnd_state *state) +{ + unsigned int i, j; + + BUILD_BUG_ON(sizeof(unsigned int) > sizeof(u32)); + + for (i = count; i > 1; i--) { + j = i915_prandom_u32_max_state(i, state); /* 0 <= j < i */ + swap(order[i - 1], order[j]); + } +} + +/* + * Allocate an array holding the values 0..@count-1 and shuffle it with + * i915_random_reorder(). Returns the array, or NULL if the allocation + * fails; the memory comes from kmalloc_array() and belongs to the caller. + */ +unsigned int *i915_random_order(unsigned int count, struct rnd_state *state) +{ + unsigned int *order, i; + + order = kmalloc_array(count, sizeof(*order), GFP_TEMPORARY); + if (!order) + return order; + + for (i = 0; i < count; i++) + order[i] = i; + + i915_random_reorder(order, count, state); + return order; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_random.h b/drivers/gpu/drm/i915/selftests/i915_random.h new file mode 100644 index 000000000000..b9c334ce6cd9 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_random.h @@ -0,0 +1,50 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
#ifndef __I915_SELFTESTS_RANDOM_H__
#define __I915_SELFTESTS_RANDOM_H__

#include <linux/random.h>

#include "../i915_selftest.h"

/*
 * Expression evaluating to a struct rnd_state that has been seeded with x
 * through prandom_seed_state(). Relies on the GCC statement-expression
 * extension so that it can be used as an initialiser.
 */
#define I915_RND_STATE_INITIALIZER(x) ({				\
	struct rnd_state state__;					\
	prandom_seed_state(&state__, (x));				\
	state__;							\
})

/* Declare a PRNG state called name__, seeded from i915_selftest.random_seed. */
#define I915_RND_STATE(name__) \
	struct rnd_state name__ = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed)

/*
 * Declare a PRNG state called name__ whose seed is the next 32-bit value
 * drawn from parent__; drawing that value advances parent__.
 */
#define I915_RND_SUBSTATE(name__, parent__) \
	struct rnd_state name__ = I915_RND_STATE_INITIALIZER(prandom_u32_state(&(parent__)))

/*
 * Allocate an array holding the indices 0..count-1 in shuffled order.
 * Returns NULL if the allocation fails.
 */
unsigned int *i915_random_order(unsigned int count,
				struct rnd_state *state);
/* Shuffle the count elements of order in place using state. */
void i915_random_reorder(unsigned int *order,
			 unsigned int count,
			 struct rnd_state *state);

#endif /* !__I915_SELFTESTS_RANDOM_H__ */
#include <linux/random.h>

#include "../i915_drv.h"
#include "../i915_selftest.h"

/* Global selftest settings; the per-test timeout starts out at 1000ms. */
struct i915_selftest i915_selftest __read_mostly = {
	.timeout_ms = 1000,
};

/* Trivial mock test: logs that it ran and always reports success. */
int i915_mock_sanitycheck(void)
{
	pr_info(DRIVER_NAME ": %s() - ok!\n", __func__);
	return 0;
}

/* Trivial live test: logs the driver name of @i915 and always succeeds. */
int i915_live_sanitycheck(struct drm_i915_private *i915)
{
	pr_info("%s: %s() - ok!\n", i915->drm.driver->name, __func__);
	return 0;
}

/*
 * The selftest list headers consist solely of selftest(name, func) lines.
 * Including them under different definitions of selftest() generates, in
 * turn, the index enums, the test tables and the module parameters.
 */

/* One mock_<name> index per entry of i915_mock_selftests.h. */
enum {
#define selftest(name, func) mock_##name,
#include "i915_mock_selftests.h"
#undef selftest
};

/* One live_<name> index per entry of i915_live_selftests.h. */
enum {
#define selftest(name, func) live_##name,
#include "i915_live_selftests.h"
#undef selftest
};

/* A single table entry: run-time enable flag, name and entry point. */
struct selftest {
	bool enabled;
	const char *name;
	union {
		int (*mock)(void); /* used by entries of mock_selftests[] */
		int (*live)(struct drm_i915_private *); /* used by live_selftests[] */
	};
};

/* Table of mock tests, indexed by the mock_<name> enum above. */
#define selftest(n, f) [mock_##n] = { .name = #n, { .mock = f } },
static struct selftest mock_selftests[] = {
#include "i915_mock_selftests.h"
};
#undef selftest

/* Table of live tests, indexed by the live_<name> enum above. */
#define selftest(n, f) [live_##n] = { .name = #n, { .live = f } },
static struct selftest live_selftests[] = {
#include "i915_live_selftests.h"
};
#undef selftest

/* Embed the line number into the parameter name so that we can order tests */
/*
 * param(n) pastes together igt__<line>__mock_<n>, where <line> is the line
 * of the selftest() entry inside the included list header.
 */
#define selftest(n, func) selftest_0(n, func, param(n))
#define param(n) __PASTE(igt__, __PASTE(__LINE__, __mock_##n))
+#define selftest_0(n, func, id) \ +module_param_named(id, mock_selftests[mock_##n].enabled, bool, 0400); +#include "i915_mock_selftests.h" +#undef selftest_0 +#undef param + +#define param(n) __PASTE(igt__, __PASTE(__LINE__, __live_##n)) +#define selftest_0(n, func, id) \ +module_param_named(id, live_selftests[live_##n].enabled, bool, 0400); +#include "i915_live_selftests.h" +#undef selftest_0 +#undef param +#undef selftest + +static void set_default_test_all(struct selftest *st, unsigned int count) +{ + unsigned int i; + + for (i = 0; i < count; i++) + if (st[i].enabled) + return; + + for (i = 0; i < count; i++) + st[i].enabled = true; +} + +static int __run_selftests(const char *name, + struct selftest *st, + unsigned int count, + void *data) +{ + int err = 0; + + while (!i915_selftest.random_seed) + i915_selftest.random_seed = get_random_int(); + + i915_selftest.timeout_jiffies = + i915_selftest.timeout_ms ? + msecs_to_jiffies_timeout(i915_selftest.timeout_ms) : + MAX_SCHEDULE_TIMEOUT; + + set_default_test_all(st, count); + + pr_info(DRIVER_NAME ": Performing %s selftests with st_random_seed=0x%x st_timeout=%u\n", + name, i915_selftest.random_seed, i915_selftest.timeout_ms); + + /* Tests are listed in order in i915_*_selftests.h */ + for (; count--; st++) { + if (!st->enabled) + continue; + + cond_resched(); + if (signal_pending(current)) + return -EINTR; + + pr_debug(DRIVER_NAME ": Running %s\n", st->name); + if (data) + err = st->live(data); + else + err = st->mock(); + if (err == -EINTR && !signal_pending(current)) + err = 0; + if (err) + break; + } + + if (WARN(err > 0 || err == -ENOTTY, + "%s returned %d, conflicting with selftest's magic values!\n", + st->name, err)) + err = -1; + + return err; +} + +#define run_selftests(x, data) \ + __run_selftests(#x, x##_selftests, ARRAY_SIZE(x##_selftests), data) + +int i915_mock_selftests(void) +{ + int err; + + if (!i915_selftest.mock) + return 0; + + err = run_selftests(mock, NULL); + if (err) { + 
i915_selftest.mock = err; + return err; + } + + if (i915_selftest.mock < 0) { + i915_selftest.mock = -ENOTTY; + return 1; + } + + return 0; +} + +int i915_live_selftests(struct pci_dev *pdev) +{ + int err; + + if (!i915_selftest.live) + return 0; + + err = run_selftests(live, to_i915(pci_get_drvdata(pdev))); + if (err) { + i915_selftest.live = err; + return err; + } + + if (i915_selftest.live < 0) { + i915_selftest.live = -ENOTTY; + return 1; + } + + return 0; +} + +int __i915_subtests(const char *caller, + const struct i915_subtest *st, + unsigned int count, + void *data) +{ + int err; + + for (; count--; st++) { + cond_resched(); + if (signal_pending(current)) + return -EINTR; + + pr_debug(DRIVER_NAME ": Running %s/%s\n", caller, st->name); + err = st->func(data); + if (err && err != -EINTR) { + pr_err(DRIVER_NAME "/%s: %s failed with error %d\n", + caller, st->name, err); + return err; + } + } + + return 0; +} + +bool __igt_timeout(unsigned long timeout, const char *fmt, ...) +{ + va_list va; + + if (!signal_pending(current)) { + cond_resched(); + if (time_before(jiffies, timeout)) + return false; + } + + if (fmt) { + va_start(va, fmt); + vprintk(fmt, va); + va_end(va); + } + + return true; +} + +module_param_named(st_random_seed, i915_selftest.random_seed, uint, 0400); +module_param_named(st_timeout, i915_selftest.timeout_ms, uint, 0400); + +module_param_named_unsafe(mock_selftests, i915_selftest.mock, int, 0400); +MODULE_PARM_DESC(mock_selftests, "Run selftests before loading, using mock hardware (0:disabled [default], 1:run tests then load driver, -1:run tests then exit module)"); + +module_param_named_unsafe(live_selftests, i915_selftest.live, int, 0400); +MODULE_PARM_DESC(live_selftests, "Run selftests after driver initialisation on the live system (0:disabled [default], 1:run tests then continue, -1:run tests then exit module)"); diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c new file mode 100644 index 
000000000000..ad56566e24db --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -0,0 +1,746 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include <linux/prime_numbers.h> + +#include "../i915_selftest.h" + +#include "mock_gem_device.h" +#include "mock_context.h" + +static bool assert_vma(struct i915_vma *vma, + struct drm_i915_gem_object *obj, + struct i915_gem_context *ctx) +{ + bool ok = true; + + if (vma->vm != &ctx->ppgtt->base) { + pr_err("VMA created with wrong VM\n"); + ok = false; + } + + if (vma->size != obj->base.size) { + pr_err("VMA created with wrong size, found %llu, expected %zu\n", + vma->size, obj->base.size); + ok = false; + } + + if (vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) { + pr_err("VMA created with wrong type [%d]\n", + vma->ggtt_view.type); + ok = false; + } + + return ok; +} + +static struct i915_vma * +checked_vma_instance(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + struct i915_ggtt_view *view) +{ + struct i915_vma *vma; + bool ok = true; + + vma = i915_vma_instance(obj, vm, view); + if (IS_ERR(vma)) + return vma; + + /* Manual checks, will be reinforced by i915_vma_compare! */ + if (vma->vm != vm) { + pr_err("VMA's vm [%p] does not match request [%p]\n", + vma->vm, vm); + ok = false; + } + + if (i915_is_ggtt(vm) != i915_vma_is_ggtt(vma)) { + pr_err("VMA ggtt status [%d] does not match parent [%d]\n", + i915_vma_is_ggtt(vma), i915_is_ggtt(vm)); + ok = false; + } + + if (i915_vma_compare(vma, vm, view)) { + pr_err("i915_vma_compare failed with create parmaters!\n"); + return ERR_PTR(-EINVAL); + } + + if (i915_vma_compare(vma, vma->vm, + i915_vma_is_ggtt(vma) ? 
&vma->ggtt_view : NULL)) { + pr_err("i915_vma_compare failed with itself\n"); + return ERR_PTR(-EINVAL); + } + + if (!ok) { + pr_err("i915_vma_compare failed to detect the difference!\n"); + return ERR_PTR(-EINVAL); + } + + return vma; +} + +static int create_vmas(struct drm_i915_private *i915, + struct list_head *objects, + struct list_head *contexts) +{ + struct drm_i915_gem_object *obj; + struct i915_gem_context *ctx; + int pinned; + + list_for_each_entry(obj, objects, st_link) { + for (pinned = 0; pinned <= 1; pinned++) { + list_for_each_entry(ctx, contexts, link) { + struct i915_address_space *vm = + &ctx->ppgtt->base; + struct i915_vma *vma; + int err; + + vma = checked_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + if (!assert_vma(vma, obj, ctx)) { + pr_err("VMA lookup/create failed\n"); + return -EINVAL; + } + + if (!pinned) { + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) { + pr_err("Failed to pin VMA\n"); + return err; + } + } else { + i915_vma_unpin(vma); + } + } + } + } + + return 0; +} + +static int igt_vma_create(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj, *on; + struct i915_gem_context *ctx, *cn; + unsigned long num_obj, num_ctx; + unsigned long no, nc; + IGT_TIMEOUT(end_time); + LIST_HEAD(contexts); + LIST_HEAD(objects); + int err; + + /* Exercise creating many vma amonst many objections, checking the + * vma creation and lookup routines. 
+ */ + + no = 0; + for_each_prime_number(num_obj, ULONG_MAX - 1) { + for (; no < num_obj; no++) { + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + goto out; + + list_add(&obj->st_link, &objects); + } + + nc = 0; + for_each_prime_number(num_ctx, MAX_CONTEXT_HW_ID) { + for (; nc < num_ctx; nc++) { + ctx = mock_context(i915, "mock"); + if (!ctx) + goto out; + + list_move(&ctx->link, &contexts); + } + + err = create_vmas(i915, &objects, &contexts); + if (err) + goto out; + + if (igt_timeout(end_time, + "%s timed out: after %lu objects in %lu contexts\n", + __func__, no, nc)) + goto end; + } + + list_for_each_entry_safe(ctx, cn, &contexts, link) + mock_context_close(ctx); + } + +end: + /* Final pass to lookup all created contexts */ + err = create_vmas(i915, &objects, &contexts); +out: + list_for_each_entry_safe(ctx, cn, &contexts, link) + mock_context_close(ctx); + + list_for_each_entry_safe(obj, on, &objects, st_link) + i915_gem_object_put(obj); + return err; +} + +struct pin_mode { + u64 size; + u64 flags; + bool (*assert)(const struct i915_vma *, + const struct pin_mode *mode, + int result); + const char *string; +}; + +static bool assert_pin_valid(const struct i915_vma *vma, + const struct pin_mode *mode, + int result) +{ + if (result) + return false; + + if (i915_vma_misplaced(vma, mode->size, 0, mode->flags)) + return false; + + return true; +} + +__maybe_unused +static bool assert_pin_e2big(const struct i915_vma *vma, + const struct pin_mode *mode, + int result) +{ + return result == -E2BIG; +} + +__maybe_unused +static bool assert_pin_enospc(const struct i915_vma *vma, + const struct pin_mode *mode, + int result) +{ + return result == -ENOSPC; +} + +__maybe_unused +static bool assert_pin_einval(const struct i915_vma *vma, + const struct pin_mode *mode, + int result) +{ + return result == -EINVAL; +} + +static int igt_vma_pin1(void *arg) +{ + struct drm_i915_private *i915 = arg; + const struct pin_mode modes[] = { +#define VALID(sz, 
fl) { .size = (sz), .flags = (fl), .assert = assert_pin_valid, .string = #sz ", " #fl ", (valid) " } +#define __INVALID(sz, fl, check, eval) { .size = (sz), .flags = (fl), .assert = (check), .string = #sz ", " #fl ", (invalid " #eval ")" } +#define INVALID(sz, fl) __INVALID(sz, fl, assert_pin_einval, EINVAL) +#define TOOBIG(sz, fl) __INVALID(sz, fl, assert_pin_e2big, E2BIG) +#define NOSPACE(sz, fl) __INVALID(sz, fl, assert_pin_enospc, ENOSPC) + VALID(0, PIN_GLOBAL), + VALID(0, PIN_GLOBAL | PIN_MAPPABLE), + + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | 4096), + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | 8192), + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), + VALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.base.total - 4096)), + + VALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | (i915->ggtt.mappable_end - 4096)), + INVALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | i915->ggtt.mappable_end), + VALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | (i915->ggtt.base.total - 4096)), + INVALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | i915->ggtt.base.total), + INVALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | round_down(U64_MAX, PAGE_SIZE)), + + VALID(4096, PIN_GLOBAL), + VALID(8192, PIN_GLOBAL), + VALID(i915->ggtt.mappable_end - 4096, PIN_GLOBAL | PIN_MAPPABLE), + VALID(i915->ggtt.mappable_end, PIN_GLOBAL | PIN_MAPPABLE), + TOOBIG(i915->ggtt.mappable_end + 4096, PIN_GLOBAL | PIN_MAPPABLE), + VALID(i915->ggtt.base.total - 4096, PIN_GLOBAL), + VALID(i915->ggtt.base.total, PIN_GLOBAL), + TOOBIG(i915->ggtt.base.total + 4096, PIN_GLOBAL), + TOOBIG(round_down(U64_MAX, PAGE_SIZE), PIN_GLOBAL), + INVALID(8192, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | (i915->ggtt.mappable_end - 4096)), + INVALID(8192, PIN_GLOBAL | PIN_OFFSET_FIXED | (i915->ggtt.base.total - 4096)), + INVALID(8192, PIN_GLOBAL | PIN_OFFSET_FIXED | (round_down(U64_MAX, PAGE_SIZE) - 4096)), + + VALID(8192, 
PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), + +#if !IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) + /* Misusing BIAS is a programming error (it is not controllable + * from userspace) so when debugging is enabled, it explodes. + * However, the tests are still quite interesting for checking + * variable start, end and size. + */ + NOSPACE(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | i915->ggtt.mappable_end), + NOSPACE(0, PIN_GLOBAL | PIN_OFFSET_BIAS | i915->ggtt.base.total), + NOSPACE(8192, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), + NOSPACE(8192, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.base.total - 4096)), +#endif + { }, +#undef NOSPACE +#undef TOOBIG +#undef INVALID +#undef __INVALID +#undef VALID + }, *m; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err = -EINVAL; + + /* Exercise all the weird and wonderful i915_vma_pin requests, + * focusing on error handling of boundary conditions. + */ + + GEM_BUG_ON(!drm_mm_clean(&i915->ggtt.base.mm)); + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = checked_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) + goto out; + + for (m = modes; m->assert; m++) { + err = i915_vma_pin(vma, m->size, 0, m->flags); + if (!m->assert(vma, m, err)) { + pr_err("%s to pin single page into GGTT with mode[%d:%s]: size=%llx flags=%llx, err=%d\n", + m->assert == assert_pin_valid ? 
"Failed" : "Unexpectedly succeeded", + (int)(m - modes), m->string, m->size, m->flags, + err); + if (!err) + i915_vma_unpin(vma); + err = -EINVAL; + goto out; + } + + if (!err) { + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + if (err) { + pr_err("Failed to unbind single page from GGTT, err=%d\n", err); + goto out; + } + } + } + + err = 0; +out: + i915_gem_object_put(obj); + return err; +} + +static unsigned long rotated_index(const struct intel_rotation_info *r, + unsigned int n, + unsigned int x, + unsigned int y) +{ + return (r->plane[n].stride * (r->plane[n].height - y - 1) + + r->plane[n].offset + x); +} + +static struct scatterlist * +assert_rotated(struct drm_i915_gem_object *obj, + const struct intel_rotation_info *r, unsigned int n, + struct scatterlist *sg) +{ + unsigned int x, y; + + for (x = 0; x < r->plane[n].width; x++) { + for (y = 0; y < r->plane[n].height; y++) { + unsigned long src_idx; + dma_addr_t src; + + if (!sg) { + pr_err("Invalid sg table: too short at plane %d, (%d, %d)!\n", + n, x, y); + return ERR_PTR(-EINVAL); + } + + src_idx = rotated_index(r, n, x, y); + src = i915_gem_object_get_dma_address(obj, src_idx); + + if (sg_dma_len(sg) != PAGE_SIZE) { + pr_err("Invalid sg.length, found %d, expected %lu for rotated page (%d, %d) [src index %lu]\n", + sg_dma_len(sg), PAGE_SIZE, + x, y, src_idx); + return ERR_PTR(-EINVAL); + } + + if (sg_dma_address(sg) != src) { + pr_err("Invalid address for rotated page (%d, %d) [src index %lu]\n", + x, y, src_idx); + return ERR_PTR(-EINVAL); + } + + sg = sg_next(sg); + } + } + + return sg; +} + +static unsigned int rotated_size(const struct intel_rotation_plane_info *a, + const struct intel_rotation_plane_info *b) +{ + return a->width * a->height + b->width * b->height; +} + +static int igt_vma_rotate(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_address_space *vm = &i915->ggtt.base; + struct drm_i915_gem_object *obj; + const struct intel_rotation_plane_info planes[] = { + { 
/*
 * Selftest: build rotated GGTT views of a 64-page object from every pairing
 * of the plane descriptions below, at a range of plane offsets, and check
 * the size and page layout of each resulting VMA.
 */
static int igt_vma_rotate(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_address_space *vm = &i915->ggtt.base;
	struct drm_i915_gem_object *obj;
	const struct intel_rotation_plane_info planes[] = {
		{ .width = 1, .height = 1, .stride = 1 },
		{ .width = 2, .height = 2, .stride = 2 },
		{ .width = 4, .height = 4, .stride = 4 },
		{ .width = 8, .height = 8, .stride = 8 },

		{ .width = 3, .height = 5, .stride = 3 },
		{ .width = 3, .height = 5, .stride = 4 },
		{ .width = 3, .height = 5, .stride = 5 },

		{ .width = 5, .height = 3, .stride = 5 },
		{ .width = 5, .height = 3, .stride = 7 },
		{ .width = 5, .height = 3, .stride = 9 },

		{ .width = 4, .height = 6, .stride = 6 },
		{ .width = 6, .height = 4, .stride = 6 },
		{ } /* zero width terminates the walk over a */
	}, *a, *b;
	const unsigned int max_pages = 64;
	int err = -ENOMEM;

	/* Create VMA for many different combinations of planes and check
	 * that the page layout within the rotated VMA match our expectations.
	 */

	obj = i915_gem_object_create_internal(i915, max_pages * PAGE_SIZE);
	if (IS_ERR(obj))
		goto out;

	for (a = planes; a->width; a++) {
		/*
		 * b walks the table backwards, beginning with the all-zero
		 * terminator entry, so an empty second plane is covered too.
		 */
		for (b = planes + ARRAY_SIZE(planes); b-- != planes; ) {
			struct i915_ggtt_view view;
			unsigned int n, max_offset;

			/* Largest offset keeping both planes within max_pages. */
			max_offset = max(a->stride * a->height,
					 b->stride * b->height);
			GEM_BUG_ON(max_offset > max_pages);
			max_offset = max_pages - max_offset;

			view.type = I915_GGTT_VIEW_ROTATED;
			view.rotated.plane[0] = *a;
			view.rotated.plane[1] = *b;

			for_each_prime_number_from(view.rotated.plane[0].offset, 0, max_offset) {
				for_each_prime_number_from(view.rotated.plane[1].offset, 0, max_offset) {
					struct scatterlist *sg;
					struct i915_vma *vma;

					vma = checked_vma_instance(obj, vm, &view);
					if (IS_ERR(vma)) {
						err = PTR_ERR(vma);
						goto out_object;
					}

					err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
					if (err) {
						pr_err("Failed to pin VMA, err=%d\n", err);
						goto out_object;
					}

					/* The VMA must cover exactly the pages of both planes. */
					if (vma->size != rotated_size(a, b) * PAGE_SIZE) {
						pr_err("VMA is wrong size, expected %lu, found %llu\n",
						       PAGE_SIZE * rotated_size(a, b), vma->size);
						err = -EINVAL;
						goto out_object;
					}

					if (vma->pages->nents != rotated_size(a, b)) {
						pr_err("sg table is wrong sizeo, expected %u, found %u nents\n",
						       rotated_size(a, b), vma->pages->nents);
						err = -EINVAL;
						goto out_object;
					}

					if (vma->node.size < vma->size) {
						pr_err("VMA binding too small, expected %llu, found %llu\n",
						       vma->size, vma->node.size);
						err = -EINVAL;
						goto out_object;
					}

					/* A rotated view needs a page list of its own. */
					if (vma->pages == obj->mm.pages) {
						pr_err("VMA using unrotated object pages!\n");
						err = -EINVAL;
						goto out_object;
					}

					/* Verify each plane in turn, continuing along the same list. */
					sg = vma->pages->sgl;
					for (n = 0; n < ARRAY_SIZE(view.rotated.plane); n++) {
						sg = assert_rotated(obj, &view.rotated, n, sg);
						if (IS_ERR(sg)) {
							pr_err("Inconsistent VMA pages for plane %d: [(%d, %d, %d, %d), (%d, %d, %d, %d)]\n", n,
							       view.rotated.plane[0].width,
							       view.rotated.plane[0].height,
							       view.rotated.plane[0].stride,
							       view.rotated.plane[0].offset,
							       view.rotated.plane[1].width,
							       view.rotated.plane[1].height,
							       view.rotated.plane[1].stride,
							       view.rotated.plane[1].offset);
							err = -EINVAL;
							goto out_object;
						}
					}

					i915_vma_unpin(vma);
				}
			}
		}
	}

out_object:
	i915_gem_object_put(obj);
out:
	return err;
}

/*
 * Check that the pages backing @vma are the pages of @obj starting at page
 * index @offset: each DMA address in the VMA's page list must equal the
 * object's DMA address for the corresponding page. Fails if the list holds
 * more than @size pages or any address differs.
 */
static bool assert_partial(struct drm_i915_gem_object *obj,
			   struct i915_vma *vma,
			   unsigned long offset,
			   unsigned long size)
{
	struct sgt_iter sgt;
	dma_addr_t dma;

	for_each_sgt_dma(dma, sgt, vma->pages) {
		dma_addr_t src;

		/* size counts the pages still expected; zero means one too many. */
		if (!size) {
			pr_err("Partial scattergather list too long\n");
			return false;
		}

		src = i915_gem_object_get_dma_address(obj, offset);
		if (src != dma) {
			pr_err("DMA mismatch for partial page offset %lu\n",
			       offset);
			return false;
		}

		offset++;
		size--;
	}

	return true;
}
vma->node.size); + ok = false; + } + + if (view && view->type != I915_GGTT_VIEW_NORMAL) { + if (memcmp(&vma->ggtt_view, view, sizeof(*view))) { + pr_err("(%s) VMA mismatch upon creation!\n", + name); + ok = false; + } + + if (vma->pages == vma->obj->mm.pages) { + pr_err("(%s) VMA using original object pages!\n", + name); + ok = false; + } + } else { + if (vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) { + pr_err("Not the normal ggtt view! Found %d\n", + vma->ggtt_view.type); + ok = false; + } + + if (vma->pages != vma->obj->mm.pages) { + pr_err("VMA not using object pages!\n"); + ok = false; + } + } + + return ok; +} + +static int igt_vma_partial(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_address_space *vm = &i915->ggtt.base; + const unsigned int npages = 1021; /* prime! */ + struct drm_i915_gem_object *obj; + const struct phase { + const char *name; + } phases[] = { + { "create" }, + { "lookup" }, + { }, + }, *p; + unsigned int sz, offset; + struct i915_vma *vma; + int err = -ENOMEM; + + /* Create lots of different VMA for the object and check that + * we are returned the same VMA when we later request the same range. 
+ */ + + obj = i915_gem_object_create_internal(i915, npages*PAGE_SIZE); + if (IS_ERR(obj)) + goto out; + + for (p = phases; p->name; p++) { /* exercise both create/lookup */ + unsigned int count, nvma; + + nvma = 0; + for_each_prime_number_from(sz, 1, npages) { + for_each_prime_number_from(offset, 0, npages - sz) { + struct i915_ggtt_view view; + + view.type = I915_GGTT_VIEW_PARTIAL; + view.partial.offset = offset; + view.partial.size = sz; + + if (sz == npages) + view.type = I915_GGTT_VIEW_NORMAL; + + vma = checked_vma_instance(obj, vm, &view); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_object; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto out_object; + + if (!assert_pin(vma, &view, sz*PAGE_SIZE, p->name)) { + pr_err("(%s) Inconsistent partial pinning for (offset=%d, size=%d)\n", + p->name, offset, sz); + err = -EINVAL; + goto out_object; + } + + if (!assert_partial(obj, vma, offset, sz)) { + pr_err("(%s) Inconsistent partial pages for (offset=%d, size=%d)\n", + p->name, offset, sz); + err = -EINVAL; + goto out_object; + } + + i915_vma_unpin(vma); + nvma++; + } + } + + count = 0; + list_for_each_entry(vma, &obj->vma_list, obj_link) + count++; + if (count != nvma) { + pr_err("(%s) All partial vma were not recorded on the obj->vma_list: found %u, expected %u\n", + p->name, count, nvma); + err = -EINVAL; + goto out_object; + } + + /* Check that we did create the whole object mapping */ + vma = checked_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_object; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto out_object; + + if (!assert_pin(vma, NULL, obj->base.size, p->name)) { + pr_err("(%s) inconsistent full pin\n", p->name); + err = -EINVAL; + goto out_object; + } + + i915_vma_unpin(vma); + + count = 0; + list_for_each_entry(vma, &obj->vma_list, obj_link) + count++; + if (count != nvma) { + pr_err("(%s) allocated an extra full vma!\n", p->name); + err = -EINVAL; + goto out_object; 
+ } + } + +out_object: + i915_gem_object_put(obj); +out: + return err; +} + +int i915_vma_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_vma_create), + SUBTEST(igt_vma_pin1), + SUBTEST(igt_vma_rotate), + SUBTEST(igt_vma_partial), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + err = i915_subtests(tests, i915); + mutex_unlock(&i915->drm.struct_mutex); + + drm_dev_unref(&i915->drm); + return err; +} + diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c new file mode 100644 index 000000000000..19860a372d90 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -0,0 +1,481 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include "../i915_selftest.h" +#include "i915_random.h" + +#include "mock_gem_device.h" +#include "mock_engine.h" + +static int check_rbtree(struct intel_engine_cs *engine, + const unsigned long *bitmap, + const struct intel_wait *waiters, + const int count) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct rb_node *rb; + int n; + + if (&b->irq_wait->node != rb_first(&b->waiters)) { + pr_err("First waiter does not match first element of wait-tree\n"); + return -EINVAL; + } + + n = find_first_bit(bitmap, count); + for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { + struct intel_wait *w = container_of(rb, typeof(*w), node); + int idx = w - waiters; + + if (!test_bit(idx, bitmap)) { + pr_err("waiter[%d, seqno=%d] removed but still in wait-tree\n", + idx, w->seqno); + return -EINVAL; + } + + if (n != idx) { + pr_err("waiter[%d, seqno=%d] does not match expected next element in tree [%d]\n", + idx, w->seqno, n); + return -EINVAL; + } + + n = find_next_bit(bitmap, count, n + 1); + } + + return 0; +} + +static int check_completion(struct intel_engine_cs *engine, + const unsigned long *bitmap, + const struct intel_wait *waiters, + const int count) +{ + int n; + + for (n = 0; n < count; n++) { + if (intel_wait_complete(&waiters[n]) != !!test_bit(n, bitmap)) + continue; + + pr_err("waiter[%d, seqno=%d] is %s, but expected %s\n", + n, waiters[n].seqno, + intel_wait_complete(&waiters[n]) ? "complete" : "active", + test_bit(n, bitmap) ? 
"active" : "complete"); + return -EINVAL; + } + + return 0; +} + +static int check_rbtree_empty(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + if (b->irq_wait) { + pr_err("Empty breadcrumbs still has a waiter\n"); + return -EINVAL; + } + + if (!RB_EMPTY_ROOT(&b->waiters)) { + pr_err("Empty breadcrumbs, but wait-tree not empty\n"); + return -EINVAL; + } + + return 0; +} + +static int igt_random_insert_remove(void *arg) +{ + const u32 seqno_bias = 0x1000; + I915_RND_STATE(prng); + struct intel_engine_cs *engine = arg; + struct intel_wait *waiters; + const int count = 4096; + unsigned int *order; + unsigned long *bitmap; + int err = -ENOMEM; + int n; + + mock_engine_reset(engine); + + waiters = drm_malloc_gfp(count, sizeof(*waiters), GFP_TEMPORARY); + if (!waiters) + goto out_engines; + + bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap), + GFP_TEMPORARY); + if (!bitmap) + goto out_waiters; + + order = i915_random_order(count, &prng); + if (!order) + goto out_bitmap; + + for (n = 0; n < count; n++) + intel_wait_init_for_seqno(&waiters[n], seqno_bias + n); + + err = check_rbtree(engine, bitmap, waiters, count); + if (err) + goto out_order; + + /* Add and remove waiters into the rbtree in random order. At each + * step, we verify that the rbtree is correctly ordered. 
+ */ + for (n = 0; n < count; n++) { + int i = order[n]; + + intel_engine_add_wait(engine, &waiters[i]); + __set_bit(i, bitmap); + + err = check_rbtree(engine, bitmap, waiters, count); + if (err) + goto out_order; + } + + i915_random_reorder(order, count, &prng); + for (n = 0; n < count; n++) { + int i = order[n]; + + intel_engine_remove_wait(engine, &waiters[i]); + __clear_bit(i, bitmap); + + err = check_rbtree(engine, bitmap, waiters, count); + if (err) + goto out_order; + } + + err = check_rbtree_empty(engine); +out_order: + kfree(order); +out_bitmap: + kfree(bitmap); +out_waiters: + drm_free_large(waiters); +out_engines: + mock_engine_flush(engine); + return err; +} + +static int igt_insert_complete(void *arg) +{ + const u32 seqno_bias = 0x1000; + struct intel_engine_cs *engine = arg; + struct intel_wait *waiters; + const int count = 4096; + unsigned long *bitmap; + int err = -ENOMEM; + int n, m; + + mock_engine_reset(engine); + + waiters = drm_malloc_gfp(count, sizeof(*waiters), GFP_TEMPORARY); + if (!waiters) + goto out_engines; + + bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap), + GFP_TEMPORARY); + if (!bitmap) + goto out_waiters; + + for (n = 0; n < count; n++) { + intel_wait_init_for_seqno(&waiters[n], n + seqno_bias); + intel_engine_add_wait(engine, &waiters[n]); + __set_bit(n, bitmap); + } + err = check_rbtree(engine, bitmap, waiters, count); + if (err) + goto out_bitmap; + + /* On each step, we advance the seqno so that several waiters are then + * complete (we increase the seqno by increasingly larger values to + * retire more and more waiters at once). All retired waiters should + * be woken and removed from the rbtree, and so that we check. 
+ */ + for (n = 0; n < count; n = m) { + int seqno = 2 * n; + + GEM_BUG_ON(find_first_bit(bitmap, count) != n); + + if (intel_wait_complete(&waiters[n])) { + pr_err("waiter[%d, seqno=%d] completed too early\n", + n, waiters[n].seqno); + err = -EINVAL; + goto out_bitmap; + } + + /* complete the following waiters */ + mock_seqno_advance(engine, seqno + seqno_bias); + for (m = n; m <= seqno; m++) { + if (m == count) + break; + + GEM_BUG_ON(!test_bit(m, bitmap)); + __clear_bit(m, bitmap); + } + + intel_engine_remove_wait(engine, &waiters[n]); + RB_CLEAR_NODE(&waiters[n].node); + + err = check_rbtree(engine, bitmap, waiters, count); + if (err) { + pr_err("rbtree corrupt after seqno advance to %d\n", + seqno + seqno_bias); + goto out_bitmap; + } + + err = check_completion(engine, bitmap, waiters, count); + if (err) { + pr_err("completions after seqno advance to %d failed\n", + seqno + seqno_bias); + goto out_bitmap; + } + } + + err = check_rbtree_empty(engine); +out_bitmap: + kfree(bitmap); +out_waiters: + drm_free_large(waiters); +out_engines: + mock_engine_flush(engine); + return err; +} + +struct igt_wakeup { + struct task_struct *tsk; + atomic_t *ready, *set, *done; + struct intel_engine_cs *engine; + unsigned long flags; +#define STOP 0 +#define IDLE 1 + wait_queue_head_t *wq; + u32 seqno; +}; + +static int wait_atomic(atomic_t *p) +{ + schedule(); + return 0; +} + +static int wait_atomic_timeout(atomic_t *p) +{ + return schedule_timeout(10 * HZ) ? 
0 : -ETIMEDOUT; +} + +static bool wait_for_ready(struct igt_wakeup *w) +{ + DEFINE_WAIT(ready); + + set_bit(IDLE, &w->flags); + if (atomic_dec_and_test(w->done)) + wake_up_atomic_t(w->done); + + if (test_bit(STOP, &w->flags)) + goto out; + + for (;;) { + prepare_to_wait(w->wq, &ready, TASK_INTERRUPTIBLE); + if (atomic_read(w->ready) == 0) + break; + + schedule(); + } + finish_wait(w->wq, &ready); + +out: + clear_bit(IDLE, &w->flags); + if (atomic_dec_and_test(w->set)) + wake_up_atomic_t(w->set); + + return !test_bit(STOP, &w->flags); +} + +static int igt_wakeup_thread(void *arg) +{ + struct igt_wakeup *w = arg; + struct intel_wait wait; + + while (wait_for_ready(w)) { + GEM_BUG_ON(kthread_should_stop()); + + intel_wait_init_for_seqno(&wait, w->seqno); + intel_engine_add_wait(w->engine, &wait); + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (i915_seqno_passed(intel_engine_get_seqno(w->engine), + w->seqno)) + break; + + if (test_bit(STOP, &w->flags)) /* emergency escape */ + break; + + schedule(); + } + intel_engine_remove_wait(w->engine, &wait); + __set_current_state(TASK_RUNNING); + } + + return 0; +} + +static void igt_wake_all_sync(atomic_t *ready, + atomic_t *set, + atomic_t *done, + wait_queue_head_t *wq, + int count) +{ + atomic_set(set, count); + atomic_set(ready, 0); + wake_up_all(wq); + + wait_on_atomic_t(set, wait_atomic, TASK_UNINTERRUPTIBLE); + atomic_set(ready, count); + atomic_set(done, count); +} + +static int igt_wakeup(void *arg) +{ + I915_RND_STATE(prng); + const int state = TASK_UNINTERRUPTIBLE; + struct intel_engine_cs *engine = arg; + struct igt_wakeup *waiters; + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); + const int count = 4096; + const u32 max_seqno = count / 4; + atomic_t ready, set, done; + int err = -ENOMEM; + int n, step; + + mock_engine_reset(engine); + + waiters = drm_malloc_gfp(count, sizeof(*waiters), GFP_TEMPORARY); + if (!waiters) + goto out_engines; + + /* Create a large number of threads, each waiting on a random seqno. 
+ * Multiple waiters will be waiting for the same seqno. + */ + atomic_set(&ready, count); + for (n = 0; n < count; n++) { + waiters[n].wq = &wq; + waiters[n].ready = &ready; + waiters[n].set = &set; + waiters[n].done = &done; + waiters[n].engine = engine; + waiters[n].flags = BIT(IDLE); + + waiters[n].tsk = kthread_run(igt_wakeup_thread, &waiters[n], + "i915/igt:%d", n); + if (IS_ERR(waiters[n].tsk)) + goto out_waiters; + + get_task_struct(waiters[n].tsk); + } + + for (step = 1; step <= max_seqno; step <<= 1) { + u32 seqno; + + /* The waiter threads start paused as we assign them a random + * seqno and reset the engine. Once the engine is reset, + * we signal that the threads may begin their wait upon their + * seqno. + */ + for (n = 0; n < count; n++) { + GEM_BUG_ON(!test_bit(IDLE, &waiters[n].flags)); + waiters[n].seqno = + 1 + prandom_u32_state(&prng) % max_seqno; + } + mock_seqno_advance(engine, 0); + igt_wake_all_sync(&ready, &set, &done, &wq, count); + + /* Simulate the GPU doing chunks of work, with one or more + * seqno appearing to finish at the same time. A random number + * of threads will be waiting upon the update and hopefully be + * woken. + */ + for (seqno = 1; seqno <= max_seqno + step; seqno += step) { + usleep_range(50, 500); + mock_seqno_advance(engine, seqno); + } + GEM_BUG_ON(intel_engine_get_seqno(engine) < 1 + max_seqno); + + /* With the seqno now beyond any of the waiting threads, they + * should all be woken, see that they are complete and signal + * that they are ready for the next test. We wait until all + * threads are complete and waiting for us (i.e. not a seqno). 
+ */ + err = wait_on_atomic_t(&done, wait_atomic_timeout, state); + if (err) { + pr_err("Timed out waiting for %d remaining waiters\n", + atomic_read(&done)); + break; + } + + err = check_rbtree_empty(engine); + if (err) + break; + } + +out_waiters: + for (n = 0; n < count; n++) { + if (IS_ERR(waiters[n].tsk)) + break; + + set_bit(STOP, &waiters[n].flags); + } + mock_seqno_advance(engine, INT_MAX); /* wakeup any broken waiters */ + igt_wake_all_sync(&ready, &set, &done, &wq, n); + + for (n = 0; n < count; n++) { + if (IS_ERR(waiters[n].tsk)) + break; + + kthread_stop(waiters[n].tsk); + put_task_struct(waiters[n].tsk); + } + + drm_free_large(waiters); +out_engines: + mock_engine_flush(engine); + return err; +} + +int intel_breadcrumbs_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_random_insert_remove), + SUBTEST(igt_insert_complete), + SUBTEST(igt_wakeup), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915->engine[RCS]); + drm_dev_unref(&i915->drm); + + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c new file mode 100644 index 000000000000..aa31d6c0cdfb --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -0,0 +1,542 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all 
copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +struct hang { + struct drm_i915_private *i915; + struct drm_i915_gem_object *hws; + struct drm_i915_gem_object *obj; + u32 *seqno; + u32 *batch; +}; + +static int hang_init(struct hang *h, struct drm_i915_private *i915) +{ + void *vaddr; + int err; + + memset(h, 0, sizeof(*h)); + h->i915 = i915; + + h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(h->hws)) + return PTR_ERR(h->hws); + + h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(h->obj)) { + err = PTR_ERR(h->obj); + goto err_hws; + } + + i915_gem_object_set_cache_level(h->hws, I915_CACHE_LLC); + vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_obj; + } + h->seqno = memset(vaddr, 0xff, PAGE_SIZE); + + vaddr = i915_gem_object_pin_map(h->obj, + HAS_LLC(i915) ? 
I915_MAP_WB : I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_unpin_hws; + } + h->batch = vaddr; + + return 0; + +err_unpin_hws: + i915_gem_object_unpin_map(h->hws); +err_obj: + i915_gem_object_put(h->obj); +err_hws: + i915_gem_object_put(h->hws); + return err; +} + +static u64 hws_address(const struct i915_vma *hws, + const struct drm_i915_gem_request *rq) +{ + return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context); +} + +static int emit_recurse_batch(struct hang *h, + struct drm_i915_gem_request *rq) +{ + struct drm_i915_private *i915 = h->i915; + struct i915_address_space *vm = rq->ctx->ppgtt ? &rq->ctx->ppgtt->base : &i915->ggtt.base; + struct i915_vma *hws, *vma; + unsigned int flags; + u32 *batch; + int err; + + vma = i915_vma_instance(h->obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + hws = i915_vma_instance(h->hws, vm, NULL); + if (IS_ERR(hws)) + return PTR_ERR(hws); + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return err; + + err = i915_vma_pin(hws, 0, 0, PIN_USER); + if (err) + goto unpin_vma; + + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto unpin_hws; + + err = i915_switch_context(rq); + if (err) + goto unpin_hws; + + i915_vma_move_to_active(vma, rq, 0); + if (!i915_gem_object_has_active_reference(vma->obj)) { + i915_gem_object_get(vma->obj); + i915_gem_object_set_active_reference(vma->obj); + } + + i915_vma_move_to_active(hws, rq, 0); + if (!i915_gem_object_has_active_reference(hws->obj)) { + i915_gem_object_get(hws->obj); + i915_gem_object_set_active_reference(hws->obj); + } + + batch = h->batch; + if (INTEL_GEN(i915) >= 8) { + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = upper_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; + *batch++ = lower_32_bits(vma->node.start); + *batch++ = upper_32_bits(vma->node.start); + } else if (INTEL_GEN(i915) >= 6) { 
+ *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = 0; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + *batch++ = MI_BATCH_BUFFER_START | 1 << 8; + *batch++ = lower_32_bits(vma->node.start); + } else if (INTEL_GEN(i915) >= 4) { + *batch++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; + *batch++ = 0; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + *batch++ = MI_BATCH_BUFFER_START | 2 << 6; + *batch++ = lower_32_bits(vma->node.start); + } else { + *batch++ = MI_STORE_DWORD_IMM; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + *batch++ = MI_BATCH_BUFFER_START | 2 << 6 | 1; + *batch++ = lower_32_bits(vma->node.start); + } + *batch++ = MI_BATCH_BUFFER_END; /* not reached */ + + flags = 0; + if (INTEL_GEN(vm->i915) <= 5) + flags |= I915_DISPATCH_SECURE; + + err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags); + +unpin_hws: + i915_vma_unpin(hws); +unpin_vma: + i915_vma_unpin(vma); + return err; +} + +static struct drm_i915_gem_request * +hang_create_request(struct hang *h, + struct intel_engine_cs *engine, + struct i915_gem_context *ctx) +{ + struct drm_i915_gem_request *rq; + int err; + + if (i915_gem_object_is_active(h->obj)) { + struct drm_i915_gem_object *obj; + void *vaddr; + + obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vaddr = i915_gem_object_pin_map(obj, + HAS_LLC(h->i915) ? 
I915_MAP_WB : I915_MAP_WC); + if (IS_ERR(vaddr)) { + i915_gem_object_put(obj); + return ERR_CAST(vaddr); + } + + i915_gem_object_unpin_map(h->obj); + i915_gem_object_put(h->obj); + + h->obj = obj; + h->batch = vaddr; + } + + rq = i915_gem_request_alloc(engine, ctx); + if (IS_ERR(rq)) + return rq; + + err = emit_recurse_batch(h, rq); + if (err) { + __i915_add_request(rq, false); + return ERR_PTR(err); + } + + return rq; +} + +static u32 hws_seqno(const struct hang *h, + const struct drm_i915_gem_request *rq) +{ + return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]); +} + +static void hang_fini(struct hang *h) +{ + *h->batch = MI_BATCH_BUFFER_END; + wmb(); + + i915_gem_object_unpin_map(h->obj); + i915_gem_object_put(h->obj); + + i915_gem_object_unpin_map(h->hws); + i915_gem_object_put(h->hws); + + i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED); +} + +static int igt_hang_sanitycheck(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *rq; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct hang h; + int err; + + /* Basic check that we can execute our hanging batch */ + + if (!igt_can_mi_store_dword_imm(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + if (err) + goto unlock; + + for_each_engine(engine, i915, id) { + long timeout; + + rq = hang_create_request(&h, engine, i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + pr_err("Failed to create request for %s, err=%d\n", + engine->name, err); + goto fini; + } + + i915_gem_request_get(rq); + + *h.batch = MI_BATCH_BUFFER_END; + __i915_add_request(rq, true); + + timeout = i915_wait_request(rq, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + i915_gem_request_put(rq); + + if (timeout < 0) { + err = timeout; + pr_err("Wait for request failed on %s, err=%d\n", + engine->name, err); + goto fini; + } + } + +fini: + hang_fini(&h); +unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + 
+static int igt_global_reset(void *arg) +{ + struct drm_i915_private *i915 = arg; + unsigned int reset_count; + int err = 0; + + /* Check that we can issue a global GPU reset */ + + set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags); + set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags); + + mutex_lock(&i915->drm.struct_mutex); + reset_count = i915_reset_count(&i915->gpu_error); + + i915_reset(i915); + + if (i915_reset_count(&i915->gpu_error) == reset_count) { + pr_err("No GPU reset recorded!\n"); + err = -EINVAL; + } + mutex_unlock(&i915->drm.struct_mutex); + + GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); + clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags); + if (i915_terminally_wedged(&i915->gpu_error)) + err = -EIO; + + return err; +} + +static u32 fake_hangcheck(struct drm_i915_gem_request *rq) +{ + u32 reset_count; + + rq->engine->hangcheck.stalled = true; + rq->engine->hangcheck.seqno = intel_engine_get_seqno(rq->engine); + + reset_count = i915_reset_count(&rq->i915->gpu_error); + + set_bit(I915_RESET_HANDOFF, &rq->i915->gpu_error.flags); + wake_up_all(&rq->i915->gpu_error.wait_queue); + + return reset_count; +} + +static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq) +{ + return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq), + rq->fence.seqno), + 10) && + wait_for(i915_seqno_passed(hws_seqno(h, rq), + rq->fence.seqno), + 1000)); +} + +static int igt_wait_reset(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *rq; + unsigned int reset_count; + struct hang h; + long timeout; + int err; + + /* Check that we detect a stuck waiter and issue a reset */ + + set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags); + + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + if (err) + goto unlock; + + rq = hang_create_request(&h, i915->engine[RCS], i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto fini; + } + + i915_gem_request_get(rq); + 
__i915_add_request(rq, true); + + if (!wait_for_hang(&h, rq)) { + pr_err("Failed to start request %x\n", rq->fence.seqno); + err = -EIO; + goto out_rq; + } + + reset_count = fake_hangcheck(rq); + + timeout = i915_wait_request(rq, I915_WAIT_LOCKED, 10); + if (timeout < 0) { + pr_err("i915_wait_request failed on a stuck request: err=%ld\n", + timeout); + err = timeout; + goto out_rq; + } + + GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); + if (i915_reset_count(&i915->gpu_error) == reset_count) { + pr_err("No GPU reset recorded!\n"); + err = -EINVAL; + goto out_rq; + } + +out_rq: + i915_gem_request_put(rq); +fini: + hang_fini(&h); +unlock: + mutex_unlock(&i915->drm.struct_mutex); + clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags); + + if (i915_terminally_wedged(&i915->gpu_error)) + return -EIO; + + return err; +} + +static int igt_reset_queue(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct hang h; + int err; + + /* Check that we replay pending requests following a hang */ + + if (!igt_can_mi_store_dword_imm(i915)) + return 0; + + set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags); + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + if (err) + goto unlock; + + for_each_engine(engine, i915, id) { + struct drm_i915_gem_request *prev; + IGT_TIMEOUT(end_time); + unsigned int count; + + prev = hang_create_request(&h, engine, i915->kernel_context); + if (IS_ERR(prev)) { + err = PTR_ERR(prev); + goto fini; + } + + i915_gem_request_get(prev); + __i915_add_request(prev, true); + + count = 0; + do { + struct drm_i915_gem_request *rq; + unsigned int reset_count; + + rq = hang_create_request(&h, + engine, + i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto fini; + } + + i915_gem_request_get(rq); + __i915_add_request(rq, true); + + if (!wait_for_hang(&h, prev)) { + pr_err("Failed to start request %x\n", + prev->fence.seqno); + 
i915_gem_request_put(rq); + i915_gem_request_put(prev); + err = -EIO; + goto fini; + } + + reset_count = fake_hangcheck(prev); + + i915_reset(i915); + + GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, + &i915->gpu_error.flags)); + + if (prev->fence.error != -EIO) { + pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n", + prev->fence.error); + i915_gem_request_put(rq); + i915_gem_request_put(prev); + err = -EINVAL; + goto fini; + } + + if (rq->fence.error) { + pr_err("Fence error status not zero [%d] after unrelated reset\n", + rq->fence.error); + i915_gem_request_put(rq); + i915_gem_request_put(prev); + err = -EINVAL; + goto fini; + } + + if (i915_reset_count(&i915->gpu_error) == reset_count) { + pr_err("No GPU reset recorded!\n"); + i915_gem_request_put(rq); + i915_gem_request_put(prev); + err = -EINVAL; + goto fini; + } + + i915_gem_request_put(prev); + prev = rq; + count++; + } while (time_before(jiffies, end_time)); + pr_info("%s: Completed %d resets\n", engine->name, count); + + *h.batch = MI_BATCH_BUFFER_END; + wmb(); + + i915_gem_request_put(prev); + } + +fini: + hang_fini(&h); +unlock: + mutex_unlock(&i915->drm.struct_mutex); + clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags); + + if (i915_terminally_wedged(&i915->gpu_error)) + return -EIO; + + return err; +} + +int intel_hangcheck_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_hang_sanitycheck), + SUBTEST(igt_global_reset), + SUBTEST(igt_wait_reset), + SUBTEST(igt_reset_queue), + }; + + if (!intel_has_gpu_reset(i915)) + return 0; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c new file mode 100644 index 000000000000..2d0fef2cfca6 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -0,0 +1,182 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person 
obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include "../i915_selftest.h" + +static int intel_fw_table_check(const struct intel_forcewake_range *ranges, + unsigned int num_ranges, + bool is_watertight) +{ + unsigned int i; + s32 prev; + + for (i = 0, prev = -1; i < num_ranges; i++, ranges++) { + /* Check that the table is watertight */ + if (is_watertight && (prev + 1) != (s32)ranges->start) { + pr_err("%s: entry[%d]:(%x, %x) is not watertight to previous (%x)\n", + __func__, i, ranges->start, ranges->end, prev); + return -EINVAL; + } + + /* Check that the table never goes backwards */ + if (prev >= (s32)ranges->start) { + pr_err("%s: entry[%d]:(%x, %x) is less than the previous (%x)\n", + __func__, i, ranges->start, ranges->end, prev); + return -EINVAL; + } + + /* Check that the entry is valid */ + if (ranges->start >= ranges->end) { + pr_err("%s: entry[%d]:(%x, %x) has negative length\n", + __func__, i, ranges->start, ranges->end); + return -EINVAL; + } + + prev = ranges->end; + } + + return 0; +} + +static int intel_shadow_table_check(void) +{ + const i915_reg_t *reg = gen8_shadowed_regs; + unsigned int i; + s32 prev; + + for (i = 0, prev = -1; i < ARRAY_SIZE(gen8_shadowed_regs); i++, reg++) { + u32 offset = i915_mmio_reg_offset(*reg); + + if (prev >= (s32)offset) { + pr_err("%s: entry[%d]:(%x) is before previous (%x)\n", + __func__, i, offset, prev); + return -EINVAL; + } + + prev = offset; + } + + return 0; +} + +int intel_uncore_mock_selftests(void) +{ + struct { + const struct intel_forcewake_range *ranges; + unsigned int num_ranges; + bool is_watertight; + } fw[] = { + { __vlv_fw_ranges, ARRAY_SIZE(__vlv_fw_ranges), false }, + { __chv_fw_ranges, ARRAY_SIZE(__chv_fw_ranges), false }, + { __gen9_fw_ranges, ARRAY_SIZE(__gen9_fw_ranges), true }, + }; + int err, i; + + for (i = 0; i < ARRAY_SIZE(fw); i++) { + err = intel_fw_table_check(fw[i].ranges, + fw[i].num_ranges, + fw[i].is_watertight); + if (err) + return err; + } + + err = intel_shadow_table_check(); + if (err) + return err; + + return 
0; +} + +static int intel_uncore_check_forcewake_domains(struct drm_i915_private *dev_priv) +{ +#define FW_RANGE 0x40000 + unsigned long *valid; + u32 offset; + int err; + + if (!HAS_FPGA_DBG_UNCLAIMED(dev_priv) && + !IS_VALLEYVIEW(dev_priv) && + !IS_CHERRYVIEW(dev_priv)) + return 0; + + if (IS_VALLEYVIEW(dev_priv)) /* XXX system lockup! */ + return 0; + + if (IS_BROADWELL(dev_priv)) /* XXX random GPU hang afterwards! */ + return 0; + + valid = kzalloc(BITS_TO_LONGS(FW_RANGE) * sizeof(*valid), + GFP_TEMPORARY); + if (!valid) + return -ENOMEM; + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + check_for_unclaimed_mmio(dev_priv); + for (offset = 0; offset < FW_RANGE; offset += 4) { + i915_reg_t reg = { offset }; + + (void)I915_READ_FW(reg); + if (!check_for_unclaimed_mmio(dev_priv)) + set_bit(offset, valid); + } + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + + err = 0; + for_each_set_bit(offset, valid, FW_RANGE) { + i915_reg_t reg = { offset }; + + intel_uncore_forcewake_reset(dev_priv, false); + check_for_unclaimed_mmio(dev_priv); + + (void)I915_READ(reg); + if (check_for_unclaimed_mmio(dev_priv)) { + pr_err("Unclaimed mmio read to register 0x%04x\n", + offset); + err = -EINVAL; + } + } + + kfree(valid); + return err; +} + +int intel_uncore_live_selftests(struct drm_i915_private *i915) +{ + int err; + + /* Confirm the table we load is still valid */ + err = intel_fw_table_check(i915->uncore.fw_domains_table, + i915->uncore.fw_domains_table_entries, + INTEL_GEN(i915) >= 9); + if (err) + return err; + + err = intel_uncore_check_forcewake_domains(i915); + if (err) + return err; + + return 0; +} diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c new file mode 100644 index 000000000000..8d3a90c3f8ac --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_context.c @@ -0,0 +1,78 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person 
obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include "mock_context.h" +#include "mock_gtt.h" + +struct i915_gem_context * +mock_context(struct drm_i915_private *i915, + const char *name) +{ + struct i915_gem_context *ctx; + int ret; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + kref_init(&ctx->ref); + INIT_LIST_HEAD(&ctx->link); + ctx->i915 = i915; + + ret = ida_simple_get(&i915->context_hw_ida, + 0, MAX_CONTEXT_HW_ID, GFP_KERNEL); + if (ret < 0) + goto err_free; + ctx->hw_id = ret; + + if (name) { + ctx->name = kstrdup(name, GFP_KERNEL); + if (!ctx->name) + goto err_put; + + ctx->ppgtt = mock_ppgtt(i915, name); + if (!ctx->ppgtt) + goto err_put; + } + + return ctx; + +err_free: + kfree(ctx); + return NULL; + +err_put: + i915_gem_context_set_closed(ctx); + i915_gem_context_put(ctx); + return NULL; +} + +void mock_context_close(struct i915_gem_context *ctx) +{ + i915_gem_context_set_closed(ctx); + + i915_ppgtt_close(&ctx->ppgtt->base); + + i915_gem_context_put(ctx); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_context.h b/drivers/gpu/drm/i915/selftests/mock_context.h new file mode 100644 index 000000000000..2427e5c0916a --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_context.h @@ -0,0 +1,34 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_CONTEXT_H +#define __MOCK_CONTEXT_H + +struct i915_gem_context * +mock_context(struct drm_i915_private *i915, + const char *name); + +void mock_context_close(struct i915_gem_context *ctx); + +#endif /* !__MOCK_CONTEXT_H */ diff --git a/drivers/gpu/drm/i915/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/selftests/mock_dmabuf.c new file mode 100644 index 000000000000..302f7d103635 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_dmabuf.c @@ -0,0 +1,176 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_dmabuf.h" + +static struct sg_table *mock_map_dma_buf(struct dma_buf_attachment *attachment, + enum dma_data_direction dir) +{ + struct mock_dmabuf *mock = to_mock(attachment->dmabuf); + struct sg_table *st; + struct scatterlist *sg; + int i, err; + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return ERR_PTR(-ENOMEM); + + err = sg_alloc_table(st, mock->npages, GFP_KERNEL); + if (err) + goto err_free; + + sg = st->sgl; + for (i = 0; i < mock->npages; i++) { + sg_set_page(sg, mock->pages[i], PAGE_SIZE, 0); + sg = sg_next(sg); + } + + if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) { + err = -ENOMEM; + goto err_st; + } + + return st; + +err_st: + sg_free_table(st); +err_free: + kfree(st); + return ERR_PTR(err); +} + +static void mock_unmap_dma_buf(struct dma_buf_attachment *attachment, + struct sg_table *st, + enum dma_data_direction dir) +{ + dma_unmap_sg(attachment->dev, st->sgl, st->nents, dir); + sg_free_table(st); + kfree(st); +} + +static void mock_dmabuf_release(struct dma_buf *dma_buf) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + int i; + + for (i = 0; i < mock->npages; i++) + put_page(mock->pages[i]); + + kfree(mock); +} + +static void *mock_dmabuf_vmap(struct dma_buf *dma_buf) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return vm_map_ram(mock->pages, mock->npages, 0, PAGE_KERNEL); +} + +static void mock_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + vm_unmap_ram(vaddr, mock->npages); +} + +static void *mock_dmabuf_kmap_atomic(struct dma_buf *dma_buf, unsigned long page_num) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return 
kmap_atomic(mock->pages[page_num]); +} + +static void mock_dmabuf_kunmap_atomic(struct dma_buf *dma_buf, unsigned long page_num, void *addr) +{ + kunmap_atomic(addr); +} + +static void *mock_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return kmap(mock->pages[page_num]); +} + +static void mock_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return kunmap(mock->pages[page_num]); +} + +static int mock_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) +{ + return -ENODEV; +} + +static const struct dma_buf_ops mock_dmabuf_ops = { + .map_dma_buf = mock_map_dma_buf, + .unmap_dma_buf = mock_unmap_dma_buf, + .release = mock_dmabuf_release, + .map = mock_dmabuf_kmap, + .map_atomic = mock_dmabuf_kmap_atomic, + .unmap = mock_dmabuf_kunmap, + .unmap_atomic = mock_dmabuf_kunmap_atomic, + .mmap = mock_dmabuf_mmap, + .vmap = mock_dmabuf_vmap, + .vunmap = mock_dmabuf_vunmap, +}; + +static struct dma_buf *mock_dmabuf(int npages) +{ + struct mock_dmabuf *mock; + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + struct dma_buf *dmabuf; + int i; + + mock = kmalloc(sizeof(*mock) + npages * sizeof(struct page *), + GFP_KERNEL); + if (!mock) + return ERR_PTR(-ENOMEM); + + mock->npages = npages; + for (i = 0; i < npages; i++) { + mock->pages[i] = alloc_page(GFP_KERNEL); + if (!mock->pages[i]) + goto err; + } + + exp_info.ops = &mock_dmabuf_ops; + exp_info.size = npages * PAGE_SIZE; + exp_info.flags = O_CLOEXEC; + exp_info.priv = mock; + + dmabuf = dma_buf_export(&exp_info); + if (IS_ERR(dmabuf)) + goto err; + + return dmabuf; + +err: + while (i--) + put_page(mock->pages[i]); + kfree(mock); + return ERR_PTR(-ENOMEM); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_dmabuf.h b/drivers/gpu/drm/i915/selftests/mock_dmabuf.h new file mode 100644 index 000000000000..ec80613159b9 --- /dev/null +++ 
b/drivers/gpu/drm/i915/selftests/mock_dmabuf.h @@ -0,0 +1,41 @@ + +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#ifndef __MOCK_DMABUF_H__ +#define __MOCK_DMABUF_H__ + +#include <linux/dma-buf.h> + +struct mock_dmabuf { + int npages; + struct page *pages[]; +}; + +static struct mock_dmabuf *to_mock(struct dma_buf *buf) +{ + return buf->priv; +} + +#endif /* !__MOCK_DMABUF_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_drm.c b/drivers/gpu/drm/i915/selftests/mock_drm.c new file mode 100644 index 000000000000..09c704153456 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_drm.c @@ -0,0 +1,73 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include "mock_drm.h" + +struct drm_file *mock_file(struct drm_i915_private *i915) +{ + struct file *filp; + struct inode *inode; + struct drm_file *file; + int err; + + inode = kzalloc(sizeof(*inode), GFP_KERNEL); + if (!inode) { + err = -ENOMEM; + goto err; + } + + inode->i_rdev = i915->drm.primary->index; + + filp = kzalloc(sizeof(*filp), GFP_KERNEL); + if (!filp) { + err = -ENOMEM; + goto err_inode; + } + + err = drm_open(inode, filp); + if (err) + goto err_filp; + + file = filp->private_data; + memset(&file->filp, POISON_INUSE, sizeof(file->filp)); + file->authenticated = true; + + kfree(filp); + kfree(inode); + return file; + +err_filp: + kfree(filp); +err_inode: + kfree(inode); +err: + return ERR_PTR(err); +} + +void mock_file_free(struct drm_i915_private *i915, struct drm_file *file) +{ + struct file filp = { .private_data = file }; + + drm_release(NULL, &filp); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_drm.h b/drivers/gpu/drm/i915/selftests/mock_drm.h new file mode 100644 index 000000000000..b39beee9f8f6 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_drm.h @@ -0,0 +1,31 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_DRM_H +#define __MOCK_DRM_H + +struct drm_file *mock_file(struct drm_i915_private *i915); +void mock_file_free(struct drm_i915_private *i915, struct drm_file *file); + +#endif /* !__MOCK_DRM_H */ diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c new file mode 100644 index 000000000000..0ad624a1db90 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -0,0 +1,206 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_engine.h" +#include "mock_request.h" + +static struct mock_request *first_request(struct mock_engine *engine) +{ + return list_first_entry_or_null(&engine->hw_queue, + struct mock_request, + link); +} + +static void hw_delay_complete(unsigned long data) +{ + struct mock_engine *engine = (typeof(engine))data; + struct mock_request *request; + + spin_lock(&engine->hw_lock); + + request = first_request(engine); + if (request) { + list_del_init(&request->link); + mock_seqno_advance(&engine->base, request->base.global_seqno); + } + + request = first_request(engine); + if (request) + mod_timer(&engine->hw_delay, jiffies + request->delay); + + spin_unlock(&engine->hw_lock); +} + +static int mock_context_pin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) +{ + i915_gem_context_get(ctx); + return 0; +} + +static void mock_context_unpin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) +{ + i915_gem_context_put(ctx); +} + +static int mock_request_alloc(struct drm_i915_gem_request *request) +{ + struct mock_request *mock = container_of(request, typeof(*mock), base); + + INIT_LIST_HEAD(&mock->link); + mock->delay = 0; + + request->ring = request->engine->buffer; + return 0; +} + +static int mock_emit_flush(struct drm_i915_gem_request *request, + unsigned int flags) +{ + return 0; +} + +static void mock_emit_breadcrumb(struct drm_i915_gem_request *request, + u32 *flags) +{ +} + +static void mock_submit_request(struct drm_i915_gem_request *request) +{ + struct mock_request *mock = container_of(request, typeof(*mock), base); + struct mock_engine *engine = + container_of(request->engine, typeof(*engine), base); + + i915_gem_request_submit(request); + 
GEM_BUG_ON(!request->global_seqno); + + spin_lock_irq(&engine->hw_lock); + list_add_tail(&mock->link, &engine->hw_queue); + if (mock->link.prev == &engine->hw_queue) + mod_timer(&engine->hw_delay, jiffies + mock->delay); + spin_unlock_irq(&engine->hw_lock); +} + +static struct intel_ring *mock_ring(struct intel_engine_cs *engine) +{ + const unsigned long sz = roundup_pow_of_two(sizeof(struct intel_ring)); + struct intel_ring *ring; + + ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL); + if (!ring) + return NULL; + + ring->engine = engine; + ring->size = sz; + ring->effective_size = sz; + ring->vaddr = (void *)(ring + 1); + + INIT_LIST_HEAD(&ring->request_list); + intel_ring_update_space(ring); + + return ring; +} + +struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, + const char *name) +{ + struct mock_engine *engine; + static int id; + + engine = kzalloc(sizeof(*engine) + PAGE_SIZE, GFP_KERNEL); + if (!engine) + return NULL; + + engine->base.buffer = mock_ring(&engine->base); + if (!engine->base.buffer) { + kfree(engine); + return NULL; + } + + /* minimal engine setup for requests */ + engine->base.i915 = i915; + engine->base.name = name; + engine->base.id = id++; + engine->base.status_page.page_addr = (void *)(engine + 1); + + engine->base.context_pin = mock_context_pin; + engine->base.context_unpin = mock_context_unpin; + engine->base.request_alloc = mock_request_alloc; + engine->base.emit_flush = mock_emit_flush; + engine->base.emit_breadcrumb = mock_emit_breadcrumb; + engine->base.submit_request = mock_submit_request; + + engine->base.timeline = + &i915->gt.global_timeline.engine[engine->base.id]; + + intel_engine_init_breadcrumbs(&engine->base); + engine->base.breadcrumbs.mock = true; /* prevent touching HW for irqs */ + + /* fake hw queue */ + spin_lock_init(&engine->hw_lock); + setup_timer(&engine->hw_delay, + hw_delay_complete, + (unsigned long)engine); + INIT_LIST_HEAD(&engine->hw_queue); + + return &engine->base; +} + +void 
mock_engine_flush(struct intel_engine_cs *engine) +{ + struct mock_engine *mock = + container_of(engine, typeof(*mock), base); + struct mock_request *request, *rn; + + del_timer_sync(&mock->hw_delay); + + spin_lock_irq(&mock->hw_lock); + list_for_each_entry_safe(request, rn, &mock->hw_queue, link) { + list_del_init(&request->link); + mock_seqno_advance(&mock->base, request->base.global_seqno); + } + spin_unlock_irq(&mock->hw_lock); +} + +void mock_engine_reset(struct intel_engine_cs *engine) +{ + intel_write_status_page(engine, I915_GEM_HWS_INDEX, 0); +} + +void mock_engine_free(struct intel_engine_cs *engine) +{ + struct mock_engine *mock = + container_of(engine, typeof(*mock), base); + + GEM_BUG_ON(timer_pending(&mock->hw_delay)); + + if (engine->last_retired_context) + engine->context_unpin(engine, engine->last_retired_context); + + intel_engine_fini_breadcrumbs(engine); + + kfree(engine->buffer); + kfree(engine); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.h b/drivers/gpu/drm/i915/selftests/mock_engine.h new file mode 100644 index 000000000000..e5e240216ba3 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_engine.h @@ -0,0 +1,54 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */

#ifndef __MOCK_ENGINE_H__
#define __MOCK_ENGINE_H__

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/timer.h>

#include "../intel_ringbuffer.h"

/* An intel_engine_cs plus the state of its fake hardware queue. */
struct mock_engine {
	struct intel_engine_cs base;

	spinlock_t hw_lock;		/* taken around hw_queue updates */
	struct list_head hw_queue;	/* requests linked via mock_request.link */
	struct timer_list hw_delay;	/* timer used to complete queued requests */
};

struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
				    const char *name);
void mock_engine_flush(struct intel_engine_cs *engine);
void mock_engine_reset(struct intel_engine_cs *engine);
void mock_engine_free(struct intel_engine_cs *engine);

/* Write seqno into the engine's status page and wake up the engine's waiters. */
static inline void mock_seqno_advance(struct intel_engine_cs *engine, u32 seqno)
{
	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
	intel_engine_wakeup(engine);
}

#endif /* !__MOCK_ENGINE_H__ */

diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c new file mode 100644 index 000000000000..6a8258eacdcb --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -0,0 +1,226 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is
furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/pm_runtime.h> + +#include "mock_engine.h" +#include "mock_context.h" +#include "mock_request.h" +#include "mock_gem_device.h" +#include "mock_gem_object.h" +#include "mock_gtt.h" + +void mock_device_flush(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + lockdep_assert_held(&i915->drm.struct_mutex); + + for_each_engine(engine, i915, id) + mock_engine_flush(engine); + + i915_gem_retire_requests(i915); +} + +static void mock_device_release(struct drm_device *dev) +{ + struct drm_i915_private *i915 = to_i915(dev); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + mutex_lock(&i915->drm.struct_mutex); + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + + cancel_delayed_work_sync(&i915->gt.retire_work); + cancel_delayed_work_sync(&i915->gt.idle_work); + + mutex_lock(&i915->drm.struct_mutex); + for_each_engine(engine, i915, id) + mock_engine_free(engine); + i915_gem_context_fini(i915); + mutex_unlock(&i915->drm.struct_mutex); + + drain_workqueue(i915->wq); + i915_gem_drain_freed_objects(i915); + + mutex_lock(&i915->drm.struct_mutex); + mock_fini_ggtt(i915); + i915_gem_timeline_fini(&i915->gt.global_timeline); + mutex_unlock(&i915->drm.struct_mutex); + + 
destroy_workqueue(i915->wq); + + kmem_cache_destroy(i915->dependencies); + kmem_cache_destroy(i915->requests); + kmem_cache_destroy(i915->vmas); + kmem_cache_destroy(i915->objects); + + drm_dev_fini(&i915->drm); + put_device(&i915->drm.pdev->dev); +} + +static struct drm_driver mock_driver = { + .name = "mock", + .driver_features = DRIVER_GEM, + .release = mock_device_release, + + .gem_close_object = i915_gem_close_object, + .gem_free_object_unlocked = i915_gem_free_object, +}; + +static void release_dev(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + kfree(pdev); +} + +static void mock_retire_work_handler(struct work_struct *work) +{ +} + +static void mock_idle_work_handler(struct work_struct *work) +{ +} + +struct drm_i915_private *mock_gem_device(void) +{ + struct drm_i915_private *i915; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct pci_dev *pdev; + int err; + + pdev = kzalloc(sizeof(*pdev) + sizeof(*i915), GFP_KERNEL); + if (!pdev) + goto err; + + device_initialize(&pdev->dev); + pdev->dev.release = release_dev; + dev_set_name(&pdev->dev, "mock"); + dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + + pm_runtime_dont_use_autosuspend(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); + + i915 = (struct drm_i915_private *)(pdev + 1); + pci_set_drvdata(pdev, i915); + + err = drm_dev_init(&i915->drm, &mock_driver, &pdev->dev); + if (err) { + pr_err("Failed to initialise mock GEM device: err=%d\n", err); + goto put_device; + } + i915->drm.pdev = pdev; + i915->drm.dev_private = i915; + + /* Using the global GTT may ask questions about KMS users, so prepare */ + drm_mode_config_init(&i915->drm); + + mkwrite_device_info(i915)->gen = -1; + + spin_lock_init(&i915->mm.object_stat_lock); + + init_waitqueue_head(&i915->gpu_error.wait_queue); + init_waitqueue_head(&i915->gpu_error.reset_queue); + + i915->wq = alloc_ordered_workqueue("mock", 0); + if (!i915->wq) + goto put_device; + + INIT_WORK(&i915->mm.free_work, 
__i915_gem_free_work); + init_llist_head(&i915->mm.free_list); + INIT_LIST_HEAD(&i915->mm.unbound_list); + INIT_LIST_HEAD(&i915->mm.bound_list); + + ida_init(&i915->context_hw_ida); + + INIT_DELAYED_WORK(&i915->gt.retire_work, mock_retire_work_handler); + INIT_DELAYED_WORK(&i915->gt.idle_work, mock_idle_work_handler); + + i915->gt.awake = true; + + i915->objects = KMEM_CACHE(mock_object, SLAB_HWCACHE_ALIGN); + if (!i915->objects) + goto err_wq; + + i915->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); + if (!i915->vmas) + goto err_objects; + + i915->requests = KMEM_CACHE(mock_request, + SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT | + SLAB_DESTROY_BY_RCU); + if (!i915->requests) + goto err_vmas; + + i915->dependencies = KMEM_CACHE(i915_dependency, + SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT); + if (!i915->dependencies) + goto err_requests; + + mutex_lock(&i915->drm.struct_mutex); + INIT_LIST_HEAD(&i915->gt.timelines); + err = i915_gem_timeline_init__global(i915); + if (err) { + mutex_unlock(&i915->drm.struct_mutex); + goto err_dependencies; + } + + mock_init_ggtt(i915); + mutex_unlock(&i915->drm.struct_mutex); + + mkwrite_device_info(i915)->ring_mask = BIT(0); + i915->engine[RCS] = mock_engine(i915, "mock"); + if (!i915->engine[RCS]) + goto err_dependencies; + + i915->kernel_context = mock_context(i915, NULL); + if (!i915->kernel_context) + goto err_engine; + + return i915; + +err_engine: + for_each_engine(engine, i915, id) + mock_engine_free(engine); +err_dependencies: + kmem_cache_destroy(i915->dependencies); +err_requests: + kmem_cache_destroy(i915->requests); +err_vmas: + kmem_cache_destroy(i915->vmas); +err_objects: + kmem_cache_destroy(i915->objects); +err_wq: + destroy_workqueue(i915->wq); +put_device: + put_device(&pdev->dev); +err: + return NULL; +} diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.h b/drivers/gpu/drm/i915/selftests/mock_gem_device.h new file mode 100644 index 000000000000..4cca4d57f52c --- /dev/null +++ 
b/drivers/gpu/drm/i915/selftests/mock_gem_device.h @@ -0,0 +1,9 @@

#ifndef __MOCK_GEM_DEVICE_H__
#define __MOCK_GEM_DEVICE_H__

struct drm_i915_private;

/* Create a mock device; returns NULL on failure. */
struct drm_i915_private *mock_gem_device(void);
/* Complete and retire all outstanding requests; needs struct_mutex held. */
void mock_device_flush(struct drm_i915_private *i915);

#endif /* !__MOCK_GEM_DEVICE_H__ */

diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_object.h b/drivers/gpu/drm/i915/selftests/mock_gem_object.h new file mode 100644 index 000000000000..9fbf67321662 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_gem_object.h @@ -0,0 +1,8 @@

#ifndef __MOCK_GEM_OBJECT_H__
#define __MOCK_GEM_OBJECT_H__

/* A GEM object wrapper; currently it adds nothing to the base object. */
struct mock_object {
	struct drm_i915_gem_object base;
};

#endif /* !__MOCK_GEM_OBJECT_H__ */

diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c new file mode 100644 index 000000000000..a61309c7cb3e --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -0,0 +1,138 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_gtt.h" + +static void mock_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 flags) +{ +} + +static void mock_insert_entries(struct i915_address_space *vm, + struct sg_table *st, + u64 start, + enum i915_cache_level level, u32 flags) +{ +} + +static int mock_bind_ppgtt(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + GEM_BUG_ON(flags & I915_VMA_GLOBAL_BIND); + vma->pages = vma->obj->mm.pages; + vma->flags |= I915_VMA_LOCAL_BIND; + return 0; +} + +static void mock_unbind_ppgtt(struct i915_vma *vma) +{ +} + +static void mock_cleanup(struct i915_address_space *vm) +{ +} + +struct i915_hw_ppgtt * +mock_ppgtt(struct drm_i915_private *i915, + const char *name) +{ + struct i915_hw_ppgtt *ppgtt; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return NULL; + + kref_init(&ppgtt->ref); + ppgtt->base.i915 = i915; + ppgtt->base.total = round_down(U64_MAX, PAGE_SIZE); + ppgtt->base.file = ERR_PTR(-ENODEV); + + INIT_LIST_HEAD(&ppgtt->base.active_list); + INIT_LIST_HEAD(&ppgtt->base.inactive_list); + INIT_LIST_HEAD(&ppgtt->base.unbound_list); + + INIT_LIST_HEAD(&ppgtt->base.global_link); + drm_mm_init(&ppgtt->base.mm, 0, ppgtt->base.total); + i915_gem_timeline_init(i915, &ppgtt->base.timeline, name); + + ppgtt->base.clear_range = nop_clear_range; + ppgtt->base.insert_page = mock_insert_page; + ppgtt->base.insert_entries = mock_insert_entries; + ppgtt->base.bind_vma = mock_bind_ppgtt; + ppgtt->base.unbind_vma = mock_unbind_ppgtt; + ppgtt->base.cleanup = mock_cleanup; + + return ppgtt; +} + +static int mock_bind_ggtt(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 
flags) +{ + int err; + + err = i915_get_ggtt_vma_pages(vma); + if (err) + return err; + + vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; + return 0; +} + +static void mock_unbind_ggtt(struct i915_vma *vma) +{ +} + +void mock_init_ggtt(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + + INIT_LIST_HEAD(&i915->vm_list); + + ggtt->base.i915 = i915; + + ggtt->mappable_base = 0; + ggtt->mappable_end = 2048 * PAGE_SIZE; + ggtt->base.total = 4096 * PAGE_SIZE; + + ggtt->base.clear_range = nop_clear_range; + ggtt->base.insert_page = mock_insert_page; + ggtt->base.insert_entries = mock_insert_entries; + ggtt->base.bind_vma = mock_bind_ggtt; + ggtt->base.unbind_vma = mock_unbind_ggtt; + ggtt->base.cleanup = mock_cleanup; + + i915_address_space_init(&ggtt->base, i915, "global"); +} + +void mock_fini_ggtt(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + + i915_address_space_fini(&ggtt->base); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.h b/drivers/gpu/drm/i915/selftests/mock_gtt.h new file mode 100644 index 000000000000..9a0a833bb545 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.h @@ -0,0 +1,35 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_GTT_H +#define __MOCK_GTT_H + +void mock_init_ggtt(struct drm_i915_private *i915); +void mock_fini_ggtt(struct drm_i915_private *i915); + +struct i915_hw_ppgtt * +mock_ppgtt(struct drm_i915_private *i915, + const char *name); + +#endif /* !__MOCK_GTT_H */ diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c new file mode 100644 index 000000000000..8097e3693ec4 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_request.c @@ -0,0 +1,63 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_engine.h" +#include "mock_request.h" + +struct drm_i915_gem_request * +mock_request(struct intel_engine_cs *engine, + struct i915_gem_context *context, + unsigned long delay) +{ + struct drm_i915_gem_request *request; + struct mock_request *mock; + + /* NB the i915->requests slab cache is enlarged to fit mock_request */ + request = i915_gem_request_alloc(engine, context); + if (IS_ERR(request)) + return NULL; + + mock = container_of(request, typeof(*mock), base); + mock->delay = delay; + + return &mock->base; +} + +bool mock_cancel_request(struct drm_i915_gem_request *request) +{ + struct mock_request *mock = container_of(request, typeof(*mock), base); + struct mock_engine *engine = + container_of(request->engine, typeof(*engine), base); + bool was_queued; + + spin_lock_irq(&engine->hw_lock); + was_queued = !list_empty(&mock->link); + list_del_init(&mock->link); + spin_unlock_irq(&engine->hw_lock); + + if (was_queued) + i915_gem_request_unsubmit(request); + + return was_queued; +} diff --git a/drivers/gpu/drm/i915/selftests/mock_request.h b/drivers/gpu/drm/i915/selftests/mock_request.h new file mode 100644 index 000000000000..4dea74c8e96d --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_request.h @@ -0,0 +1,46 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished 
to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */

#ifndef __MOCK_REQUEST__
#define __MOCK_REQUEST__

#include <linux/list.h>

#include "../i915_gem_request.h"

/* A GEM request extended with the state used by the mock engine. */
struct mock_request {
	struct drm_i915_gem_request base;

	struct list_head link;	/* entry in mock_engine.hw_queue */
	unsigned long delay;	/* jiffies added to the completion timer */
};

/* Allocate a request with the given completion delay; NULL on failure. */
struct drm_i915_gem_request *
mock_request(struct intel_engine_cs *engine,
	     struct i915_gem_context *context,
	     unsigned long delay);

/* Remove a request from the mock queue; true if it was still queued. */
bool mock_cancel_request(struct drm_i915_gem_request *request);

#endif /* !__MOCK_REQUEST__ */

diff --git a/drivers/gpu/drm/i915/selftests/scatterlist.c b/drivers/gpu/drm/i915/selftests/scatterlist.c new file mode 100644 index 000000000000..1cc5d2931753 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/scatterlist.c @@ -0,0 +1,364 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission
notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <linux/prime_numbers.h> +#include <linux/random.h> + +#include "../i915_selftest.h" + +#define PFN_BIAS (1 << 10) /* pfn given to the first page of every table built by alloc_table() */ + +/* An sg_table together with the pfn range it is expected to describe: start is the pfn of the first page, end is one past the pfn of the last page. */ struct pfn_table { + struct sg_table st; + unsigned long start, end; +}; + +/* Generator callback: number of pages for scatterlist entry n out of count entries; rnd is PRNG state for generators that need it. */ typedef unsigned int (*npages_fn_t)(unsigned long n, + unsigned long count, + struct rnd_state *rnd); + +/* Walk the table entry by entry with for_each_sg and check that every entry starts at the running pfn and has a length of npages_fn(n, nents, rnd) pages, and that the walk finishes on pt->end. who names the code under test in the messages. Returns 0 on success, -EINVAL on a mismatch, -EINTR if igt_timeout() fires. */ static noinline int expect_pfn_sg(struct pfn_table *pt, + npages_fn_t npages_fn, + struct rnd_state *rnd, + const char *who, + unsigned long timeout) +{ + struct scatterlist *sg; + unsigned long pfn, n; + + pfn = pt->start; + for_each_sg(pt->st.sgl, sg, pt->st.nents, n) { + struct page *page = sg_page(sg); + unsigned int npages = npages_fn(n, pt->st.nents, rnd); + + if (page_to_pfn(page) != pfn) { + pr_err("%s: %s left pages out of order, expected pfn %lu, found pfn %lu (using for_each_sg)\n", + __func__, who, pfn, page_to_pfn(page)); + return -EINVAL; + } + + if (sg->length != npages * PAGE_SIZE) { + pr_err("%s: %s copied wrong sg length, expected size %lu, found %u (using for_each_sg)\n", + __func__, who, npages * PAGE_SIZE, sg->length); + return -EINVAL; + } + + if (igt_timeout(timeout, "%s timed out\n", who)) + return -EINTR; + + pfn += npages; + } + if (pfn != pt->end) { + pr_err("%s: %s finished on wrong pfn, expected %lu, found %lu\n", + __func__, who, pt->end, pfn); + return -EINVAL; + } + + return 0; +} + +/* Walk the table page by page with for_each_sg_page and check that the pages follow consecutive pfns from pt->start up to pt->end. Returns 0, -EINVAL on a mismatch, or -EINTR on timeout. */ static noinline int 
expect_pfn_sg_page_iter(struct pfn_table *pt, + const char *who, + unsigned long timeout) +{ + struct sg_page_iter sgiter; + unsigned long pfn; + + pfn = pt->start; + for_each_sg_page(pt->st.sgl, &sgiter, pt->st.nents, 0) { + struct page *page = sg_page_iter_page(&sgiter); + + if (page != pfn_to_page(pfn)) { + pr_err("%s: %s left pages out of order, expected pfn %lu, found pfn %lu (using for_each_sg_page)\n", + __func__, who, pfn, page_to_pfn(page)); + return -EINVAL; + } + + if (igt_timeout(timeout, "%s timed out\n", who)) + return -EINTR; + + pfn++; + } + if (pfn != pt->end) { + pr_err("%s: %s finished on wrong pfn, expected %lu, found %lu\n", + __func__, who, pt->end, pfn); + return -EINVAL; + } + + return 0; +} + +/* Same page by page check as expect_pfn_sg_page_iter(), but iterating with for_each_sgt_page. Returns 0, -EINVAL on a mismatch, or -EINTR on timeout. */ static noinline int expect_pfn_sgtiter(struct pfn_table *pt, + const char *who, + unsigned long timeout) +{ + struct sgt_iter sgt; + struct page *page; + unsigned long pfn; + + pfn = pt->start; + for_each_sgt_page(page, sgt, &pt->st) { + if (page != pfn_to_page(pfn)) { + pr_err("%s: %s left pages out of order, expected pfn %lu, found pfn %lu (using for_each_sgt_page)\n", + __func__, who, pfn, page_to_pfn(page)); + return -EINVAL; + } + + if (igt_timeout(timeout, "%s timed out\n", who)) + return -EINTR; + + pfn++; + } + if (pfn != pt->end) { + pr_err("%s: %s finished on wrong pfn, expected %lu, found %lu\n", + __func__, who, pt->end, pfn); + return -EINVAL; + } + + return 0; +} + +/* Run the three iterator checks in turn (for_each_sg, for_each_sg_page, for_each_sgt_page) and return the first non-zero result, or 0 if all pass. */ static int expect_pfn_sgtable(struct pfn_table *pt, + npages_fn_t npages_fn, + struct rnd_state *rnd, + const char *who, + unsigned long timeout) +{ + int err; + + err = expect_pfn_sg(pt, npages_fn, rnd, who, timeout); + if (err) + return err; + + err = expect_pfn_sg_page_iter(pt, who, timeout); + if (err) + return err; + + err = expect_pfn_sgtiter(pt, who, timeout); + if (err) + return err; + + return 0; +} + +/* Generator: every entry is a single page. */ static unsigned int one(unsigned long n, + unsigned long count, + struct rnd_state *rnd) +{ + return 1; +} + +/* Generator: each entry has one more page than its index n. */ static unsigned int grow(unsigned long n, + unsigned long 
count, + struct rnd_state *rnd) +{ + return n + 1; +} + +/* Generator: entry n has count - n pages, so entries get smaller towards the end. */ static unsigned int shrink(unsigned long n, + unsigned long count, + struct rnd_state *rnd) +{ + return count - n; +} + +/* Generator: between 1 and 1024 pages, drawn from the PRNG state in rnd. */ static unsigned int random(unsigned long n, + unsigned long count, + struct rnd_state *rnd) +{ + return 1 + (prandom_u32_state(rnd) % 1024); +} + +/* True when last is exactly npages struct page elements after first. */ static inline bool page_contiguous(struct page *first, + struct page *last, + unsigned long npages) +{ + return first + npages == last; +} + +/* Allocate pt->st with room for max entries and fill the first count of them with runs of pages taken from consecutive pfns starting at PFN_BIAS (no pages are allocated here), entry n covering npages_fn(n, count, rnd) pages. Returns 0 on success, alloc_error if sg_alloc_table() fails, or -ENOSPC after freeing the table if the struct pages of a run are not contiguous. On success pt->start and pt->end bound the pfn range and st.nents equals count. */ static int alloc_table(struct pfn_table *pt, + unsigned long count, unsigned long max, + npages_fn_t npages_fn, + struct rnd_state *rnd, + int alloc_error) +{ + struct scatterlist *sg; + unsigned long n, pfn; + + if (sg_alloc_table(&pt->st, max, + GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN)) + return alloc_error; + + /* count should be less than 20 to prevent overflowing sg->length */ + GEM_BUG_ON(overflows_type(count * PAGE_SIZE, sg->length)); + + /* Construct a table where each scatterlist contains different number + * of entries. The idea is to check that we can iterate the individual + * pages from inside the coalesced lists. + */ + pt->start = PFN_BIAS; + pfn = pt->start; + sg = pt->st.sgl; + for (n = 0; n < count; n++) { + unsigned long npages = npages_fn(n, count, rnd); + + /* Nobody expects the Sparse Memmap! 
*/ + if (!page_contiguous(pfn_to_page(pfn), + pfn_to_page(pfn + npages), + npages)) { + sg_free_table(&pt->st); + return -ENOSPC; + } + + /* the first entry is st.sgl itself; only advance for later entries */ if (n) + sg = sg_next(sg); + sg_set_page(sg, pfn_to_page(pfn), npages * PAGE_SIZE, 0); + + GEM_BUG_ON(page_to_pfn(sg_page(sg)) != pfn); + GEM_BUG_ON(sg->length != npages * PAGE_SIZE); + GEM_BUG_ON(sg->offset != 0); + + pfn += npages; + } + /* terminate at the last entry used; nents is set to the used count while orig_nents is not touched here */ sg_mark_end(sg); + pt->st.nents = n; + pt->end = pfn; + + return 0; +} + +/* NULL-terminated list of the generators exercised by each selftest below. */ static const npages_fn_t npages_funcs[] = { + one, + grow, + shrink, + random, + NULL, +}; + +/* Selftest: for each prime yielded by for_each_prime_number(prime, max_order), build tables of BIT(prime) entries with offsets of -1, 0 and 1, once per generator in npages_funcs, and verify each with expect_pfn_sgtable(). An allocation failure is reported as -ENOMEM until a size above SG_MAX_SINGLE_ALLOC has been processed; from then on alloc_table() is told to return -ENOSPC, which only skips the remaining generators for that size. Returns 0 or the first error. */ static int igt_sg_alloc(void *ignored) +{ + IGT_TIMEOUT(end_time); + const unsigned long max_order = 20; /* approximating a 4GiB object */ + struct rnd_state prng; + unsigned long prime; + int alloc_error = -ENOMEM; + + for_each_prime_number(prime, max_order) { + unsigned long size = BIT(prime); + int offset; + + for (offset = -1; offset <= 1; offset++) { + unsigned long sz = size + offset; + const npages_fn_t *npages; + struct pfn_table pt; + int err; + + for (npages = npages_funcs; *npages; npages++) { + prandom_seed_state(&prng, + i915_selftest.random_seed); + err = alloc_table(&pt, sz, sz, *npages, &prng, + alloc_error); + /* table could not be built for this size; skip the remaining generators */ if (err == -ENOSPC) + break; + if (err) + return err; + + /* reseed so the generator replays the sequence used to build the table */ prandom_seed_state(&prng, + i915_selftest.random_seed); + err = expect_pfn_sgtable(&pt, *npages, &prng, + "sg_alloc_table", + end_time); + sg_free_table(&pt.st); + if (err) + return err; + } + } + + /* Test at least one continuation before accepting oom */ + if (size > SG_MAX_SINGLE_ALLOC) + alloc_error = -ENOSPC; + } + + return 0; +} + +/* Selftest: for each prime yielded by for_each_prime_number(prime, max), allocate a table with room for max entries, fill only prime of them and call i915_sg_trim(). When it returns true, nents and orig_nents must both equal prime and the contents must still pass expect_pfn_sgtable(). NOTE(review): a false return is not treated as a failure here; presumably it means nothing was trimmed - confirm against i915_sg_trim(). Returns 0 or the first error. */ static int igt_sg_trim(void *ignored) +{ + IGT_TIMEOUT(end_time); + const unsigned long max = PAGE_SIZE; /* not prime! 
*/ + struct pfn_table pt; + unsigned long prime; + int alloc_error = -ENOMEM; + + for_each_prime_number(prime, max) { + const npages_fn_t *npages; + int err; + + for (npages = npages_funcs; *npages; npages++) { + struct rnd_state prng; + + prandom_seed_state(&prng, i915_selftest.random_seed); + err = alloc_table(&pt, prime, max, *npages, &prng, + alloc_error); + /* table could not be built for this size; skip the remaining generators */ if (err == -ENOSPC) + break; + if (err) + return err; + + if (i915_sg_trim(&pt.st)) { + if (pt.st.orig_nents != prime || + pt.st.nents != prime) { + pr_err("i915_sg_trim failed (nents %u, orig_nents %u), expected %lu\n", + pt.st.nents, pt.st.orig_nents, prime); + err = -EINVAL; + } else { + /* reseed so the generator replays the sequence used to build the table */ prandom_seed_state(&prng, + i915_selftest.random_seed); + err = expect_pfn_sgtable(&pt, + *npages, &prng, + "i915_sg_trim", + end_time); + } + } + /* free before acting on err so the table is released on every path */ sg_free_table(&pt.st); + if (err) + return err; + } + + /* Test at least one continuation before accepting oom */ + if (prime > SG_MAX_SINGLE_ALLOC) + alloc_error = -ENOSPC; + } + + return 0; +} + +/* Entry point: run the scatterlist subtests through i915_subtests() with a NULL data argument and return its result. */ int scatterlist_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_sg_alloc), + SUBTEST(igt_sg_trim), + }; + + return i915_subtests(tests, NULL); +} |