diff options
author | Maxime Ripard <maxime.ripard@bootlin.com> | 2019-02-11 12:35:35 +0300 |
---|---|---|
committer | Maxime Ripard <maxime.ripard@bootlin.com> | 2019-02-11 12:35:35 +0300 |
commit | d588100baa28dae6a5c32d02bfe744d0792ed2ad (patch) | |
tree | 3be3984cea35a07b4259f8ebe55ce0e52e416d1c /drivers/gpu/drm/i915 | |
parent | 7bd0a3271e239d34bae5fa51e4f44ed9ee861a59 (diff) | |
parent | 16065fcdd19ddb9e093192914ac863884f308766 (diff) | |
download | linux-d588100baa28dae6a5c32d02bfe744d0792ed2ad.tar.xz |
Merge drm/drm-next into drm-misc-next
We need to backmerge drm-next to fix the komeda build failure.
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Diffstat (limited to 'drivers/gpu/drm/i915')
158 files changed, 12034 insertions, 8075 deletions
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 9e36ffb5eb7c..ad4d71161dda 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -21,11 +21,11 @@ config DRM_I915_DEBUG select DEBUG_FS select PREEMPT_COUNT select I2C_CHARDEV + select STACKDEPOT select DRM_DP_AUX_CHARDEV select X86_MSR # used by igt/pm_rpm select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks) select DRM_DEBUG_MM if DRM=y - select STACKDEPOT if DRM=y # for DRM_DEBUG_MM select DRM_DEBUG_SELFTEST select SW_SYNC # signaling validation framework (igt/syncobj*) select DRM_I915_SW_FENCE_DEBUG_OBJECTS @@ -173,6 +173,7 @@ config DRM_I915_DEBUG_RUNTIME_PM bool "Enable extra state checking for runtime PM" depends on DRM_I915 default n + select STACKDEPOT help Choose this option to turn on extra state checking for the runtime PM functionality. This may introduce overhead during diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index c34bee16730d..1787e1299b1b 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -22,6 +22,7 @@ subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable) subdir-ccflags-y += $(call cc-disable-warning, sign-compare) subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized) subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides) +subdir-ccflags-y += $(call cc-disable-warning, uninitialized) subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror # Fine grained warnings disable @@ -40,9 +41,10 @@ i915-y := i915_drv.o \ i915_mm.o \ i915_params.o \ i915_pci.o \ + i915_reset.o \ i915_suspend.o \ - i915_syncmap.o \ i915_sw_fence.o \ + i915_syncmap.o \ i915_sysfs.o \ intel_csr.o \ intel_device_info.o \ @@ -55,7 +57,9 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o # GEM code -i915-y += i915_cmd_parser.o \ +i915-y += \ + i915_active.o \ + i915_cmd_parser.o \ i915_gem_batch_pool.o \ i915_gem_clflush.o \ i915_gem_context.o \ @@ -166,6 +170,7 @@ i915-$(CONFIG_DRM_I915_SELFTEST) += \ selftests/i915_random.o \ selftests/i915_selftest.o \ selftests/igt_flush_test.o \ + selftests/igt_live_test.o \ selftests/igt_reset.o \ selftests/igt_spinner.o @@ -198,3 +203,4 @@ endif i915-y += intel_lpe_audio.o obj-$(CONFIG_DRM_I915) += i915.o +obj-$(CONFIG_DRM_I915_GVT_KVMGT) += gvt/kvmgt.o diff --git a/drivers/gpu/drm/i915/gvt/Makefile b/drivers/gpu/drm/i915/gvt/Makefile index b016dc753db9..271fb46d4dd0 100644 --- a/drivers/gpu/drm/i915/gvt/Makefile +++ b/drivers/gpu/drm/i915/gvt/Makefile @@ -7,4 +7,3 @@ GVT_SOURCE := gvt.o aperture_gm.o handlers.o vgpu.o trace_points.o firmware.o \ ccflags-y += -I$(src) -I$(src)/$(GVT_DIR) i915-y += $(addprefix $(GVT_DIR)/, $(GVT_SOURCE)) -obj-$(CONFIG_DRM_I915_GVT_KVMGT) += $(GVT_DIR)/kvmgt.o diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c index 359d37d5c958..1fa2f65c3cd1 100644 --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c @@ -180,7 +180,7 @@ static void free_vgpu_fence(struct intel_vgpu *vgpu) } mutex_unlock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); } static int alloc_vgpu_fence(struct intel_vgpu *vgpu) @@ -206,7 +206,7 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu) _clear_vgpu_fence(vgpu); mutex_unlock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return 0; out_free_fence: gvt_vgpu_err("Failed to alloc fences\n"); @@ -219,7 +219,7 @@ out_free_fence: vgpu->fence.regs[i] = NULL; } mutex_unlock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return -ENOSPC; } @@ -317,7 +317,7 @@ void intel_vgpu_reset_resource(struct intel_vgpu *vgpu) intel_runtime_pm_get(dev_priv); _clear_vgpu_fence(vgpu); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); } /** diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 77ae634eb11c..35b4ec3f7618 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -55,10 +55,10 @@ struct sub_op_bits { int low; }; struct decode_info { - char *name; + const char *name; int op_len; int nr_sub_op; - struct sub_op_bits *sub_op; + const struct sub_op_bits *sub_op; }; #define MAX_CMD_BUDGET 0x7fffffff @@ -375,7 +375,7 @@ typedef int (*parser_cmd_handler)(struct parser_exec_state *s); #define ADDR_FIX_5(x1, x2, x3, x4, x5) (ADDR_FIX_1(x1) | ADDR_FIX_4(x2, x3, x4, x5)) struct cmd_info { - char *name; + const char *name; u32 opcode; #define F_LEN_MASK (1U<<0) @@ -399,10 +399,10 @@ struct cmd_info { #define R_VECS (1 << VECS) #define R_ALL (R_RCS | R_VCS | R_BCS | R_VECS) /* rings that support this cmd: BLT/RCS/VCS/VECS */ - uint16_t rings; + u16 rings; /* devices that support this cmd: SNB/IVB/HSW/... */ - uint16_t devices; + u16 devices; /* which DWords are address that need fix up. * bit 0 means a 32-bit non address operand in command @@ -412,20 +412,20 @@ struct cmd_info { * No matter the address length, each address only takes * one bit in the bitmap. */ - uint16_t addr_bitmap; + u16 addr_bitmap; /* flag == F_LEN_CONST : command length * flag == F_LEN_VAR : length bias bits * Note: length is in DWord */ - uint8_t len; + u8 len; parser_cmd_handler handler; }; struct cmd_entry { struct hlist_node hlist; - struct cmd_info *info; + const struct cmd_info *info; }; enum { @@ -474,7 +474,7 @@ struct parser_exec_state { int saved_buf_addr_type; bool is_ctx_wa; - struct cmd_info *info; + const struct cmd_info *info; struct intel_vgpu_workload *workload; }; @@ -485,12 +485,12 @@ struct parser_exec_state { static unsigned long bypass_scan_mask = 0; /* ring ALL, type = 0 */ -static struct sub_op_bits sub_op_mi[] = { +static const struct sub_op_bits sub_op_mi[] = { {31, 29}, {28, 23}, }; -static struct decode_info decode_info_mi = { +static const struct decode_info decode_info_mi = { "MI", OP_LEN_MI, ARRAY_SIZE(sub_op_mi), @@ -498,12 +498,12 @@ static struct decode_info decode_info_mi = { }; /* ring RCS, command type 2 */ -static struct sub_op_bits sub_op_2d[] = { +static const struct sub_op_bits sub_op_2d[] = { {31, 29}, {28, 22}, }; -static struct decode_info decode_info_2d = { +static const struct decode_info decode_info_2d = { "2D", OP_LEN_2D, ARRAY_SIZE(sub_op_2d), @@ -511,14 +511,14 @@ static struct decode_info decode_info_2d = { }; /* ring RCS, command type 3 */ -static struct sub_op_bits sub_op_3d_media[] = { +static const struct sub_op_bits sub_op_3d_media[] = { {31, 29}, {28, 27}, {26, 24}, {23, 16}, }; -static struct decode_info decode_info_3d_media = { +static const struct decode_info decode_info_3d_media = { "3D_Media", OP_LEN_3D_MEDIA, ARRAY_SIZE(sub_op_3d_media), @@ -526,7 +526,7 @@ static struct decode_info decode_info_3d_media = { }; /* ring VCS, command type 3 */ -static struct sub_op_bits sub_op_mfx_vc[] = { +static const struct sub_op_bits sub_op_mfx_vc[] = { {31, 29}, {28, 27}, {26, 24}, @@ -534,7 +534,7 @@ static struct sub_op_bits sub_op_mfx_vc[] = { {20, 16}, }; -static struct decode_info decode_info_mfx_vc = { +static const struct decode_info decode_info_mfx_vc = { "MFX_VC", OP_LEN_MFX_VC, ARRAY_SIZE(sub_op_mfx_vc), @@ -542,7 +542,7 @@ static struct decode_info decode_info_mfx_vc = { }; /* ring VECS, command type 3 */ -static struct sub_op_bits sub_op_vebox[] = { +static const struct sub_op_bits sub_op_vebox[] = { {31, 29}, {28, 27}, {26, 24}, @@ -550,14 +550,14 @@ static struct sub_op_bits sub_op_vebox[] = { {20, 16}, }; -static struct decode_info decode_info_vebox = { +static const struct decode_info decode_info_vebox = { "VEBOX", OP_LEN_VEBOX, ARRAY_SIZE(sub_op_vebox), sub_op_vebox, }; -static struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = { +static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = { [RCS] = { &decode_info_mi, NULL, @@ -616,7 +616,7 @@ static struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = { static inline u32 get_opcode(u32 cmd, int ring_id) { - struct decode_info *d_info; + const struct decode_info *d_info; d_info = ring_decode_info[ring_id][CMD_TYPE(cmd)]; if (d_info == NULL) @@ -625,7 +625,7 @@ static inline u32 get_opcode(u32 cmd, int ring_id) return cmd >> (32 - d_info->op_len); } -static inline struct cmd_info *find_cmd_entry(struct intel_gvt *gvt, +static inline const struct cmd_info *find_cmd_entry(struct intel_gvt *gvt, unsigned int opcode, int ring_id) { struct cmd_entry *e; @@ -638,7 +638,7 @@ static inline struct cmd_info *find_cmd_entry(struct intel_gvt *gvt, return NULL; } -static inline struct cmd_info *get_cmd_info(struct intel_gvt *gvt, +static inline const struct cmd_info *get_cmd_info(struct intel_gvt *gvt, u32 cmd, int ring_id) { u32 opcode; @@ -657,7 +657,7 @@ static inline u32 sub_op_val(u32 cmd, u32 hi, u32 low) static inline void print_opcode(u32 cmd, int ring_id) { - struct decode_info *d_info; + const struct decode_info *d_info; int i; d_info = ring_decode_info[ring_id][CMD_TYPE(cmd)]; @@ -776,7 +776,7 @@ static inline int ip_gma_advance(struct parser_exec_state *s, return 0; } -static inline int get_cmd_length(struct cmd_info *info, u32 cmd) +static inline int get_cmd_length(const struct cmd_info *info, u32 cmd) { if ((info->flag & F_LEN_MASK) == F_LEN_CONST) return info->len; @@ -901,7 +901,8 @@ static int cmd_reg_handler(struct parser_exec_state *s, * It's good enough to support initializing mmio by lri command in * vgpu inhibit context on KBL. */ - if (IS_KABYLAKE(s->vgpu->gvt->dev_priv) && + if ((IS_KABYLAKE(s->vgpu->gvt->dev_priv) + || IS_COFFEELAKE(s->vgpu->gvt->dev_priv)) && intel_gvt_mmio_is_in_ctx(gvt, offset) && !strncmp(cmd, "lri", 3)) { intel_gvt_hypervisor_read_gpa(s->vgpu, @@ -1280,9 +1281,7 @@ static int gen8_check_mi_display_flip(struct parser_exec_state *s, if (!info->async_flip) return 0; - if (IS_SKYLAKE(dev_priv) - || IS_KABYLAKE(dev_priv) - || IS_BROXTON(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 9) { stride = vgpu_vreg_t(s->vgpu, info->stride_reg) & GENMASK(9, 0); tile = (vgpu_vreg_t(s->vgpu, info->ctrl_reg) & GENMASK(12, 10)) >> 10; @@ -1310,9 +1309,7 @@ static int gen8_update_plane_mmio_from_mi_display_flip( set_mask_bits(&vgpu_vreg_t(vgpu, info->surf_reg), GENMASK(31, 12), info->surf_val << 12); - if (IS_SKYLAKE(dev_priv) - || IS_KABYLAKE(dev_priv) - || IS_BROXTON(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 9) { set_mask_bits(&vgpu_vreg_t(vgpu, info->stride_reg), GENMASK(9, 0), info->stride_val); set_mask_bits(&vgpu_vreg_t(vgpu, info->ctrl_reg), GENMASK(12, 10), @@ -1336,9 +1333,7 @@ static int decode_mi_display_flip(struct parser_exec_state *s, if (IS_BROADWELL(dev_priv)) return gen8_decode_mi_display_flip(s, info); - if (IS_SKYLAKE(dev_priv) - || IS_KABYLAKE(dev_priv) - || IS_BROXTON(dev_priv)) + if (INTEL_GEN(dev_priv) >= 9) return skl_decode_mi_display_flip(s, info); return -ENODEV; @@ -1643,8 +1638,8 @@ static int batch_buffer_needs_scan(struct parser_exec_state *s) static int find_bb_size(struct parser_exec_state *s, unsigned long *bb_size) { unsigned long gma = 0; - struct cmd_info *info; - uint32_t cmd_len = 0; + const struct cmd_info *info; + u32 cmd_len = 0; bool bb_end = false; struct intel_vgpu *vgpu = s->vgpu; u32 cmd; @@ -1842,7 +1837,7 @@ static int cmd_handler_mi_batch_buffer_start(struct parser_exec_state *s) static int mi_noop_index; -static struct cmd_info cmd_info[] = { +static const struct cmd_info cmd_info[] = { {"MI_NOOP", OP_MI_NOOP, F_LEN_CONST, R_ALL, D_ALL, 0, 1, NULL}, {"MI_SET_PREDICATE", OP_MI_SET_PREDICATE, F_LEN_CONST, R_ALL, D_ALL, @@ -2521,7 +2516,7 @@ static void add_cmd_entry(struct intel_gvt *gvt, struct cmd_entry *e) static int cmd_parser_exec(struct parser_exec_state *s) { struct intel_vgpu *vgpu = s->vgpu; - struct cmd_info *info; + const struct cmd_info *info; u32 cmd; int ret = 0; @@ -2683,7 +2678,7 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) I915_GTT_PAGE_SIZE))) return -EINVAL; - ring_tail = wa_ctx->indirect_ctx.size + 3 * sizeof(uint32_t); + ring_tail = wa_ctx->indirect_ctx.size + 3 * sizeof(u32); ring_size = round_up(wa_ctx->indirect_ctx.size + CACHELINE_BYTES, PAGE_SIZE); gma_head = wa_ctx->indirect_ctx.guest_gma; @@ -2850,7 +2845,7 @@ put_obj: static int combine_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) { - uint32_t per_ctx_start[CACHELINE_DWORDS] = {0}; + u32 per_ctx_start[CACHELINE_DWORDS] = {0}; unsigned char *bb_start_sva; if (!wa_ctx->per_ctx.valid) @@ -2895,10 +2890,10 @@ int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) return 0; } -static struct cmd_info *find_cmd_entry_any_ring(struct intel_gvt *gvt, +static const struct cmd_info *find_cmd_entry_any_ring(struct intel_gvt *gvt, unsigned int opcode, unsigned long rings) { - struct cmd_info *info = NULL; + const struct cmd_info *info = NULL; unsigned int ring; for_each_set_bit(ring, &rings, I915_NUM_ENGINES) { @@ -2913,7 +2908,7 @@ static int init_cmd_table(struct intel_gvt *gvt) { int i; struct cmd_entry *e; - struct cmd_info *info; + const struct cmd_info *info; unsigned int gen_type; gen_type = intel_gvt_get_device_type(gvt); diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index df1e14145747..035479e273be 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -198,7 +198,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) SDE_PORTC_HOTPLUG_CPT | SDE_PORTD_HOTPLUG_CPT); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv)) { vgpu_vreg_t(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT | SDE_PORTE_HOTPLUG_SPT); vgpu_vreg_t(vgpu, SKL_FUSE_STATUS) |= @@ -273,7 +274,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED; } - if ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) && + if ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv)) && intel_vgpu_has_monitor_on_port(vgpu, PORT_E)) { vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTE_HOTPLUG_SPT; } @@ -340,6 +342,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, port->dpcd->data_valid = true; port->dpcd->data[DPCD_SINK_COUNT] = 0x1; port->type = type; + port->id = resolution; emulate_monitor_status_change(vgpu); @@ -443,6 +446,36 @@ void intel_gvt_emulate_vblank(struct intel_gvt *gvt) } /** + * intel_vgpu_emulate_hotplug - trigger hotplug event for vGPU + * @vgpu: a vGPU + * @conncted: link state + * + * This function is used to trigger hotplug interrupt for vGPU + * + */ +void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected) +{ + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + + /* TODO: add more platforms support */ + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (connected) { + vgpu_vreg_t(vgpu, SFUSE_STRAP) |= + SFUSE_STRAP_DDID_DETECTED; + vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT; + } else { + vgpu_vreg_t(vgpu, SFUSE_STRAP) &= + ~SFUSE_STRAP_DDID_DETECTED; + vgpu_vreg_t(vgpu, SDEISR) &= ~SDE_PORTD_HOTPLUG_CPT; + } + vgpu_vreg_t(vgpu, SDEIIR) |= SDE_PORTD_HOTPLUG_CPT; + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTD_HOTPLUG_STATUS_MASK; + intel_vgpu_trigger_virtual_event(vgpu, DP_D_HOTPLUG); + } +} + +/** * intel_vgpu_clean_display - clean vGPU virtual display emulation * @vgpu: a vGPU * @@ -453,7 +486,8 @@ void intel_vgpu_clean_display(struct intel_vgpu *vgpu) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv)) clean_virtual_dp_monitor(vgpu, PORT_D); else clean_virtual_dp_monitor(vgpu, PORT_B); @@ -476,7 +510,8 @@ int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution) intel_vgpu_init_i2c_edid(vgpu); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv)) return setup_virtual_dp_monitor(vgpu, PORT_D, GVT_DP_D, resolution); else diff --git a/drivers/gpu/drm/i915/gvt/display.h b/drivers/gpu/drm/i915/gvt/display.h index ea7c1c525b8c..a87f33e6a23c 100644 --- a/drivers/gpu/drm/i915/gvt/display.h +++ b/drivers/gpu/drm/i915/gvt/display.h @@ -146,18 +146,19 @@ enum intel_vgpu_port_type { GVT_PORT_MAX }; +enum intel_vgpu_edid { + GVT_EDID_1024_768, + GVT_EDID_1920_1200, + GVT_EDID_NUM, +}; + struct intel_vgpu_port { /* per display EDID information */ struct intel_vgpu_edid_data *edid; /* per display DPCD information */ struct intel_vgpu_dpcd_data *dpcd; int type; -}; - -enum intel_vgpu_edid { - GVT_EDID_1024_768, - GVT_EDID_1920_1200, - GVT_EDID_NUM, + enum intel_vgpu_edid id; }; static inline char *vgpu_edid_str(enum intel_vgpu_edid id) @@ -172,6 +173,30 @@ static inline char *vgpu_edid_str(enum intel_vgpu_edid id) } } +static inline unsigned int vgpu_edid_xres(enum intel_vgpu_edid id) +{ + switch (id) { + case GVT_EDID_1024_768: + return 1024; + case GVT_EDID_1920_1200: + return 1920; + default: + return 0; + } +} + +static inline unsigned int vgpu_edid_yres(enum intel_vgpu_edid id) +{ + switch (id) { + case GVT_EDID_1024_768: + return 768; + case GVT_EDID_1920_1200: + return 1200; + default: + return 0; + } +} + void intel_gvt_emulate_vblank(struct intel_gvt *gvt); void intel_gvt_check_vblank_emulation(struct intel_gvt *gvt); diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 51ed99a37803..3e7e2b80c857 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -29,7 +29,6 @@ */ #include <linux/dma-buf.h> -#include <drm/drmP.h> #include <linux/vfio.h> #include "i915_drv.h" @@ -164,9 +163,7 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev, obj->read_domains = I915_GEM_DOMAIN_GTT; obj->write_domain = 0; - if (IS_SKYLAKE(dev_priv) - || IS_KABYLAKE(dev_priv) - || IS_BROXTON(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 9) { unsigned int tiling_mode = 0; unsigned int stride = 0; diff --git a/drivers/gpu/drm/i915/gvt/edid.c b/drivers/gpu/drm/i915/gvt/edid.c index 5d4bb35bb889..1fe6124918f1 100644 --- a/drivers/gpu/drm/i915/gvt/edid.c +++ b/drivers/gpu/drm/i915/gvt/edid.c @@ -77,16 +77,32 @@ static unsigned char edid_get_byte(struct intel_vgpu *vgpu) return chr; } +static inline int cnp_get_port_from_gmbus0(u32 gmbus0) +{ + int port_select = gmbus0 & _GMBUS_PIN_SEL_MASK; + int port = -EINVAL; + + if (port_select == GMBUS_PIN_1_BXT) + port = PORT_B; + else if (port_select == GMBUS_PIN_2_BXT) + port = PORT_C; + else if (port_select == GMBUS_PIN_3_BXT) + port = PORT_D; + else if (port_select == GMBUS_PIN_4_CNP) + port = PORT_E; + return port; +} + static inline int bxt_get_port_from_gmbus0(u32 gmbus0) { int port_select = gmbus0 & _GMBUS_PIN_SEL_MASK; int port = -EINVAL; - if (port_select == 1) + if (port_select == GMBUS_PIN_1_BXT) port = PORT_B; - else if (port_select == 2) + else if (port_select == GMBUS_PIN_2_BXT) port = PORT_C; - else if (port_select == 3) + else if (port_select == GMBUS_PIN_3_BXT) port = PORT_D; return port; } @@ -96,13 +112,13 @@ static inline int get_port_from_gmbus0(u32 gmbus0) int port_select = gmbus0 & _GMBUS_PIN_SEL_MASK; int port = -EINVAL; - if (port_select == 2) + if (port_select == GMBUS_PIN_VGADDC) port = PORT_E; - else if (port_select == 4) + else if (port_select == GMBUS_PIN_DPC) port = PORT_C; - else if (port_select == 5) + else if (port_select == GMBUS_PIN_DPB) port = PORT_B; - else if (port_select == 6) + else if (port_select == GMBUS_PIN_DPD) port = PORT_D; return port; } @@ -133,6 +149,8 @@ static int gmbus0_mmio_write(struct intel_vgpu *vgpu, if (IS_BROXTON(dev_priv)) port = bxt_get_port_from_gmbus0(pin_select); + else if (IS_COFFEELAKE(dev_priv)) + port = cnp_get_port_from_gmbus0(pin_select); else port = get_port_from_gmbus0(pin_select); if (WARN_ON(port < 0)) diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c index 85e6736f0a32..65e847392aea 100644 --- a/drivers/gpu/drm/i915/gvt/fb_decoder.c +++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c @@ -151,9 +151,7 @@ static u32 intel_vgpu_get_stride(struct intel_vgpu *vgpu, int pipe, u32 stride_reg = vgpu_vreg_t(vgpu, DSPSTRIDE(pipe)) & stride_mask; u32 stride = stride_reg; - if (IS_SKYLAKE(dev_priv) - || IS_KABYLAKE(dev_priv) - || IS_BROXTON(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 9) { switch (tiled) { case PLANE_CTL_TILED_LINEAR: stride = stride_reg * 64; @@ -217,9 +215,7 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, if (!plane->enabled) return -ENODEV; - if (IS_SKYLAKE(dev_priv) - || IS_KABYLAKE(dev_priv) - || IS_BROXTON(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 9) { plane->tiled = val & PLANE_CTL_TILED_MASK; fmt = skl_format_to_drm( val & PLANE_CTL_FORMAT_MASK, @@ -260,9 +256,7 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, } plane->stride = intel_vgpu_get_stride(vgpu, pipe, plane->tiled, - (IS_SKYLAKE(dev_priv) - || IS_KABYLAKE(dev_priv) - || IS_BROXTON(dev_priv)) ? + (INTEL_GEN(dev_priv) >= 9) ? (_PRI_PLANE_STRIDE_MASK >> 6) : _PRI_PLANE_STRIDE_MASK, plane->bpp); diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 733a2a0d0c30..43f4242062dd 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -185,54 +185,9 @@ static const struct intel_gvt_ops intel_gvt_ops = { .vgpu_query_plane = intel_vgpu_query_plane, .vgpu_get_dmabuf = intel_vgpu_get_dmabuf, .write_protect_handler = intel_vgpu_page_track_handler, + .emulate_hotplug = intel_vgpu_emulate_hotplug, }; -/** - * intel_gvt_init_host - Load MPT modules and detect if we're running in host - * - * This function is called at the driver loading stage. If failed to find a - * loadable MPT module or detect currently we're running in a VM, then GVT-g - * will be disabled - * - * Returns: - * Zero on success, negative error code if failed. - * - */ -int intel_gvt_init_host(void) -{ - if (intel_gvt_host.initialized) - return 0; - - /* Xen DOM U */ - if (xen_domain() && !xen_initial_domain()) - return -ENODEV; - - /* Try to load MPT modules for hypervisors */ - if (xen_initial_domain()) { - /* In Xen dom0 */ - intel_gvt_host.mpt = try_then_request_module( - symbol_get(xengt_mpt), "xengt"); - intel_gvt_host.hypervisor_type = INTEL_GVT_HYPERVISOR_XEN; - } else { -#if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT) - /* not in Xen. Try KVMGT */ - intel_gvt_host.mpt = try_then_request_module( - symbol_get(kvmgt_mpt), "kvmgt"); - intel_gvt_host.hypervisor_type = INTEL_GVT_HYPERVISOR_KVM; -#endif - } - - /* Fail to load MPT modules - bail out */ - if (!intel_gvt_host.mpt) - return -EINVAL; - - gvt_dbg_core("Running with hypervisor %s in host mode\n", - supported_hypervisors[intel_gvt_host.hypervisor_type]); - - intel_gvt_host.initialized = true; - return 0; -} - static void init_device_info(struct intel_gvt *gvt) { struct intel_gvt_device_info *info = &gvt->device_info; @@ -316,7 +271,6 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv) return; intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu); - intel_gvt_hypervisor_host_exit(&dev_priv->drm.pdev->dev, gvt); intel_gvt_cleanup_vgpu_type_groups(gvt); intel_gvt_clean_vgpu_types(gvt); @@ -352,13 +306,6 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) struct intel_vgpu *vgpu; int ret; - /* - * Cannot initialize GVT device without intel_gvt_host gets - * initialized first. - */ - if (WARN_ON(!intel_gvt_host.initialized)) - return -EINVAL; - if (WARN_ON(dev_priv->gvt)) return -EEXIST; @@ -420,13 +367,6 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) goto out_clean_types; } - ret = intel_gvt_hypervisor_host_init(&dev_priv->drm.pdev->dev, gvt, - &intel_gvt_ops); - if (ret) { - gvt_err("failed to register gvt-g host device: %d\n", ret); - goto out_clean_types; - } - vgpu = intel_gvt_create_idle_vgpu(gvt); if (IS_ERR(vgpu)) { ret = PTR_ERR(vgpu); @@ -441,6 +381,8 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) gvt_dbg_core("gvt device initialization is done\n"); dev_priv->gvt = gvt; + intel_gvt_host.dev = &dev_priv->drm.pdev->dev; + intel_gvt_host.initialized = true; return 0; out_clean_types: @@ -467,6 +409,45 @@ out_clean_idr: return ret; } -#if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT) -MODULE_SOFTDEP("pre: kvmgt"); -#endif +int +intel_gvt_register_hypervisor(struct intel_gvt_mpt *m) +{ + int ret; + void *gvt; + + if (!intel_gvt_host.initialized) + return -ENODEV; + + if (m->type != INTEL_GVT_HYPERVISOR_KVM && + m->type != INTEL_GVT_HYPERVISOR_XEN) + return -EINVAL; + + /* Get a reference for device model module */ + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + intel_gvt_host.mpt = m; + intel_gvt_host.hypervisor_type = m->type; + gvt = (void *)kdev_to_i915(intel_gvt_host.dev)->gvt; + + ret = intel_gvt_hypervisor_host_init(intel_gvt_host.dev, gvt, + &intel_gvt_ops); + if (ret < 0) { + gvt_err("Failed to init %s hypervisor module\n", + supported_hypervisors[intel_gvt_host.hypervisor_type]); + module_put(THIS_MODULE); + return -ENODEV; + } + gvt_dbg_core("Running with hypervisor %s in host mode\n", + supported_hypervisors[intel_gvt_host.hypervisor_type]); + return 0; +} +EXPORT_SYMBOL_GPL(intel_gvt_register_hypervisor); + +void +intel_gvt_unregister_hypervisor(void) +{ + intel_gvt_hypervisor_host_exit(intel_gvt_host.dev); + module_put(THIS_MODULE); +} +EXPORT_SYMBOL_GPL(intel_gvt_unregister_hypervisor); diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index b4ab1dad0143..8bce09de4b82 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -52,12 +52,8 @@ #define GVT_MAX_VGPU 8 -enum { - INTEL_GVT_HYPERVISOR_XEN = 0, - INTEL_GVT_HYPERVISOR_KVM, -}; - struct intel_gvt_host { + struct device *dev; bool initialized; int hypervisor_type; struct intel_gvt_mpt *mpt; @@ -540,6 +536,8 @@ int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset, int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes); +void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected); + static inline u64 intel_vgpu_get_bar_gpa(struct intel_vgpu *vgpu, int bar) { /* We are 64bit bar. */ @@ -581,6 +579,7 @@ struct intel_gvt_ops { int (*vgpu_get_dmabuf)(struct intel_vgpu *vgpu, unsigned int); int (*write_protect_handler)(struct intel_vgpu *, u64, void *, unsigned int); + void (*emulate_hotplug)(struct intel_vgpu *vgpu, bool connected); }; @@ -597,7 +596,7 @@ static inline void mmio_hw_access_pre(struct drm_i915_private *dev_priv) static inline void mmio_hw_access_post(struct drm_i915_private *dev_priv) { - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); } /** diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index b5475c91e2ef..9c106e47e640 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -57,6 +57,8 @@ unsigned long intel_gvt_get_device_type(struct intel_gvt *gvt) return D_KBL; else if (IS_BROXTON(gvt->dev_priv)) return D_BXT; + else if (IS_COFFEELAKE(gvt->dev_priv)) + return D_CFL; return 0; } @@ -276,14 +278,12 @@ static int mul_force_wake_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { u32 old, new; - uint32_t ack_reg_offset; + u32 ack_reg_offset; old = vgpu_vreg(vgpu, offset); new = CALC_MODE_MASK_REG(old, *(u32 *)p_data); - if (IS_SKYLAKE(vgpu->gvt->dev_priv) - || IS_KABYLAKE(vgpu->gvt->dev_priv) - || IS_BROXTON(vgpu->gvt->dev_priv)) { + if (INTEL_GEN(vgpu->gvt->dev_priv) >= 9) { switch (offset) { case FORCEWAKE_RENDER_GEN9_REG: ack_reg_offset = FORCEWAKE_ACK_RENDER_GEN9_REG; @@ -833,7 +833,7 @@ static int dp_aux_ch_ctl_trans_done(struct intel_vgpu *vgpu, u32 value, } static void dp_aux_ch_ctl_link_training(struct intel_vgpu_dpcd_data *dpcd, - uint8_t t) + u8 t) { if ((t & DPCD_TRAINING_PATTERN_SET_MASK) == DPCD_TRAINING_PATTERN_1) { /* training pattern 1 for CR */ @@ -889,9 +889,7 @@ static int dp_aux_ch_ctl_mmio_write(struct intel_vgpu *vgpu, write_vreg(vgpu, offset, p_data, bytes); data = vgpu_vreg(vgpu, offset); - if ((IS_SKYLAKE(vgpu->gvt->dev_priv) - || IS_KABYLAKE(vgpu->gvt->dev_priv) - || IS_BROXTON(vgpu->gvt->dev_priv)) + if ((INTEL_GEN(vgpu->gvt->dev_priv) >= 9) && offset != _REG_SKL_DP_AUX_CH_CTL(port_index)) { /* SKL DPB/C/D aux ctl register changed */ return 0; @@ -919,7 +917,7 @@ static int dp_aux_ch_ctl_mmio_write(struct intel_vgpu *vgpu, if (op == GVT_AUX_NATIVE_WRITE) { int t; - uint8_t buf[16]; + u8 buf[16]; if ((addr + len + 1) >= DPCD_SIZE) { /* @@ -1407,7 +1405,8 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, switch (cmd) { case GEN9_PCODE_READ_MEM_LATENCY: if (IS_SKYLAKE(vgpu->gvt->dev_priv) - || IS_KABYLAKE(vgpu->gvt->dev_priv)) { + || IS_KABYLAKE(vgpu->gvt->dev_priv) + || IS_COFFEELAKE(vgpu->gvt->dev_priv)) { /** * "Read memory latency" command on gen9. * Below memory latency values are read @@ -1431,7 +1430,8 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, break; case SKL_PCODE_CDCLK_CONTROL: if (IS_SKYLAKE(vgpu->gvt->dev_priv) - || IS_KABYLAKE(vgpu->gvt->dev_priv)) + || IS_KABYLAKE(vgpu->gvt->dev_priv) + || IS_COFFEELAKE(vgpu->gvt->dev_priv)) *data0 = SKL_CDCLK_READY_FOR_CHANGE; break; case GEN6_PCODE_READ_RC6VIDS: @@ -3041,8 +3041,8 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_DFH(GEN9_WM_CHICKEN3, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_D(_MMIO(0x4ab8), D_KBL); - MMIO_D(_MMIO(0x2248), D_KBL | D_SKL); + MMIO_D(_MMIO(0x4ab8), D_KBL | D_CFL); + MMIO_D(_MMIO(0x2248), D_SKL_PLUS); return 0; } @@ -3302,7 +3302,8 @@ int intel_gvt_setup_mmio_info(struct intel_gvt *gvt) if (ret) goto err; } else if (IS_SKYLAKE(dev_priv) - || IS_KABYLAKE(dev_priv)) { + || IS_KABYLAKE(dev_priv) + || IS_COFFEELAKE(dev_priv)) { ret = init_broadwell_mmio_info(gvt); if (ret) goto err; diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index 5af11cf1b482..5e01cc8d9b16 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -33,13 +33,19 @@ #ifndef _GVT_HYPERCALL_H_ #define _GVT_HYPERCALL_H_ +enum hypervisor_type { + INTEL_GVT_HYPERVISOR_XEN = 0, + INTEL_GVT_HYPERVISOR_KVM, +}; + /* * Specific GVT-g MPT modules function collections. Currently GVT-g supports * both Xen and KVM by providing dedicated hypervisor-related MPT modules. */ struct intel_gvt_mpt { + enum hypervisor_type type; int (*host_init)(struct device *dev, void *gvt, const void *ops); - void (*host_exit)(struct device *dev, void *gvt); + void (*host_exit)(struct device *dev); int (*attach_vgpu)(void *vgpu, unsigned long *handle); void (*detach_vgpu)(unsigned long handle); int (*inject_msi)(unsigned long handle, u32 addr, u16 data); @@ -61,12 +67,12 @@ struct intel_gvt_mpt { int (*set_trap_area)(unsigned long handle, u64 start, u64 end, bool map); int (*set_opregion)(void *vgpu); + int (*set_edid)(void *vgpu, int port_num); int (*get_vfio_device)(void *vgpu); void (*put_vfio_device)(void *vgpu); bool (*is_valid_gfn)(unsigned long handle, unsigned long gfn); }; extern struct intel_gvt_mpt xengt_mpt; -extern struct intel_gvt_mpt kvmgt_mpt; #endif /* _GVT_HYPERCALL_H_ */ diff --git a/drivers/gpu/drm/i915/gvt/interrupt.c b/drivers/gpu/drm/i915/gvt/interrupt.c index 6b9d1354ff29..67125c5eec6e 100644 --- a/drivers/gpu/drm/i915/gvt/interrupt.c +++ b/drivers/gpu/drm/i915/gvt/interrupt.c @@ -581,9 +581,7 @@ static void gen8_init_irq( SET_BIT_INFO(irq, 4, PRIMARY_C_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_C); SET_BIT_INFO(irq, 5, SPRITE_C_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_C); - } else if (IS_SKYLAKE(gvt->dev_priv) - || IS_KABYLAKE(gvt->dev_priv) - || IS_BROXTON(gvt->dev_priv)) { + } else if (INTEL_GEN(gvt->dev_priv) >= 9) { SET_BIT_INFO(irq, 25, AUX_CHANNEL_B, INTEL_GVT_IRQ_INFO_DE_PORT); SET_BIT_INFO(irq, 26, AUX_CHANNEL_C, INTEL_GVT_IRQ_INFO_DE_PORT); SET_BIT_INFO(irq, 27, AUX_CHANNEL_D, INTEL_GVT_IRQ_INFO_DE_PORT); diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index c1072143da1d..63eef86a2a85 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -57,6 +57,8 @@ static const struct intel_gvt_ops *intel_gvt_ops; #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT) #define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) +#define EDID_BLOB_OFFSET (PAGE_SIZE/2) + #define OPREGION_SIGNATURE "IntelGraphicsMem" struct vfio_region; @@ -76,6 +78,11 @@ struct vfio_region { void *data; }; +struct vfio_edid_region { + struct vfio_region_gfx_edid vfio_edid_regs; + void *edid_blob; +}; + struct kvmgt_pgfn { gfn_t gfn; struct hlist_node hnode; @@ -427,6 +434,111 @@ static const struct intel_vgpu_regops intel_vgpu_regops_opregion = { .release = intel_vgpu_reg_release_opregion, }; +static int handle_edid_regs(struct intel_vgpu *vgpu, + struct vfio_edid_region *region, char *buf, + size_t count, u16 offset, bool is_write) +{ + struct vfio_region_gfx_edid *regs = ®ion->vfio_edid_regs; + unsigned int data; + + if (offset + count > sizeof(*regs)) + return -EINVAL; + + if (count != 4) + return -EINVAL; + + if (is_write) { + data = *((unsigned int *)buf); + switch (offset) { + case offsetof(struct vfio_region_gfx_edid, link_state): + if (data == VFIO_DEVICE_GFX_LINK_STATE_UP) { + if (!drm_edid_block_valid( + (u8 *)region->edid_blob, + 0, + true, + NULL)) { + gvt_vgpu_err("invalid EDID blob\n"); + return -EINVAL; + } + intel_gvt_ops->emulate_hotplug(vgpu, true); + } else if (data == VFIO_DEVICE_GFX_LINK_STATE_DOWN) + intel_gvt_ops->emulate_hotplug(vgpu, false); + else { + gvt_vgpu_err("invalid EDID link state %d\n", + regs->link_state); + return -EINVAL; + } + regs->link_state = data; + break; + case offsetof(struct vfio_region_gfx_edid, edid_size): + if (data > regs->edid_max_size) { + gvt_vgpu_err("EDID size is bigger than %d!\n", + regs->edid_max_size); + return -EINVAL; + } + regs->edid_size = data; + break; + default: + /* read-only regs */ + gvt_vgpu_err("write read-only EDID region at offset %d\n", + offset); + return -EPERM; + } + } else { + memcpy(buf, (char *)regs + offset, count); + } + + return count; +} + +static int handle_edid_blob(struct vfio_edid_region *region, char *buf, + size_t count, u16 offset, bool is_write) +{ + if (offset + count > region->vfio_edid_regs.edid_size) + return -EINVAL; + + if (is_write) + memcpy(region->edid_blob + offset, buf, count); + else + memcpy(buf, region->edid_blob + offset, count); + + return count; +} + +static size_t intel_vgpu_reg_rw_edid(struct intel_vgpu *vgpu, char *buf, + size_t count, loff_t *ppos, bool iswrite) +{ + int ret; + unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - + VFIO_PCI_NUM_REGIONS; + struct vfio_edid_region *region = + (struct vfio_edid_region *)vgpu->vdev.region[i].data; + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + + if (pos < region->vfio_edid_regs.edid_offset) { + ret = handle_edid_regs(vgpu, region, buf, count, pos, iswrite); + } else { + pos -= EDID_BLOB_OFFSET; + ret = handle_edid_blob(region, buf, count, pos, iswrite); + } + + if (ret < 0) + gvt_vgpu_err("failed to access EDID region\n"); + + return ret; +} + +static void intel_vgpu_reg_release_edid(struct intel_vgpu *vgpu, + struct vfio_region *region) +{ + kfree(region->data); +} + +static const struct intel_vgpu_regops intel_vgpu_regops_edid = { + .rw = intel_vgpu_reg_rw_edid, + .release = intel_vgpu_reg_release_edid, +}; + static int intel_vgpu_register_reg(struct intel_vgpu *vgpu, unsigned int type, unsigned int subtype, const struct intel_vgpu_regops *ops, @@ -493,6 +605,36 @@ static int kvmgt_set_opregion(void *p_vgpu) return ret; } +static int kvmgt_set_edid(void *p_vgpu, int port_num) +{ + struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; + struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num); + struct vfio_edid_region *base; + int ret; + + base = kzalloc(sizeof(*base), GFP_KERNEL); + if (!base) + return -ENOMEM; + + /* TODO: Add multi-port and EDID extension block support */ + base->vfio_edid_regs.edid_offset = EDID_BLOB_OFFSET; + base->vfio_edid_regs.edid_max_size = EDID_SIZE; + base->vfio_edid_regs.edid_size = EDID_SIZE; + base->vfio_edid_regs.max_xres = vgpu_edid_xres(port->id); + base->vfio_edid_regs.max_yres = vgpu_edid_yres(port->id); + base->edid_blob = port->edid->edid_block; + + ret = intel_vgpu_register_reg(vgpu, + VFIO_REGION_TYPE_GFX, + VFIO_REGION_SUBTYPE_GFX_EDID, + &intel_vgpu_regops_edid, EDID_SIZE, + VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE | + VFIO_REGION_INFO_FLAG_CAPS, base); + + return ret; +} + static void kvmgt_put_vfio_device(void *vgpu) { if (WARN_ON(!((struct intel_vgpu *)vgpu)->vdev.vfio_device)) @@ -627,6 +769,12 @@ static int intel_vgpu_open(struct mdev_device *mdev) goto undo_iommu; } + /* Take a module reference as mdev core doesn't take + * a reference for vendor driver. + */ + if (!try_module_get(THIS_MODULE)) + goto undo_group; + ret = kvmgt_guest_init(mdev); if (ret) goto undo_group; @@ -679,6 +827,9 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu) &vgpu->vdev.group_notifier); WARN(ret, "vfio_unregister_notifier for group failed: %d\n", ret); + /* dereference module reference taken at open */ + module_put(THIS_MODULE); + info = (struct kvmgt_guest_info *)vgpu->handle; kvmgt_guest_exit(info); @@ -703,7 +854,7 @@ static void intel_vgpu_release_work(struct work_struct *work) __intel_vgpu_release(vgpu); } -static uint64_t intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar) +static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar) { u32 start_lo, start_hi; u32 mem_type; @@ -730,10 +881,10 @@ static uint64_t intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar) return ((u64)start_hi << 32) | start_lo; } -static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, uint64_t off, +static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, u64 off, void *buf, unsigned int count, bool is_write) { - uint64_t bar_start = intel_vgpu_get_bar_addr(vgpu, bar); + u64 bar_start = intel_vgpu_get_bar_addr(vgpu, bar); int ret; if (is_write) @@ -745,13 +896,13 @@ static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, uint64_t off, return ret; } -static inline bool intel_vgpu_in_aperture(struct intel_vgpu *vgpu, uint64_t off) +static inline bool intel_vgpu_in_aperture(struct intel_vgpu *vgpu, u64 off) { return off >= vgpu_aperture_offset(vgpu) && off < vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu); } -static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, uint64_t off, +static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, u64 off, void *buf, unsigned long count, bool is_write) { void *aperture_va; @@ -783,7 +934,7 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf, { struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); - uint64_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + u64 pos = *ppos & VFIO_PCI_OFFSET_MASK; int ret = -EINVAL; @@ -1029,7 +1180,7 @@ static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type) static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu, unsigned int index, unsigned int start, - unsigned int count, uint32_t flags, + unsigned int count, u32 flags, void *data) { return 0; @@ -1037,21 +1188,21 @@ static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu, static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu, unsigned int index, unsigned int start, - unsigned int count, uint32_t flags, void *data) + unsigned int count, u32 flags, void *data) { return 0; } static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu, unsigned int index, unsigned int start, unsigned int count, - uint32_t flags, void *data) + u32 flags, void *data) { return 0; } static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu, unsigned int index, unsigned int start, unsigned int count, - uint32_t flags, void *data) + u32 flags, void *data) { struct eventfd_ctx *trigger; @@ -1070,12 +1221,12 @@ static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu, return 0; } -static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, uint32_t flags, +static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, u32 flags, unsigned int index, unsigned int start, unsigned int count, void *data) { int (*func)(struct intel_vgpu *vgpu, unsigned int index, - unsigned int start, unsigned int count, uint32_t flags, + unsigned int start, unsigned int count, u32 flags, void *data) = NULL; switch (index) { @@ -1467,7 +1618,7 @@ static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops) return mdev_register_device(dev, &intel_vgpu_ops); } -static void kvmgt_host_exit(struct device *dev, void *gvt) +static void kvmgt_host_exit(struct device *dev) { mdev_unregister_device(dev); } @@ -1849,7 +2000,8 @@ static bool kvmgt_is_valid_gfn(unsigned long handle, unsigned long gfn) return ret; } -struct intel_gvt_mpt kvmgt_mpt = { +static struct intel_gvt_mpt kvmgt_mpt = { + .type = INTEL_GVT_HYPERVISOR_KVM, .host_init = kvmgt_host_init, .host_exit = kvmgt_host_exit, .attach_vgpu = kvmgt_attach_vgpu, @@ -1864,19 +2016,22 @@ struct intel_gvt_mpt kvmgt_mpt = { .dma_map_guest_page = kvmgt_dma_map_guest_page, .dma_unmap_guest_page = kvmgt_dma_unmap_guest_page, .set_opregion = kvmgt_set_opregion, + .set_edid = kvmgt_set_edid, .get_vfio_device = kvmgt_get_vfio_device, .put_vfio_device = kvmgt_put_vfio_device, .is_valid_gfn = kvmgt_is_valid_gfn, }; -EXPORT_SYMBOL_GPL(kvmgt_mpt); static int __init kvmgt_init(void) { + if (intel_gvt_register_hypervisor(&kvmgt_mpt) < 0) + return -ENODEV; return 0; } static void __exit kvmgt_exit(void) { + intel_gvt_unregister_hypervisor(); } module_init(kvmgt_init); diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 43f65848ecd6..ed4df2f6d60b 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -57,7 +57,7 @@ int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa) (reg >= gvt->device_info.gtt_start_offset \ && reg < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) -static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa, +static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, u64 pa, void *p_data, unsigned int bytes, bool read) { struct intel_gvt *gvt = NULL; @@ -99,7 +99,7 @@ static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa, * Returns: * Zero on success, negative error code if failed */ -int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, +int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, u64 pa, void *p_data, unsigned int bytes) { struct intel_gvt *gvt = vgpu->gvt; @@ -171,7 +171,7 @@ out: * Returns: * Zero on success, negative error code if failed */ -int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, +int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, u64 pa, void *p_data, unsigned int bytes) { struct intel_gvt *gvt = vgpu->gvt; diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h index 1ffc69eba30e..5874f1cb4306 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.h +++ b/drivers/gpu/drm/i915/gvt/mmio.h @@ -43,15 +43,16 @@ struct intel_vgpu; #define D_SKL (1 << 1) #define D_KBL (1 << 2) #define D_BXT (1 << 3) +#define D_CFL (1 << 4) -#define D_GEN9PLUS (D_SKL | D_KBL | D_BXT) -#define D_GEN8PLUS (D_BDW | D_SKL | D_KBL | D_BXT) +#define D_GEN9PLUS (D_SKL | D_KBL | D_BXT | D_CFL) +#define D_GEN8PLUS (D_BDW | D_SKL | D_KBL | D_BXT | D_CFL) -#define D_SKL_PLUS (D_SKL | D_KBL | D_BXT) -#define D_BDW_PLUS (D_BDW | D_SKL | D_KBL | D_BXT) +#define D_SKL_PLUS (D_SKL | D_KBL | D_BXT | D_CFL) +#define D_BDW_PLUS (D_BDW | D_SKL | D_KBL | D_BXT | D_CFL) #define D_PRE_SKL (D_BDW) -#define D_ALL (D_BDW | D_SKL | D_KBL | D_BXT) +#define D_ALL (D_BDW | D_SKL | D_KBL | D_BXT | D_CFL) typedef int (*gvt_mmio_func)(struct intel_vgpu *, unsigned int, void *, unsigned int); diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index d6e02c15ef97..7d84cfb9051a 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -353,8 +353,7 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) */ fw = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ | FW_REG_WRITE); - if (ring_id == RCS && (IS_SKYLAKE(dev_priv) || - IS_KABYLAKE(dev_priv) || IS_BROXTON(dev_priv))) + if (ring_id == RCS && (INTEL_GEN(dev_priv) >= 9)) fw |= FORCEWAKE_RENDER; intel_uncore_forcewake_get(dev_priv, fw); @@ -391,7 +390,8 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) return; - if ((IS_KABYLAKE(dev_priv) || IS_BROXTON(dev_priv)) && ring_id == RCS) + if ((IS_KABYLAKE(dev_priv) || IS_BROXTON(dev_priv) + || IS_COFFEELAKE(dev_priv)) && ring_id == RCS) return; if (!pre && !gen9_render_mocs.initialized) @@ -457,9 +457,7 @@ static void switch_mmio(struct intel_vgpu *pre, u32 old_v, new_v; dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv; - if (IS_SKYLAKE(dev_priv) - || IS_KABYLAKE(dev_priv) - || IS_BROXTON(dev_priv)) + if (INTEL_GEN(dev_priv) >= 9) switch_mocs(pre, next, ring_id); for (mmio = dev_priv->gvt->engine_mmio_list.mmio; @@ -471,8 +469,8 @@ static void switch_mmio(struct intel_vgpu *pre, * state image on kabylake, it's initialized by lri command and * save or restore with context together. */ - if ((IS_KABYLAKE(dev_priv) || IS_BROXTON(dev_priv)) - && mmio->in_context) + if ((IS_KABYLAKE(dev_priv) || IS_BROXTON(dev_priv) + || IS_COFFEELAKE(dev_priv)) && mmio->in_context) continue; // save @@ -565,9 +563,7 @@ void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt) { struct engine_mmio *mmio; - if (IS_SKYLAKE(gvt->dev_priv) || - IS_KABYLAKE(gvt->dev_priv) || - IS_BROXTON(gvt->dev_priv)) + if (INTEL_GEN(gvt->dev_priv) >= 9) gvt->engine_mmio_list.mmio = gen9_engine_mmio_list; else gvt->engine_mmio_list.mmio = gen8_engine_mmio_list; diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 67f19992b226..5d8b8f228d8f 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -50,11 +50,10 @@ * Zero on success, negative error code if failed */ static inline int intel_gvt_hypervisor_host_init(struct device *dev, - void *gvt, const void *ops) + void *gvt, const void *ops) { - /* optional to provide */ if (!intel_gvt_host.mpt->host_init) - return 0; + return -ENODEV; return intel_gvt_host.mpt->host_init(dev, gvt, ops); } @@ -62,14 +61,13 @@ static inline int intel_gvt_hypervisor_host_init(struct device *dev, /** * intel_gvt_hypervisor_host_exit - exit GVT-g host side */ -static inline void intel_gvt_hypervisor_host_exit(struct device *dev, - void *gvt) +static inline void intel_gvt_hypervisor_host_exit(struct device *dev) { /* optional to provide */ if (!intel_gvt_host.mpt->host_exit) return; - intel_gvt_host.mpt->host_exit(dev, gvt); + intel_gvt_host.mpt->host_exit(dev); } /** @@ -316,6 +314,23 @@ static inline int intel_gvt_hypervisor_set_opregion(struct intel_vgpu *vgpu) } /** + * intel_gvt_hypervisor_set_edid - Set EDID region for guest + * @vgpu: a vGPU + * @port_num: display port number + * + * Returns: + * Zero on success, negative error code if failed. + */ +static inline int intel_gvt_hypervisor_set_edid(struct intel_vgpu *vgpu, + int port_num) +{ + if (!intel_gvt_host.mpt->set_edid) + return 0; + + return intel_gvt_host.mpt->set_edid(vgpu, port_num); +} + +/** * intel_gvt_hypervisor_get_vfio_device - increase vfio device ref count * @vgpu: a vGPU * @@ -362,4 +377,7 @@ static inline bool intel_gvt_hypervisor_is_valid_gfn( return intel_gvt_host.mpt->is_valid_gfn(vgpu->handle, gfn); } +int intel_gvt_register_hypervisor(struct intel_gvt_mpt *); +void intel_gvt_unregister_hypervisor(void); + #endif /* _GVT_MPT_H_ */ diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index c32e7d5e8629..1c763a27a412 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -94,7 +94,7 @@ static void gvt_balance_timeslice(struct gvt_sched_data *sched_data) { struct vgpu_sched_data *vgpu_data; struct list_head *pos; - static uint64_t stage_check; + static u64 stage_check; int stage = stage_check++ % GVT_TS_BALANCE_STAGE_NUM; /* The timeslice accumulation reset at stage 0, which is @@ -474,6 +474,6 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu) } } spin_unlock_bh(&scheduler->mmio_context_lock); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); mutex_unlock(&vgpu->gvt->sched_lock); } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 1ad8c5e1455d..b7957eefb976 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -299,7 +299,8 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload) void *shadow_ring_buffer_va; u32 *cs; - if ((IS_KABYLAKE(req->i915) || IS_BROXTON(req->i915)) + if ((IS_KABYLAKE(req->i915) || IS_BROXTON(req->i915) + || IS_COFFEELAKE(req->i915)) && is_inhibit_context(req->hw_context)) intel_vgpu_restore_inhibit_context(vgpu, req); @@ -939,9 +940,7 @@ static int workload_thread(void *priv) struct intel_vgpu_workload *workload = NULL; struct intel_vgpu *vgpu = NULL; int ret; - bool need_force_wake = IS_SKYLAKE(gvt->dev_priv) - || IS_KABYLAKE(gvt->dev_priv) - || IS_BROXTON(gvt->dev_priv); + bool need_force_wake = (INTEL_GEN(gvt->dev_priv) >= 9); DEFINE_WAIT_FUNC(wait, woken_wake_function); kfree(p); @@ -997,7 +996,7 @@ complete: intel_uncore_forcewake_put(gvt->dev_priv, FORCEWAKE_ALL); - intel_runtime_pm_put(gvt->dev_priv); + intel_runtime_pm_put_unchecked(gvt->dev_priv); if (ret && (vgpu_is_vm_unhealthy(ret))) enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR); } @@ -1451,7 +1450,7 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, mutex_lock(&dev_priv->drm.struct_mutex); ret = intel_gvt_scan_and_shadow_workload(workload); mutex_unlock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); } if (ret && (vgpu_is_vm_unhealthy(ret))) { diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index ca5529d0e48e..1e9eec6a32fe 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -61,7 +61,7 @@ struct shadow_indirect_ctx { unsigned long guest_gma; unsigned long shadow_gma; void *shadow_va; - uint32_t size; + u32 size; }; #define PER_CTX_ADDR_MASK 0xfffff000 diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h index 1fd64202d74e..6d787750d279 100644 --- a/drivers/gpu/drm/i915/gvt/trace.h +++ b/drivers/gpu/drm/i915/gvt/trace.h @@ -228,7 +228,7 @@ TRACE_EVENT(oos_sync, TRACE_EVENT(gvt_command, TP_PROTO(u8 vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, u32 cmd_len, u32 buf_type, u32 buf_addr_type, - void *workload, char *cmd_name), + void *workload, const char *cmd_name), TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type, buf_addr_type, workload, cmd_name), diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index e1c860f80eb0..720e2b10adaa 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -428,6 +428,12 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_clean_sched_policy; + /*TODO: add more platforms support */ + if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) + ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_D); + if (ret) + goto out_clean_sched_policy; + return vgpu; out_clean_sched_policy: diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c new file mode 100644 index 000000000000..215b6ff8aa73 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_active.c @@ -0,0 +1,286 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "i915_active.h" + +#define BKL(ref) (&(ref)->i915->drm.struct_mutex) + +/* + * Active refs memory management + * + * To be more economical with memory, we reap all the i915_active trees as + * they idle (when we know the active requests are inactive) and allocate the + * nodes from a local slab cache to hopefully reduce the fragmentation. + */ +static struct i915_global_active { + struct kmem_cache *slab_cache; +} global; + +struct active_node { + struct i915_active_request base; + struct i915_active *ref; + struct rb_node node; + u64 timeline; +}; + +static void +__active_park(struct i915_active *ref) +{ + struct active_node *it, *n; + + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { + GEM_BUG_ON(i915_active_request_isset(&it->base)); + kmem_cache_free(global.slab_cache, it); + } + ref->tree = RB_ROOT; +} + +static void +__active_retire(struct i915_active *ref) +{ + GEM_BUG_ON(!ref->count); + if (--ref->count) + return; + + /* return the unused nodes to our slabcache */ + __active_park(ref); + + ref->retire(ref); +} + +static void +node_retire(struct i915_active_request *base, struct i915_request *rq) +{ + __active_retire(container_of(base, struct active_node, base)->ref); +} + +static void +last_retire(struct i915_active_request *base, struct i915_request *rq) +{ + __active_retire(container_of(base, struct i915_active, last)); +} + +static struct i915_active_request * +active_instance(struct i915_active *ref, u64 idx) +{ + struct active_node *node; + struct rb_node **p, *parent; + struct i915_request *old; + + /* + * We track the most recently used timeline to skip a rbtree search + * for the common case, under typical loads we never need the rbtree + * at all. We can reuse the last slot if it is empty, that is + * after the previous activity has been retired, or if it matches the + * current timeline. + * + * Note that we allow the timeline to be active simultaneously in + * the rbtree and the last cache. We do this to avoid having + * to search and replace the rbtree element for a new timeline, with + * the cost being that we must be aware that the ref may be retired + * twice for the same timeline (as the older rbtree element will be + * retired before the new request added to last). + */ + old = i915_active_request_raw(&ref->last, BKL(ref)); + if (!old || old->fence.context == idx) + goto out; + + /* Move the currently active fence into the rbtree */ + idx = old->fence.context; + + parent = NULL; + p = &ref->tree.rb_node; + while (*p) { + parent = *p; + + node = rb_entry(parent, struct active_node, node); + if (node->timeline == idx) + goto replace; + + if (node->timeline < idx) + p = &parent->rb_right; + else + p = &parent->rb_left; + } + + node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); + + /* kmalloc may retire the ref->last (thanks shrinker)! */ + if (unlikely(!i915_active_request_raw(&ref->last, BKL(ref)))) { + kmem_cache_free(global.slab_cache, node); + goto out; + } + + if (unlikely(!node)) + return ERR_PTR(-ENOMEM); + + i915_active_request_init(&node->base, NULL, node_retire); + node->ref = ref; + node->timeline = idx; + + rb_link_node(&node->node, parent, p); + rb_insert_color(&node->node, &ref->tree); + +replace: + /* + * Overwrite the previous active slot in the rbtree with last, + * leaving last zeroed. If the previous slot is still active, + * we must be careful as we now only expect to receive one retire + * callback not two, and so much undo the active counting for the + * overwritten slot. + */ + if (i915_active_request_isset(&node->base)) { + /* Retire ourselves from the old rq->active_list */ + __list_del_entry(&node->base.link); + ref->count--; + GEM_BUG_ON(!ref->count); + } + GEM_BUG_ON(list_empty(&ref->last.link)); + list_replace_init(&ref->last.link, &node->base.link); + node->base.request = fetch_and_zero(&ref->last.request); + +out: + return &ref->last; +} + +void i915_active_init(struct drm_i915_private *i915, + struct i915_active *ref, + void (*retire)(struct i915_active *ref)) +{ + ref->i915 = i915; + ref->retire = retire; + ref->tree = RB_ROOT; + i915_active_request_init(&ref->last, NULL, last_retire); + ref->count = 0; +} + +int i915_active_ref(struct i915_active *ref, + u64 timeline, + struct i915_request *rq) +{ + struct i915_active_request *active; + + active = active_instance(ref, timeline); + if (IS_ERR(active)) + return PTR_ERR(active); + + if (!i915_active_request_isset(active)) + ref->count++; + __i915_active_request_set(active, rq); + + GEM_BUG_ON(!ref->count); + return 0; +} + +bool i915_active_acquire(struct i915_active *ref) +{ + lockdep_assert_held(BKL(ref)); + return !ref->count++; +} + +void i915_active_release(struct i915_active *ref) +{ + lockdep_assert_held(BKL(ref)); + __active_retire(ref); +} + +int i915_active_wait(struct i915_active *ref) +{ + struct active_node *it, *n; + int ret = 0; + + if (i915_active_acquire(ref)) + goto out_release; + + ret = i915_active_request_retire(&ref->last, BKL(ref)); + if (ret) + goto out_release; + + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { + ret = i915_active_request_retire(&it->base, BKL(ref)); + if (ret) + break; + } + +out_release: + i915_active_release(ref); + return ret; +} + +int i915_request_await_active_request(struct i915_request *rq, + struct i915_active_request *active) +{ + struct i915_request *barrier = + i915_active_request_raw(active, &rq->i915->drm.struct_mutex); + + return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0; +} + +int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) +{ + struct active_node *it, *n; + int ret; + + ret = i915_request_await_active_request(rq, &ref->last); + if (ret) + return ret; + + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { + ret = i915_request_await_active_request(rq, &it->base); + if (ret) + return ret; + } + + return 0; +} + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) +void i915_active_fini(struct i915_active *ref) +{ + GEM_BUG_ON(i915_active_request_isset(&ref->last)); + GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree)); + GEM_BUG_ON(ref->count); +} +#endif + +int i915_active_request_set(struct i915_active_request *active, + struct i915_request *rq) +{ + int err; + + /* Must maintain ordering wrt previous active requests */ + err = i915_request_await_active_request(rq, active); + if (err) + return err; + + __i915_active_request_set(active, rq); + return 0; +} + +void i915_active_retire_noop(struct i915_active_request *active, + struct i915_request *request) +{ + /* Space left intentionally blank */ +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_active.c" +#endif + +int __init i915_global_active_init(void) +{ + global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN); + if (!global.slab_cache) + return -ENOMEM; + + return 0; +} + +void __exit i915_global_active_exit(void) +{ + kmem_cache_destroy(global.slab_cache); +} diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h new file mode 100644 index 000000000000..12b5c1d287d1 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_active.h @@ -0,0 +1,425 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef _I915_ACTIVE_H_ +#define _I915_ACTIVE_H_ + +#include <linux/lockdep.h> + +#include "i915_active_types.h" +#include "i915_request.h" + +/* + * We treat requests as fences. This is not be to confused with our + * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync. + * We use the fences to synchronize access from the CPU with activity on the + * GPU, for example, we should not rewrite an object's PTE whilst the GPU + * is reading them. We also track fences at a higher level to provide + * implicit synchronisation around GEM objects, e.g. set-domain will wait + * for outstanding GPU rendering before marking the object ready for CPU + * access, or a pageflip will wait until the GPU is complete before showing + * the frame on the scanout. + * + * In order to use a fence, the object must track the fence it needs to + * serialise with. For example, GEM objects want to track both read and + * write access so that we can perform concurrent read operations between + * the CPU and GPU engines, as well as waiting for all rendering to + * complete, or waiting for the last GPU user of a "fence register". The + * object then embeds a #i915_active_request to track the most recent (in + * retirement order) request relevant for the desired mode of access. + * The #i915_active_request is updated with i915_active_request_set() to + * track the most recent fence request, typically this is done as part of + * i915_vma_move_to_active(). + * + * When the #i915_active_request completes (is retired), it will + * signal its completion to the owner through a callback as well as mark + * itself as idle (i915_active_request.request == NULL). The owner + * can then perform any action, such as delayed freeing of an active + * resource including itself. + */ + +void i915_active_retire_noop(struct i915_active_request *active, + struct i915_request *request); + +/** + * i915_active_request_init - prepares the activity tracker for use + * @active - the active tracker + * @rq - initial request to track, can be NULL + * @func - a callback when then the tracker is retired (becomes idle), + * can be NULL + * + * i915_active_request_init() prepares the embedded @active struct for use as + * an activity tracker, that is for tracking the last known active request + * associated with it. When the last request becomes idle, when it is retired + * after completion, the optional callback @func is invoked. + */ +static inline void +i915_active_request_init(struct i915_active_request *active, + struct i915_request *rq, + i915_active_retire_fn retire) +{ + RCU_INIT_POINTER(active->request, rq); + INIT_LIST_HEAD(&active->link); + active->retire = retire ?: i915_active_retire_noop; +} + +#define INIT_ACTIVE_REQUEST(name) i915_active_request_init((name), NULL, NULL) + +/** + * i915_active_request_set - updates the tracker to watch the current request + * @active - the active tracker + * @request - the request to watch + * + * __i915_active_request_set() watches the given @request for completion. Whilst + * that @request is busy, the @active reports busy. When that @request is + * retired, the @active tracker is updated to report idle. + */ +static inline void +__i915_active_request_set(struct i915_active_request *active, + struct i915_request *request) +{ + list_move(&active->link, &request->active_list); + rcu_assign_pointer(active->request, request); +} + +int __must_check +i915_active_request_set(struct i915_active_request *active, + struct i915_request *rq); + +/** + * i915_active_request_set_retire_fn - updates the retirement callback + * @active - the active tracker + * @fn - the routine called when the request is retired + * @mutex - struct_mutex used to guard retirements + * + * i915_active_request_set_retire_fn() updates the function pointer that + * is called when the final request associated with the @active tracker + * is retired. + */ +static inline void +i915_active_request_set_retire_fn(struct i915_active_request *active, + i915_active_retire_fn fn, + struct mutex *mutex) +{ + lockdep_assert_held(mutex); + active->retire = fn ?: i915_active_retire_noop; +} + +static inline struct i915_request * +__i915_active_request_peek(const struct i915_active_request *active) +{ + /* + * Inside the error capture (running with the driver in an unknown + * state), we want to bend the rules slightly (a lot). + * + * Work is in progress to make it safer, in the meantime this keeps + * the known issue from spamming the logs. + */ + return rcu_dereference_protected(active->request, 1); +} + +/** + * i915_active_request_raw - return the active request + * @active - the active tracker + * + * i915_active_request_raw() returns the current request being tracked, or NULL. + * It does not obtain a reference on the request for the caller, so the caller + * must hold struct_mutex. + */ +static inline struct i915_request * +i915_active_request_raw(const struct i915_active_request *active, + struct mutex *mutex) +{ + return rcu_dereference_protected(active->request, + lockdep_is_held(mutex)); +} + +/** + * i915_active_request_peek - report the active request being monitored + * @active - the active tracker + * + * i915_active_request_peek() returns the current request being tracked if + * still active, or NULL. It does not obtain a reference on the request + * for the caller, so the caller must hold struct_mutex. + */ +static inline struct i915_request * +i915_active_request_peek(const struct i915_active_request *active, + struct mutex *mutex) +{ + struct i915_request *request; + + request = i915_active_request_raw(active, mutex); + if (!request || i915_request_completed(request)) + return NULL; + + return request; +} + +/** + * i915_active_request_get - return a reference to the active request + * @active - the active tracker + * + * i915_active_request_get() returns a reference to the active request, or NULL + * if the active tracker is idle. The caller must hold struct_mutex. + */ +static inline struct i915_request * +i915_active_request_get(const struct i915_active_request *active, + struct mutex *mutex) +{ + return i915_request_get(i915_active_request_peek(active, mutex)); +} + +/** + * __i915_active_request_get_rcu - return a reference to the active request + * @active - the active tracker + * + * __i915_active_request_get() returns a reference to the active request, + * or NULL if the active tracker is idle. The caller must hold the RCU read + * lock, but the returned pointer is safe to use outside of RCU. + */ +static inline struct i915_request * +__i915_active_request_get_rcu(const struct i915_active_request *active) +{ + /* + * Performing a lockless retrieval of the active request is super + * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing + * slab of request objects will not be freed whilst we hold the + * RCU read lock. It does not guarantee that the request itself + * will not be freed and then *reused*. Viz, + * + * Thread A Thread B + * + * rq = active.request + * retire(rq) -> free(rq); + * (rq is now first on the slab freelist) + * active.request = NULL + * + * rq = new submission on a new object + * ref(rq) + * + * To prevent the request from being reused whilst the caller + * uses it, we take a reference like normal. Whilst acquiring + * the reference we check that it is not in a destroyed state + * (refcnt == 0). That prevents the request being reallocated + * whilst the caller holds on to it. To check that the request + * was not reallocated as we acquired the reference we have to + * check that our request remains the active request across + * the lookup, in the same manner as a seqlock. The visibility + * of the pointer versus the reference counting is controlled + * by using RCU barriers (rcu_dereference and rcu_assign_pointer). + * + * In the middle of all that, we inspect whether the request is + * complete. Retiring is lazy so the request may be completed long + * before the active tracker is updated. Querying whether the + * request is complete is far cheaper (as it involves no locked + * instructions setting cachelines to exclusive) than acquiring + * the reference, so we do it first. The RCU read lock ensures the + * pointer dereference is valid, but does not ensure that the + * seqno nor HWS is the right one! However, if the request was + * reallocated, that means the active tracker's request was complete. + * If the new request is also complete, then both are and we can + * just report the active tracker is idle. If the new request is + * incomplete, then we acquire a reference on it and check that + * it remained the active request. + * + * It is then imperative that we do not zero the request on + * reallocation, so that we can chase the dangling pointers! + * See i915_request_alloc(). + */ + do { + struct i915_request *request; + + request = rcu_dereference(active->request); + if (!request || i915_request_completed(request)) + return NULL; + + /* + * An especially silly compiler could decide to recompute the + * result of i915_request_completed, more specifically + * re-emit the load for request->fence.seqno. A race would catch + * a later seqno value, which could flip the result from true to + * false. Which means part of the instructions below might not + * be executed, while later on instructions are executed. Due to + * barriers within the refcounting the inconsistency can't reach + * past the call to i915_request_get_rcu, but not executing + * that while still executing i915_request_put() creates + * havoc enough. Prevent this with a compiler barrier. + */ + barrier(); + + request = i915_request_get_rcu(request); + + /* + * What stops the following rcu_access_pointer() from occurring + * before the above i915_request_get_rcu()? If we were + * to read the value before pausing to get the reference to + * the request, we may not notice a change in the active + * tracker. + * + * The rcu_access_pointer() is a mere compiler barrier, which + * means both the CPU and compiler are free to perform the + * memory read without constraint. The compiler only has to + * ensure that any operations after the rcu_access_pointer() + * occur afterwards in program order. This means the read may + * be performed earlier by an out-of-order CPU, or adventurous + * compiler. + * + * The atomic operation at the heart of + * i915_request_get_rcu(), see dma_fence_get_rcu(), is + * atomic_inc_not_zero() which is only a full memory barrier + * when successful. That is, if i915_request_get_rcu() + * returns the request (and so with the reference counted + * incremented) then the following read for rcu_access_pointer() + * must occur after the atomic operation and so confirm + * that this request is the one currently being tracked. + * + * The corresponding write barrier is part of + * rcu_assign_pointer(). + */ + if (!request || request == rcu_access_pointer(active->request)) + return rcu_pointer_handoff(request); + + i915_request_put(request); + } while (1); +} + +/** + * i915_active_request_get_unlocked - return a reference to the active request + * @active - the active tracker + * + * i915_active_request_get_unlocked() returns a reference to the active request, + * or NULL if the active tracker is idle. The reference is obtained under RCU, + * so no locking is required by the caller. + * + * The reference should be freed with i915_request_put(). + */ +static inline struct i915_request * +i915_active_request_get_unlocked(const struct i915_active_request *active) +{ + struct i915_request *request; + + rcu_read_lock(); + request = __i915_active_request_get_rcu(active); + rcu_read_unlock(); + + return request; +} + +/** + * i915_active_request_isset - report whether the active tracker is assigned + * @active - the active tracker + * + * i915_active_request_isset() returns true if the active tracker is currently + * assigned to a request. Due to the lazy retiring, that request may be idle + * and this may report stale information. + */ +static inline bool +i915_active_request_isset(const struct i915_active_request *active) +{ + return rcu_access_pointer(active->request); +} + +/** + * i915_active_request_retire - waits until the request is retired + * @active - the active request on which to wait + * + * i915_active_request_retire() waits until the request is completed, + * and then ensures that at least the retirement handler for this + * @active tracker is called before returning. If the @active + * tracker is idle, the function returns immediately. + */ +static inline int __must_check +i915_active_request_retire(struct i915_active_request *active, + struct mutex *mutex) +{ + struct i915_request *request; + long ret; + + request = i915_active_request_raw(active, mutex); + if (!request) + return 0; + + ret = i915_request_wait(request, + I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (ret < 0) + return ret; + + list_del_init(&active->link); + RCU_INIT_POINTER(active->request, NULL); + + active->retire(active, request); + + return 0; +} + +/* + * GPU activity tracking + * + * Each set of commands submitted to the GPU compromises a single request that + * signals a fence upon completion. struct i915_request combines the + * command submission, scheduling and fence signaling roles. If we want to see + * if a particular task is complete, we need to grab the fence (struct + * i915_request) for that task and check or wait for it to be signaled. More + * often though we want to track the status of a bunch of tasks, for example + * to wait for the GPU to finish accessing some memory across a variety of + * different command pipelines from different clients. We could choose to + * track every single request associated with the task, but knowing that + * each request belongs to an ordered timeline (later requests within a + * timeline must wait for earlier requests), we need only track the + * latest request in each timeline to determine the overall status of the + * task. + * + * struct i915_active provides this tracking across timelines. It builds a + * composite shared-fence, and is updated as new work is submitted to the task, + * forming a snapshot of the current status. It should be embedded into the + * different resources that need to track their associated GPU activity to + * provide a callback when that GPU activity has ceased, or otherwise to + * provide a serialisation point either for request submission or for CPU + * synchronisation. + */ + +void i915_active_init(struct drm_i915_private *i915, + struct i915_active *ref, + void (*retire)(struct i915_active *ref)); + +int i915_active_ref(struct i915_active *ref, + u64 timeline, + struct i915_request *rq); + +int i915_active_wait(struct i915_active *ref); + +int i915_request_await_active(struct i915_request *rq, + struct i915_active *ref); +int i915_request_await_active_request(struct i915_request *rq, + struct i915_active_request *active); + +bool i915_active_acquire(struct i915_active *ref); + +static inline void i915_active_cancel(struct i915_active *ref) +{ + GEM_BUG_ON(ref->count != 1); + ref->count = 0; +} + +void i915_active_release(struct i915_active *ref); + +static inline bool +i915_active_is_idle(const struct i915_active *ref) +{ + return !ref->count; +} + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) +void i915_active_fini(struct i915_active *ref); +#else +static inline void i915_active_fini(struct i915_active *ref) { } +#endif + +int i915_global_active_init(void); +void i915_global_active_exit(void); + +#endif /* _I915_ACTIVE_H_ */ diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h new file mode 100644 index 000000000000..b679253b53a5 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_active_types.h @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef _I915_ACTIVE_TYPES_H_ +#define _I915_ACTIVE_TYPES_H_ + +#include <linux/rbtree.h> +#include <linux/rcupdate.h> + +struct drm_i915_private; +struct i915_active_request; +struct i915_request; + +typedef void (*i915_active_retire_fn)(struct i915_active_request *, + struct i915_request *); + +struct i915_active_request { + struct i915_request __rcu *request; + struct list_head link; + i915_active_retire_fn retire; +}; + +struct i915_active { + struct drm_i915_private *i915; + + struct rb_root tree; + struct i915_active_request last; + unsigned int count; + + void (*retire)(struct i915_active *ref); +}; + +#endif /* _I915_ACTIVE_TYPES_H_ */ diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 978da8838393..0bd890c04fe4 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -33,6 +33,8 @@ #include "intel_drv.h" #include "intel_guc_submission.h" +#include "i915_reset.h" + static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) { return to_i915(node->minor->dev); @@ -158,14 +160,14 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : ""); if (obj->base.name) seq_printf(m, " (name: %d)", obj->base.name); - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (i915_vma_is_pinned(vma)) pin_count++; } seq_printf(m, " (pinned x %d)", pin_count); if (obj->pin_global) seq_printf(m, " (global)"); - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -205,7 +207,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) if (vma->fence) seq_printf(m, " , fence: %d%s", vma->fence->id, - i915_gem_active_isset(&vma->last_fence) ? "*" : ""); + i915_active_request_isset(&vma->last_fence) ? "*" : ""); seq_puts(m, ")"); } if (obj->stolen) @@ -321,7 +323,7 @@ static int per_file_stats(int id, void *ptr, void *data) if (obj->base.name || obj->base.dma_buf) stats->shared += obj->base.size; - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -627,10 +629,12 @@ static void gen8_display_interrupt_info(struct seq_file *m) for_each_pipe(dev_priv, pipe) { enum intel_display_power_domain power_domain; + intel_wakeref_t wakeref; power_domain = POWER_DOMAIN_PIPE(pipe); - if (!intel_display_power_get_if_enabled(dev_priv, - power_domain)) { + wakeref = intel_display_power_get_if_enabled(dev_priv, + power_domain); + if (!wakeref) { seq_printf(m, "Pipe %c power disabled\n", pipe_name(pipe)); continue; @@ -645,7 +649,7 @@ static void gen8_display_interrupt_info(struct seq_file *m) pipe_name(pipe), I915_READ(GEN8_DE_PIPE_IER(pipe))); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); } seq_printf(m, "Display Engine port interrupt mask:\t%08x\n", @@ -675,11 +679,14 @@ static int i915_interrupt_info(struct seq_file *m, void *data) struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; int i, pipe; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); if (IS_CHERRYVIEW(dev_priv)) { + intel_wakeref_t pref; + seq_printf(m, "Master Interrupt Control:\t%08x\n", I915_READ(GEN8_MASTER_IRQ)); @@ -695,8 +702,9 @@ static int i915_interrupt_info(struct seq_file *m, void *data) enum intel_display_power_domain power_domain; power_domain = POWER_DOMAIN_PIPE(pipe); - if (!intel_display_power_get_if_enabled(dev_priv, - power_domain)) { + pref = intel_display_power_get_if_enabled(dev_priv, + power_domain); + if (!pref) { seq_printf(m, "Pipe %c power disabled\n", pipe_name(pipe)); continue; @@ -706,17 +714,17 @@ static int i915_interrupt_info(struct seq_file *m, void *data) pipe_name(pipe), I915_READ(PIPESTAT(pipe))); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, pref); } - intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); + pref = intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); seq_printf(m, "Port hotplug:\t%08x\n", I915_READ(PORT_HOTPLUG_EN)); seq_printf(m, "DPFLIPSTAT:\t%08x\n", I915_READ(VLV_DPFLIPSTAT)); seq_printf(m, "DPINVGTT:\t%08x\n", I915_READ(DPINVGTT)); - intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); + intel_display_power_put(dev_priv, POWER_DOMAIN_INIT, pref); for (i = 0; i < 4; i++) { seq_printf(m, "GT Interrupt IMR %d:\t%08x\n", @@ -779,10 +787,12 @@ static int i915_interrupt_info(struct seq_file *m, void *data) I915_READ(VLV_IMR)); for_each_pipe(dev_priv, pipe) { enum intel_display_power_domain power_domain; + intel_wakeref_t pref; power_domain = POWER_DOMAIN_PIPE(pipe); - if (!intel_display_power_get_if_enabled(dev_priv, - power_domain)) { + pref = intel_display_power_get_if_enabled(dev_priv, + power_domain); + if (!pref) { seq_printf(m, "Pipe %c power disabled\n", pipe_name(pipe)); continue; @@ -791,7 +801,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data) seq_printf(m, "Pipe %c stat:\t%08x\n", pipe_name(pipe), I915_READ(PIPESTAT(pipe))); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, pref); } seq_printf(m, "Master IER:\t%08x\n", @@ -878,7 +888,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data) } } - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return 0; } @@ -951,10 +961,11 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file) { struct drm_i915_private *i915 = inode->i_private; struct i915_gpu_state *gpu; + intel_wakeref_t wakeref; - intel_runtime_pm_get(i915); - gpu = i915_capture_gpu_state(i915); - intel_runtime_pm_put(i915); + gpu = NULL; + with_intel_runtime_pm(i915, wakeref) + gpu = i915_capture_gpu_state(i915); if (IS_ERR(gpu)) return PTR_ERR(gpu); @@ -1013,9 +1024,10 @@ static int i915_frequency_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_rps *rps = &dev_priv->gt_pm.rps; + intel_wakeref_t wakeref; int ret = 0; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); if (IS_GEN(dev_priv, 5)) { u16 rgvswctl = I915_READ16(MEMSWCTL); @@ -1227,7 +1239,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "Max CD clock frequency: %d kHz\n", dev_priv->max_cdclk_freq); seq_printf(m, "Max pixel clock frequency: %d kHz\n", dev_priv->max_dotclk_freq); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return ret; } @@ -1266,14 +1278,13 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) u64 acthd[I915_NUM_ENGINES]; u32 seqno[I915_NUM_ENGINES]; struct intel_instdone instdone; + intel_wakeref_t wakeref; enum intel_engine_id id; if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags)) seq_puts(m, "Wedged\n"); if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags)) seq_puts(m, "Reset in progress: struct_mutex backoff\n"); - if (test_bit(I915_RESET_HANDOFF, &dev_priv->gpu_error.flags)) - seq_puts(m, "Reset in progress: reset handoff to waiter\n"); if (waitqueue_active(&dev_priv->gpu_error.wait_queue)) seq_puts(m, "Waiter holding struct mutex\n"); if (waitqueue_active(&dev_priv->gpu_error.reset_queue)) @@ -1284,17 +1295,15 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) return 0; } - intel_runtime_pm_get(dev_priv); + with_intel_runtime_pm(dev_priv, wakeref) { + for_each_engine(engine, dev_priv, id) { + acthd[id] = intel_engine_get_active_head(engine); + seqno[id] = intel_engine_get_seqno(engine); + } - for_each_engine(engine, dev_priv, id) { - acthd[id] = intel_engine_get_active_head(engine); - seqno[id] = intel_engine_get_seqno(engine); + intel_engine_get_instdone(dev_priv->engine[RCS], &instdone); } - intel_engine_get_instdone(dev_priv->engine[RCS], &instdone); - - intel_runtime_pm_put(dev_priv); - if (timer_pending(&dev_priv->gpu_error.hangcheck_work.timer)) seq_printf(m, "Hangcheck active, timer fires in %dms\n", jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires - @@ -1307,37 +1316,16 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) seq_printf(m, "GT active? %s\n", yesno(dev_priv->gt.awake)); for_each_engine(engine, dev_priv, id) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct rb_node *rb; - seq_printf(m, "%s:\n", engine->name); - seq_printf(m, "\tseqno = %x [current %x, last %x]\n", + seq_printf(m, "\tseqno = %x [current %x, last %x], %dms ago\n", engine->hangcheck.seqno, seqno[id], - intel_engine_last_submit(engine)); - seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s, wedged? %s\n", - yesno(intel_engine_has_waiter(engine)), - yesno(test_bit(engine->id, - &dev_priv->gpu_error.missed_irq_rings)), - yesno(engine->hangcheck.stalled), - yesno(engine->hangcheck.wedged)); - - spin_lock_irq(&b->rb_lock); - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = rb_entry(rb, typeof(*w), node); - - seq_printf(m, "\t%s [%d] waiting for %x\n", - w->tsk->comm, w->tsk->pid, w->seqno); - } - spin_unlock_irq(&b->rb_lock); + intel_engine_last_submit(engine), + jiffies_to_msecs(jiffies - + engine->hangcheck.action_timestamp)); seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", (long long)engine->hangcheck.acthd, (long long)acthd[id]); - seq_printf(m, "\taction = %s(%d) %d ms ago\n", - hangcheck_action_to_str(engine->hangcheck.action), - engine->hangcheck.action, - jiffies_to_msecs(jiffies - - engine->hangcheck.action_timestamp)); if (engine->id == RCS) { seq_puts(m, "\tinstdone read =\n"); @@ -1569,18 +1557,17 @@ static int gen6_drpc_info(struct seq_file *m) static int i915_drpc_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - int err; - - intel_runtime_pm_get(dev_priv); - - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - err = vlv_drpc_info(m); - else if (INTEL_GEN(dev_priv) >= 6) - err = gen6_drpc_info(m); - else - err = ironlake_drpc_info(m); - - intel_runtime_pm_put(dev_priv); + intel_wakeref_t wakeref; + int err = -ENODEV; + + with_intel_runtime_pm(dev_priv, wakeref) { + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + err = vlv_drpc_info(m); + else if (INTEL_GEN(dev_priv) >= 6) + err = gen6_drpc_info(m); + else + err = ironlake_drpc_info(m); + } return err; } @@ -1602,11 +1589,12 @@ static int i915_fbc_status(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_fbc *fbc = &dev_priv->fbc; + intel_wakeref_t wakeref; if (!HAS_FBC(dev_priv)) return -ENODEV; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); mutex_lock(&fbc->lock); if (intel_fbc_is_active(dev_priv)) @@ -1633,7 +1621,7 @@ static int i915_fbc_status(struct seq_file *m, void *unused) } mutex_unlock(&fbc->lock); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return 0; } @@ -1678,11 +1666,12 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_fbc_false_color_fops, static int i915_ips_status(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + intel_wakeref_t wakeref; if (!HAS_IPS(dev_priv)) return -ENODEV; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); seq_printf(m, "Enabled by kernel parameter: %s\n", yesno(i915_modparams.enable_ips)); @@ -1696,7 +1685,7 @@ static int i915_ips_status(struct seq_file *m, void *unused) seq_puts(m, "Currently: disabled\n"); } - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return 0; } @@ -1704,10 +1693,10 @@ static int i915_ips_status(struct seq_file *m, void *unused) static int i915_sr_status(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + intel_wakeref_t wakeref; bool sr_enabled = false; - intel_runtime_pm_get(dev_priv); - intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); + wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); if (INTEL_GEN(dev_priv) >= 9) /* no global SR status; inspect per-plane WM */; @@ -1723,8 +1712,7 @@ static int i915_sr_status(struct seq_file *m, void *unused) else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) sr_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN; - intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); - intel_runtime_pm_put(dev_priv); + intel_display_power_put(dev_priv, POWER_DOMAIN_INIT, wakeref); seq_printf(m, "self-refresh: %s\n", enableddisabled(sr_enabled)); @@ -1733,31 +1721,24 @@ static int i915_sr_status(struct seq_file *m, void *unused) static int i915_emon_status(struct seq_file *m, void *unused) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; - unsigned long temp, chipset, gfx; - int ret; + struct drm_i915_private *i915 = node_to_i915(m->private); + intel_wakeref_t wakeref; - if (!IS_GEN(dev_priv, 5)) + if (!IS_GEN(i915, 5)) return -ENODEV; - intel_runtime_pm_get(dev_priv); - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - - temp = i915_mch_val(dev_priv); - chipset = i915_chipset_val(dev_priv); - gfx = i915_gfx_val(dev_priv); - mutex_unlock(&dev->struct_mutex); + with_intel_runtime_pm(i915, wakeref) { + unsigned long temp, chipset, gfx; - seq_printf(m, "GMCH temp: %ld\n", temp); - seq_printf(m, "Chipset power: %ld\n", chipset); - seq_printf(m, "GFX power: %ld\n", gfx); - seq_printf(m, "Total power: %ld\n", chipset + gfx); + temp = i915_mch_val(i915); + chipset = i915_chipset_val(i915); + gfx = i915_gfx_val(i915); - intel_runtime_pm_put(dev_priv); + seq_printf(m, "GMCH temp: %ld\n", temp); + seq_printf(m, "Chipset power: %ld\n", chipset); + seq_printf(m, "GFX power: %ld\n", gfx); + seq_printf(m, "Total power: %ld\n", chipset + gfx); + } return 0; } @@ -1767,13 +1748,14 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_rps *rps = &dev_priv->gt_pm.rps; unsigned int max_gpu_freq, min_gpu_freq; + intel_wakeref_t wakeref; int gpu_freq, ia_freq; int ret; if (!HAS_LLC(dev_priv)) return -ENODEV; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); ret = mutex_lock_interruptible(&dev_priv->pcu_lock); if (ret) @@ -1806,7 +1788,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) mutex_unlock(&dev_priv->pcu_lock); out: - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return ret; } @@ -1979,8 +1961,9 @@ static const char *swizzle_string(unsigned swizzle) static int i915_swizzle_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + intel_wakeref_t wakeref; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); seq_printf(m, "bit6 swizzle for X-tiling = %s\n", swizzle_string(dev_priv->mm.bit_6_swizzle_x)); @@ -2018,23 +2001,11 @@ static int i915_swizzle_info(struct seq_file *m, void *data) if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) seq_puts(m, "L-shaped memory detected\n"); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return 0; } -static int count_irq_waiters(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - int count = 0; - - for_each_engine(engine, i915, id) - count += intel_engine_has_waiter(engine); - - return count; -} - static const char *rps_power_to_str(unsigned int power) { static const char * const strings[] = { @@ -2055,9 +2026,10 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) struct drm_device *dev = &dev_priv->drm; struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 act_freq = rps->cur_freq; + intel_wakeref_t wakeref; struct drm_file *file; - if (intel_runtime_pm_get_if_in_use(dev_priv)) { + with_intel_runtime_pm_if_in_use(dev_priv, wakeref) { if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { mutex_lock(&dev_priv->pcu_lock); act_freq = vlv_punit_read(dev_priv, @@ -2068,13 +2040,11 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) act_freq = intel_get_cagf(dev_priv, I915_READ(GEN6_RPSTAT1)); } - intel_runtime_pm_put(dev_priv); } seq_printf(m, "RPS enabled? %d\n", rps->enabled); seq_printf(m, "GPU busy? %s [%d requests]\n", yesno(dev_priv->gt.awake), dev_priv->gt.active_requests); - seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv)); seq_printf(m, "Boosts outstanding? %d\n", atomic_read(&rps->num_waiters)); seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive)); @@ -2151,6 +2121,7 @@ static int i915_llc(struct seq_file *m, void *data) static int i915_huc_load_status_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + intel_wakeref_t wakeref; struct drm_printer p; if (!HAS_HUC(dev_priv)) @@ -2159,9 +2130,8 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data) p = drm_seq_file_printer(m); intel_uc_fw_dump(&dev_priv->huc.fw, &p); - intel_runtime_pm_get(dev_priv); - seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2)); - intel_runtime_pm_put(dev_priv); + with_intel_runtime_pm(dev_priv, wakeref) + seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2)); return 0; } @@ -2169,8 +2139,8 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data) static int i915_guc_load_status_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + intel_wakeref_t wakeref; struct drm_printer p; - u32 tmp, i; if (!HAS_GUC(dev_priv)) return -ENODEV; @@ -2178,22 +2148,23 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) p = drm_seq_file_printer(m); intel_uc_fw_dump(&dev_priv->guc.fw, &p); - intel_runtime_pm_get(dev_priv); - - tmp = I915_READ(GUC_STATUS); - - seq_printf(m, "\nGuC status 0x%08x:\n", tmp); - seq_printf(m, "\tBootrom status = 0x%x\n", - (tmp & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT); - seq_printf(m, "\tuKernel status = 0x%x\n", - (tmp & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT); - seq_printf(m, "\tMIA Core status = 0x%x\n", - (tmp & GS_MIA_MASK) >> GS_MIA_SHIFT); - seq_puts(m, "\nScratch registers:\n"); - for (i = 0; i < 16; i++) - seq_printf(m, "\t%2d: \t0x%x\n", i, I915_READ(SOFT_SCRATCH(i))); - - intel_runtime_pm_put(dev_priv); + with_intel_runtime_pm(dev_priv, wakeref) { + u32 tmp = I915_READ(GUC_STATUS); + u32 i; + + seq_printf(m, "\nGuC status 0x%08x:\n", tmp); + seq_printf(m, "\tBootrom status = 0x%x\n", + (tmp & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT); + seq_printf(m, "\tuKernel status = 0x%x\n", + (tmp & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT); + seq_printf(m, "\tMIA Core status = 0x%x\n", + (tmp & GS_MIA_MASK) >> GS_MIA_SHIFT); + seq_puts(m, "\nScratch registers:\n"); + for (i = 0; i < 16; i++) { + seq_printf(m, "\t%2d: \t0x%x\n", + i, I915_READ(SOFT_SCRATCH(i))); + } + } return 0; } @@ -2245,7 +2216,7 @@ static void i915_guc_client_info(struct seq_file *m, { struct intel_engine_cs *engine; enum intel_engine_id id; - uint64_t tot = 0; + u64 tot = 0; seq_printf(m, "\tPriority %d, GuC stage index: %u, PD offset 0x%x\n", client->priority, client->stage_id, client->proc_desc_offset); @@ -2500,7 +2471,8 @@ DEFINE_SHOW_ATTRIBUTE(i915_psr_sink_status); static void psr_source_status(struct drm_i915_private *dev_priv, struct seq_file *m) { - u32 val, psr_status; + u32 val, status_val; + const char *status = "unknown"; if (dev_priv->psr.psr2_enabled) { static const char * const live_status[] = { @@ -2516,14 +2488,11 @@ psr_source_status(struct drm_i915_private *dev_priv, struct seq_file *m) "BUF_ON", "TG_ON" }; - psr_status = I915_READ(EDP_PSR2_STATUS); - val = (psr_status & EDP_PSR2_STATUS_STATE_MASK) >> - EDP_PSR2_STATUS_STATE_SHIFT; - if (val < ARRAY_SIZE(live_status)) { - seq_printf(m, "Source PSR status: 0x%x [%s]\n", - psr_status, live_status[val]); - return; - } + val = I915_READ(EDP_PSR2_STATUS); + status_val = (val & EDP_PSR2_STATUS_STATE_MASK) >> + EDP_PSR2_STATUS_STATE_SHIFT; + if (status_val < ARRAY_SIZE(live_status)) + status = live_status[status_val]; } else { static const char * const live_status[] = { "IDLE", @@ -2535,74 +2504,102 @@ psr_source_status(struct drm_i915_private *dev_priv, struct seq_file *m) "SRDOFFACK", "SRDENT_ON", }; - psr_status = I915_READ(EDP_PSR_STATUS); - val = (psr_status & EDP_PSR_STATUS_STATE_MASK) >> - EDP_PSR_STATUS_STATE_SHIFT; - if (val < ARRAY_SIZE(live_status)) { - seq_printf(m, "Source PSR status: 0x%x [%s]\n", - psr_status, live_status[val]); - return; - } + val = I915_READ(EDP_PSR_STATUS); + status_val = (val & EDP_PSR_STATUS_STATE_MASK) >> + EDP_PSR_STATUS_STATE_SHIFT; + if (status_val < ARRAY_SIZE(live_status)) + status = live_status[status_val]; } - seq_printf(m, "Source PSR status: 0x%x [%s]\n", psr_status, "unknown"); + seq_printf(m, "Source PSR status: %s [0x%08x]\n", status, val); } static int i915_edp_psr_status(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - u32 psrperf = 0; - bool enabled = false; - bool sink_support; + struct i915_psr *psr = &dev_priv->psr; + intel_wakeref_t wakeref; + const char *status; + bool enabled; + u32 val; if (!HAS_PSR(dev_priv)) return -ENODEV; - sink_support = dev_priv->psr.sink_support; - seq_printf(m, "Sink_Support: %s\n", yesno(sink_support)); - if (!sink_support) - return 0; + seq_printf(m, "Sink support: %s", yesno(psr->sink_support)); + if (psr->dp) + seq_printf(m, " [0x%02x]", psr->dp->psr_dpcd[0]); + seq_puts(m, "\n"); - intel_runtime_pm_get(dev_priv); + if (!psr->sink_support) + return 0; - mutex_lock(&dev_priv->psr.lock); - seq_printf(m, "PSR mode: %s\n", - dev_priv->psr.psr2_enabled ? "PSR2" : "PSR1"); - seq_printf(m, "Enabled: %s\n", yesno(dev_priv->psr.enabled)); - seq_printf(m, "Busy frontbuffer bits: 0x%03x\n", - dev_priv->psr.busy_frontbuffer_bits); + wakeref = intel_runtime_pm_get(dev_priv); + mutex_lock(&psr->lock); - if (dev_priv->psr.psr2_enabled) - enabled = I915_READ(EDP_PSR2_CTL) & EDP_PSR2_ENABLE; + if (psr->enabled) + status = psr->psr2_enabled ? "PSR2 enabled" : "PSR1 enabled"; else - enabled = I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE; + status = "disabled"; + seq_printf(m, "PSR mode: %s\n", status); - seq_printf(m, "Main link in standby mode: %s\n", - yesno(dev_priv->psr.link_standby)); + if (!psr->enabled) + goto unlock; - seq_printf(m, "HW Enabled & Active bit: %s\n", yesno(enabled)); + if (psr->psr2_enabled) { + val = I915_READ(EDP_PSR2_CTL); + enabled = val & EDP_PSR2_ENABLE; + } else { + val = I915_READ(EDP_PSR_CTL); + enabled = val & EDP_PSR_ENABLE; + } + seq_printf(m, "Source PSR ctl: %s [0x%08x]\n", + enableddisabled(enabled), val); + psr_source_status(dev_priv, m); + seq_printf(m, "Busy frontbuffer bits: 0x%08x\n", + psr->busy_frontbuffer_bits); /* * SKL+ Perf counter is reset to 0 everytime DC state is entered */ if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - psrperf = I915_READ(EDP_PSR_PERF_CNT) & - EDP_PSR_PERF_CNT_MASK; + val = I915_READ(EDP_PSR_PERF_CNT) & EDP_PSR_PERF_CNT_MASK; + seq_printf(m, "Performance counter: %u\n", val); + } - seq_printf(m, "Performance_Counter: %u\n", psrperf); + if (psr->debug & I915_PSR_DEBUG_IRQ) { + seq_printf(m, "Last attempted entry at: %lld\n", + psr->last_entry_attempt); + seq_printf(m, "Last exit at: %lld\n", psr->last_exit); } - psr_source_status(dev_priv, m); - mutex_unlock(&dev_priv->psr.lock); + if (psr->psr2_enabled) { + u32 su_frames_val[3]; + int frame; - if (READ_ONCE(dev_priv->psr.debug) & I915_PSR_DEBUG_IRQ) { - seq_printf(m, "Last attempted entry at: %lld\n", - dev_priv->psr.last_entry_attempt); - seq_printf(m, "Last exit at: %lld\n", - dev_priv->psr.last_exit); + /* + * Reading all 3 registers before hand to minimize crossing a + * frame boundary between register reads + */ + for (frame = 0; frame < PSR2_SU_STATUS_FRAMES; frame += 3) + su_frames_val[frame / 3] = I915_READ(PSR2_SU_STATUS(frame)); + + seq_puts(m, "Frame:\tPSR2 SU blocks:\n"); + + for (frame = 0; frame < PSR2_SU_STATUS_FRAMES; frame++) { + u32 su_blocks; + + su_blocks = su_frames_val[frame / 3] & + PSR2_SU_STATUS_MASK(frame); + su_blocks = su_blocks >> PSR2_SU_STATUS_SHIFT(frame); + seq_printf(m, "%d\t%d\n", frame, su_blocks); + } } - intel_runtime_pm_put(dev_priv); +unlock: + mutex_unlock(&psr->lock); + intel_runtime_pm_put(dev_priv, wakeref); + return 0; } @@ -2611,6 +2608,7 @@ i915_edp_psr_debug_set(void *data, u64 val) { struct drm_i915_private *dev_priv = data; struct drm_modeset_acquire_ctx ctx; + intel_wakeref_t wakeref; int ret; if (!CAN_PSR(dev_priv)) @@ -2618,7 +2616,7 @@ i915_edp_psr_debug_set(void *data, u64 val) DRM_DEBUG_KMS("Setting PSR debug to %llx\n", val); - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE); @@ -2633,7 +2631,7 @@ retry: drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return ret; } @@ -2658,24 +2656,20 @@ static int i915_energy_uJ(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); unsigned long long power; + intel_wakeref_t wakeref; u32 units; if (INTEL_GEN(dev_priv) < 6) return -ENODEV; - intel_runtime_pm_get(dev_priv); - - if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &power)) { - intel_runtime_pm_put(dev_priv); + if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &power)) return -ENODEV; - } units = (power & 0x1f00) >> 8; - power = I915_READ(MCH_SECP_NRG_STTS); - power = (1000000 * power) >> units; /* convert to uJ */ - - intel_runtime_pm_put(dev_priv); + with_intel_runtime_pm(dev_priv, wakeref) + power = I915_READ(MCH_SECP_NRG_STTS); + power = (1000000 * power) >> units; /* convert to uJ */ seq_printf(m, "%llu", power); return 0; @@ -2689,6 +2683,9 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused) if (!HAS_RUNTIME_PM(dev_priv)) seq_puts(m, "Runtime power management not supported\n"); + seq_printf(m, "Runtime power status: %s\n", + enableddisabled(!dev_priv->power_domains.wakeref)); + seq_printf(m, "GPU idle: %s (epoch %u)\n", yesno(!dev_priv->gt.awake), dev_priv->gt.epoch); seq_printf(m, "IRQs disabled: %s\n", @@ -2703,6 +2700,12 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused) pci_power_name(pdev->current_state), pdev->current_state); + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)) { + struct drm_printer p = drm_seq_file_printer(m); + + print_intel_runtime_pm_wakeref(dev_priv, &p); + } + return 0; } @@ -2737,6 +2740,7 @@ static int i915_power_domain_info(struct seq_file *m, void *unused) static int i915_dmc_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + intel_wakeref_t wakeref; struct intel_csr *csr; if (!HAS_CSR(dev_priv)) @@ -2744,7 +2748,7 @@ static int i915_dmc_info(struct seq_file *m, void *unused) csr = &dev_priv->csr; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); seq_printf(m, "fw loaded: %s\n", yesno(csr->dmc_payload != NULL)); seq_printf(m, "path: %s\n", csr->fw_path); @@ -2770,7 +2774,7 @@ out: seq_printf(m, "ssp base: 0x%08x\n", I915_READ(CSR_SSP_BASE)); seq_printf(m, "htp: 0x%08x\n", I915_READ(CSR_HTP_SKL)); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return 0; } @@ -3053,8 +3057,10 @@ static int i915_display_info(struct seq_file *m, void *unused) struct intel_crtc *crtc; struct drm_connector *connector; struct drm_connector_list_iter conn_iter; + intel_wakeref_t wakeref; + + wakeref = intel_runtime_pm_get(dev_priv); - intel_runtime_pm_get(dev_priv); seq_printf(m, "CRTC info\n"); seq_printf(m, "---------\n"); for_each_intel_crtc(dev, crtc) { @@ -3102,7 +3108,7 @@ static int i915_display_info(struct seq_file *m, void *unused) drm_connector_list_iter_end(&conn_iter); mutex_unlock(&dev->mode_config.mutex); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return 0; } @@ -3111,10 +3117,11 @@ static int i915_engine_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_engine_cs *engine; + intel_wakeref_t wakeref; enum intel_engine_id id; struct drm_printer p; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); seq_printf(m, "GT awake? %s (epoch %u)\n", yesno(dev_priv->gt.awake), dev_priv->gt.epoch); @@ -3127,7 +3134,7 @@ static int i915_engine_info(struct seq_file *m, void *unused) for_each_engine(engine, dev_priv, id) intel_engine_dump(engine, &p, "%s\n", engine->name); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return 0; } @@ -3240,20 +3247,21 @@ static ssize_t i915_ipc_status_write(struct file *file, const char __user *ubuf, { struct seq_file *m = file->private_data; struct drm_i915_private *dev_priv = m->private; - int ret; + intel_wakeref_t wakeref; bool enable; + int ret; ret = kstrtobool_from_user(ubuf, len, &enable); if (ret < 0) return ret; - intel_runtime_pm_get(dev_priv); - if (!dev_priv->ipc_enabled && enable) - DRM_INFO("Enabling IPC: WM will be proper only after next commit\n"); - dev_priv->wm.distrust_bios_wm = true; - dev_priv->ipc_enabled = enable; - intel_enable_ipc(dev_priv); - intel_runtime_pm_put(dev_priv); + with_intel_runtime_pm(dev_priv, wakeref) { + if (!dev_priv->ipc_enabled && enable) + DRM_INFO("Enabling IPC: WM will be proper only after next commit\n"); + dev_priv->wm.distrust_bios_wm = true; + dev_priv->ipc_enabled = enable; + intel_enable_ipc(dev_priv); + } return len; } @@ -3621,7 +3629,7 @@ static int i915_displayport_test_type_show(struct seq_file *m, void *data) } DEFINE_SHOW_ATTRIBUTE(i915_displayport_test_type); -static void wm_latency_show(struct seq_file *m, const uint16_t wm[8]) +static void wm_latency_show(struct seq_file *m, const u16 wm[8]) { struct drm_i915_private *dev_priv = m->private; struct drm_device *dev = &dev_priv->drm; @@ -3664,7 +3672,7 @@ static void wm_latency_show(struct seq_file *m, const uint16_t wm[8]) static int pri_wm_latency_show(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = m->private; - const uint16_t *latencies; + const u16 *latencies; if (INTEL_GEN(dev_priv) >= 9) latencies = dev_priv->wm.skl_latency; @@ -3679,7 +3687,7 @@ static int pri_wm_latency_show(struct seq_file *m, void *data) static int spr_wm_latency_show(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = m->private; - const uint16_t *latencies; + const u16 *latencies; if (INTEL_GEN(dev_priv) >= 9) latencies = dev_priv->wm.skl_latency; @@ -3694,7 +3702,7 @@ static int spr_wm_latency_show(struct seq_file *m, void *data) static int cur_wm_latency_show(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = m->private; - const uint16_t *latencies; + const u16 *latencies; if (INTEL_GEN(dev_priv) >= 9) latencies = dev_priv->wm.skl_latency; @@ -3720,7 +3728,7 @@ static int spr_wm_latency_open(struct inode *inode, struct file *file) { struct drm_i915_private *dev_priv = inode->i_private; - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) return -ENODEV; return single_open(file, spr_wm_latency_show, dev_priv); @@ -3730,19 +3738,19 @@ static int cur_wm_latency_open(struct inode *inode, struct file *file) { struct drm_i915_private *dev_priv = inode->i_private; - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) return -ENODEV; return single_open(file, cur_wm_latency_show, dev_priv); } static ssize_t wm_latency_write(struct file *file, const char __user *ubuf, - size_t len, loff_t *offp, uint16_t wm[8]) + size_t len, loff_t *offp, u16 wm[8]) { struct seq_file *m = file->private_data; struct drm_i915_private *dev_priv = m->private; struct drm_device *dev = &dev_priv->drm; - uint16_t new[8] = { 0 }; + u16 new[8] = { 0 }; int num_levels; int level; int ret; @@ -3787,7 +3795,7 @@ static ssize_t pri_wm_latency_write(struct file *file, const char __user *ubuf, { struct seq_file *m = file->private_data; struct drm_i915_private *dev_priv = m->private; - uint16_t *latencies; + u16 *latencies; if (INTEL_GEN(dev_priv) >= 9) latencies = dev_priv->wm.skl_latency; @@ -3802,7 +3810,7 @@ static ssize_t spr_wm_latency_write(struct file *file, const char __user *ubuf, { struct seq_file *m = file->private_data; struct drm_i915_private *dev_priv = m->private; - uint16_t *latencies; + u16 *latencies; if (INTEL_GEN(dev_priv) >= 9) latencies = dev_priv->wm.skl_latency; @@ -3817,7 +3825,7 @@ static ssize_t cur_wm_latency_write(struct file *file, const char __user *ubuf, { struct seq_file *m = file->private_data; struct drm_i915_private *dev_priv = m->private; - uint16_t *latencies; + u16 *latencies; if (INTEL_GEN(dev_priv) >= 9) latencies = dev_priv->wm.skl_latency; @@ -3868,8 +3876,6 @@ static int i915_wedged_set(void *data, u64 val) { struct drm_i915_private *i915 = data; - struct intel_engine_cs *engine; - unsigned int tmp; /* * There is no safeguard against this debugfs entry colliding @@ -3882,18 +3888,8 @@ i915_wedged_set(void *data, u64 val) if (i915_reset_backoff(&i915->gpu_error)) return -EAGAIN; - for_each_engine_masked(engine, i915, val, tmp) { - engine->hangcheck.seqno = intel_engine_get_seqno(engine); - engine->hangcheck.stalled = true; - } - i915_handle_error(i915, val, I915_ERROR_CAPTURE, "Manually set wedged engine mask = %llx", val); - - wait_on_bit(&i915->gpu_error.flags, - I915_RESET_HANDOFF, - TASK_UNINTERRUPTIBLE); - return 0; } @@ -3901,94 +3897,6 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops, i915_wedged_get, i915_wedged_set, "%llu\n"); -static int -fault_irq_set(struct drm_i915_private *i915, - unsigned long *irq, - unsigned long val) -{ - int err; - - err = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (err) - return err; - - err = i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED | - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (err) - goto err_unlock; - - *irq = val; - mutex_unlock(&i915->drm.struct_mutex); - - /* Flush idle worker to disarm irq */ - drain_delayed_work(&i915->gt.idle_work); - - return 0; - -err_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -static int -i915_ring_missed_irq_get(void *data, u64 *val) -{ - struct drm_i915_private *dev_priv = data; - - *val = dev_priv->gpu_error.missed_irq_rings; - return 0; -} - -static int -i915_ring_missed_irq_set(void *data, u64 val) -{ - struct drm_i915_private *i915 = data; - - return fault_irq_set(i915, &i915->gpu_error.missed_irq_rings, val); -} - -DEFINE_SIMPLE_ATTRIBUTE(i915_ring_missed_irq_fops, - i915_ring_missed_irq_get, i915_ring_missed_irq_set, - "0x%08llx\n"); - -static int -i915_ring_test_irq_get(void *data, u64 *val) -{ - struct drm_i915_private *dev_priv = data; - - *val = dev_priv->gpu_error.test_irq_rings; - - return 0; -} - -static int -i915_ring_test_irq_set(void *data, u64 val) -{ - struct drm_i915_private *i915 = data; - - /* GuC keeps the user interrupt permanently enabled for submission */ - if (USES_GUC_SUBMISSION(i915)) - return -ENODEV; - - /* - * From icl, we can no longer individually mask interrupt generation - * from each engine. - */ - if (INTEL_GEN(i915) >= 11) - return -ENODEV; - - val &= INTEL_INFO(i915)->ring_mask; - DRM_DEBUG_DRIVER("Masking interrupts on rings 0x%08llx\n", val); - - return fault_irq_set(i915, &i915->gpu_error.test_irq_rings, val); -} - -DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops, - i915_ring_test_irq_get, i915_ring_test_irq_set, - "0x%08llx\n"); - #define DROP_UNBOUND BIT(0) #define DROP_BOUND BIT(1) #define DROP_RETIRE BIT(2) @@ -4019,13 +3927,15 @@ static int i915_drop_caches_set(void *data, u64 val) { struct drm_i915_private *i915 = data; + intel_wakeref_t wakeref; int ret = 0; DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n", val, val & DROP_ALL); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); - if (val & DROP_RESET_ACTIVE && !intel_engines_are_idle(i915)) + if (val & DROP_RESET_ACTIVE && + wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) i915_gem_set_wedged(i915); /* No need to check and wait for gpu resets, only libdrm auto-restarts @@ -4047,13 +3957,8 @@ i915_drop_caches_set(void *data, u64 val) mutex_unlock(&i915->drm.struct_mutex); } - if (val & DROP_RESET_ACTIVE && - i915_terminally_wedged(&i915->gpu_error)) { + if (val & DROP_RESET_ACTIVE && i915_terminally_wedged(&i915->gpu_error)) i915_handle_error(i915, ALL_ENGINES, 0, NULL); - wait_on_bit(&i915->gpu_error.flags, - I915_RESET_HANDOFF, - TASK_UNINTERRUPTIBLE); - } fs_reclaim_acquire(GFP_KERNEL); if (val & DROP_BOUND) @@ -4078,7 +3983,7 @@ i915_drop_caches_set(void *data, u64 val) i915_gem_drain_freed_objects(i915); out: - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); return ret; } @@ -4091,16 +3996,14 @@ static int i915_cache_sharing_get(void *data, u64 *val) { struct drm_i915_private *dev_priv = data; - u32 snpcr; + intel_wakeref_t wakeref; + u32 snpcr = 0; if (!(IS_GEN_RANGE(dev_priv, 6, 7))) return -ENODEV; - intel_runtime_pm_get(dev_priv); - - snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); - - intel_runtime_pm_put(dev_priv); + with_intel_runtime_pm(dev_priv, wakeref) + snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); *val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT; @@ -4111,7 +4014,7 @@ static int i915_cache_sharing_set(void *data, u64 val) { struct drm_i915_private *dev_priv = data; - u32 snpcr; + intel_wakeref_t wakeref; if (!(IS_GEN_RANGE(dev_priv, 6, 7))) return -ENODEV; @@ -4119,16 +4022,17 @@ i915_cache_sharing_set(void *data, u64 val) if (val > 3) return -EINVAL; - intel_runtime_pm_get(dev_priv); DRM_DEBUG_DRIVER("Manually setting uncore sharing to %llu\n", val); + with_intel_runtime_pm(dev_priv, wakeref) { + u32 snpcr; + + /* Update the cache sharing policy here as well */ + snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); + snpcr &= ~GEN6_MBC_SNPCR_MASK; + snpcr |= val << GEN6_MBC_SNPCR_SHIFT; + I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr); + } - /* Update the cache sharing policy here as well */ - snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); - snpcr &= ~GEN6_MBC_SNPCR_MASK; - snpcr |= (val << GEN6_MBC_SNPCR_SHIFT); - I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr); - - intel_runtime_pm_put(dev_priv); return 0; } @@ -4350,6 +4254,7 @@ static int i915_sseu_status(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); struct sseu_dev_info sseu; + intel_wakeref_t wakeref; if (INTEL_GEN(dev_priv) < 8) return -ENODEV; @@ -4364,20 +4269,17 @@ static int i915_sseu_status(struct seq_file *m, void *unused) sseu.max_eus_per_subslice = RUNTIME_INFO(dev_priv)->sseu.max_eus_per_subslice; - intel_runtime_pm_get(dev_priv); - - if (IS_CHERRYVIEW(dev_priv)) { - cherryview_sseu_device_status(dev_priv, &sseu); - } else if (IS_BROADWELL(dev_priv)) { - broadwell_sseu_device_status(dev_priv, &sseu); - } else if (IS_GEN(dev_priv, 9)) { - gen9_sseu_device_status(dev_priv, &sseu); - } else if (INTEL_GEN(dev_priv) >= 10) { - gen10_sseu_device_status(dev_priv, &sseu); + with_intel_runtime_pm(dev_priv, wakeref) { + if (IS_CHERRYVIEW(dev_priv)) + cherryview_sseu_device_status(dev_priv, &sseu); + else if (IS_BROADWELL(dev_priv)) + broadwell_sseu_device_status(dev_priv, &sseu); + else if (IS_GEN(dev_priv, 9)) + gen9_sseu_device_status(dev_priv, &sseu); + else if (INTEL_GEN(dev_priv) >= 10) + gen10_sseu_device_status(dev_priv, &sseu); } - intel_runtime_pm_put(dev_priv); - i915_print_sseu_info(m, false, &sseu); return 0; @@ -4390,7 +4292,7 @@ static int i915_forcewake_open(struct inode *inode, struct file *file) if (INTEL_GEN(i915) < 6) return 0; - intel_runtime_pm_get(i915); + file->private_data = (void *)(uintptr_t)intel_runtime_pm_get(i915); intel_uncore_forcewake_user_get(i915); return 0; @@ -4404,7 +4306,8 @@ static int i915_forcewake_release(struct inode *inode, struct file *file) return 0; intel_uncore_forcewake_user_put(i915); - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, + (intel_wakeref_t)(uintptr_t)file->private_data); return 0; } @@ -4757,8 +4660,6 @@ static const struct i915_debugfs_files { } i915_debugfs_files[] = { {"i915_wedged", &i915_wedged_fops}, {"i915_cache_sharing", &i915_cache_sharing_fops}, - {"i915_ring_missed_irq", &i915_ring_missed_irq_fops}, - {"i915_ring_test_irq", &i915_ring_test_irq_fops}, {"i915_gem_drop_caches", &i915_drop_caches_fops}, #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) {"i915_error_state", &i915_error_state_fops}, @@ -4837,7 +4738,7 @@ static int i915_dpcd_show(struct seq_file *m, void *data) struct drm_connector *connector = m->private; struct intel_dp *intel_dp = enc_to_intel_dp(&intel_attached_encoder(connector)->base); - uint8_t buf[16]; + u8 buf[16]; ssize_t err; int i; @@ -4952,9 +4853,8 @@ static int i915_dsc_fec_support_show(struct seq_file *m, void *data) crtc_state = to_intel_crtc_state(crtc->state); seq_printf(m, "DSC_Enabled: %s\n", yesno(crtc_state->dsc_params.compression_enable)); - if (intel_dp->dsc_dpcd) - seq_printf(m, "DSC_Sink_Support: %s\n", - yesno(drm_dp_sink_supports_dsc(intel_dp->dsc_dpcd))); + seq_printf(m, "DSC_Sink_Support: %s\n", + yesno(drm_dp_sink_supports_dsc(intel_dp->dsc_dpcd))); if (!intel_dp_is_edp(intel_dp)) seq_printf(m, "FEC_Sink_Support: %s\n", yesno(drm_dp_sink_supports_fec(intel_dp->fec_capable))); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 16fd5ae06997..6630212f2faf 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -50,6 +50,7 @@ #include "i915_drv.h" #include "i915_trace.h" #include "i915_pmu.h" +#include "i915_reset.h" #include "i915_query.h" #include "i915_vgpu.h" #include "intel_drv.h" @@ -907,6 +908,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv) mutex_init(&dev_priv->pps_mutex); i915_memcpy_init_early(dev_priv); + intel_runtime_pm_init_early(dev_priv); ret = i915_workqueues_init(dev_priv); if (ret < 0) @@ -1781,6 +1783,9 @@ void i915_driver_unload(struct drm_device *dev) i915_driver_unregister(dev_priv); + /* Flush any external code that still may be under the RCU lock */ + synchronize_rcu(); + if (i915_gem_suspend(dev_priv)) DRM_ERROR("failed to idle hardware; continuing to unload!\n"); @@ -1809,8 +1814,7 @@ void i915_driver_unload(struct drm_device *dev) i915_driver_cleanup_mmio(dev_priv); enable_rpm_wakeref_asserts(dev_priv); - - WARN_ON(atomic_read(&dev_priv->runtime_pm.wakeref_count)); + intel_runtime_pm_cleanup(dev_priv); } static void i915_driver_release(struct drm_device *dev) @@ -2012,6 +2016,8 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) out: enable_rpm_wakeref_asserts(dev_priv); + if (!dev_priv->uncore.user_forcewake.count) + intel_runtime_pm_cleanup(dev_priv); return ret; } @@ -2202,211 +2208,6 @@ static int i915_resume_switcheroo(struct drm_device *dev) return i915_drm_resume(dev); } -/** - * i915_reset - reset chip after a hang - * @i915: #drm_i915_private to reset - * @stalled_mask: mask of the stalled engines with the guilty requests - * @reason: user error message for why we are resetting - * - * Reset the chip. Useful if a hang is detected. Marks the device as wedged - * on failure. - * - * Caller must hold the struct_mutex. - * - * Procedure is fairly simple: - * - reset the chip using the reset reg - * - re-init context state - * - re-init hardware status page - * - re-init ring buffer - * - re-init interrupt state - * - re-init display - */ -void i915_reset(struct drm_i915_private *i915, - unsigned int stalled_mask, - const char *reason) -{ - struct i915_gpu_error *error = &i915->gpu_error; - int ret; - int i; - - GEM_TRACE("flags=%lx\n", error->flags); - - might_sleep(); - lockdep_assert_held(&i915->drm.struct_mutex); - assert_rpm_wakelock_held(i915); - GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); - - if (!test_bit(I915_RESET_HANDOFF, &error->flags)) - return; - - /* Clear any previous failed attempts at recovery. Time to try again. */ - if (!i915_gem_unset_wedged(i915)) - goto wakeup; - - if (reason) - dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason); - error->reset_count++; - - ret = i915_gem_reset_prepare(i915); - if (ret) { - dev_err(i915->drm.dev, "GPU recovery failed\n"); - goto taint; - } - - if (!intel_has_gpu_reset(i915)) { - if (i915_modparams.reset) - dev_err(i915->drm.dev, "GPU reset not supported\n"); - else - DRM_DEBUG_DRIVER("GPU reset disabled\n"); - goto error; - } - - for (i = 0; i < 3; i++) { - ret = intel_gpu_reset(i915, ALL_ENGINES); - if (ret == 0) - break; - - msleep(100); - } - if (ret) { - dev_err(i915->drm.dev, "Failed to reset chip\n"); - goto taint; - } - - /* Ok, now get things going again... */ - - /* - * Everything depends on having the GTT running, so we need to start - * there. - */ - ret = i915_ggtt_enable_hw(i915); - if (ret) { - DRM_ERROR("Failed to re-enable GGTT following reset (%d)\n", - ret); - goto error; - } - - i915_gem_reset(i915, stalled_mask); - intel_overlay_reset(i915); - - /* - * Next we need to restore the context, but we don't use those - * yet either... - * - * Ring buffer needs to be re-initialized in the KMS case, or if X - * was running at the time of the reset (i.e. we weren't VT - * switched away). - */ - ret = i915_gem_init_hw(i915); - if (ret) { - DRM_ERROR("Failed to initialise HW following reset (%d)\n", - ret); - goto error; - } - - i915_queue_hangcheck(i915); - -finish: - i915_gem_reset_finish(i915); -wakeup: - clear_bit(I915_RESET_HANDOFF, &error->flags); - wake_up_bit(&error->flags, I915_RESET_HANDOFF); - return; - -taint: - /* - * History tells us that if we cannot reset the GPU now, we - * never will. This then impacts everything that is run - * subsequently. On failing the reset, we mark the driver - * as wedged, preventing further execution on the GPU. - * We also want to go one step further and add a taint to the - * kernel so that any subsequent faults can be traced back to - * this failure. This is important for CI, where if the - * GPU/driver fails we would like to reboot and restart testing - * rather than continue on into oblivion. For everyone else, - * the system should still plod along, but they have been warned! - */ - add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -error: - i915_gem_set_wedged(i915); - i915_retire_requests(i915); - goto finish; -} - -static inline int intel_gt_reset_engine(struct drm_i915_private *dev_priv, - struct intel_engine_cs *engine) -{ - return intel_gpu_reset(dev_priv, intel_engine_flag(engine)); -} - -/** - * i915_reset_engine - reset GPU engine to recover from a hang - * @engine: engine to reset - * @msg: reason for GPU reset; or NULL for no dev_notice() - * - * Reset a specific GPU engine. Useful if a hang is detected. - * Returns zero on successful reset or otherwise an error code. - * - * Procedure is: - * - identifies the request that caused the hang and it is dropped - * - reset engine (which will force the engine to idle) - * - re-init/configure engine - */ -int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) -{ - struct i915_gpu_error *error = &engine->i915->gpu_error; - struct i915_request *active_request; - int ret; - - GEM_TRACE("%s flags=%lx\n", engine->name, error->flags); - GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); - - active_request = i915_gem_reset_prepare_engine(engine); - if (IS_ERR_OR_NULL(active_request)) { - /* Either the previous reset failed, or we pardon the reset. */ - ret = PTR_ERR(active_request); - goto out; - } - - if (msg) - dev_notice(engine->i915->drm.dev, - "Resetting %s for %s\n", engine->name, msg); - error->reset_engine_count[engine->id]++; - - if (!engine->i915->guc.execbuf_client) - ret = intel_gt_reset_engine(engine->i915, engine); - else - ret = intel_guc_reset_engine(&engine->i915->guc, engine); - if (ret) { - /* If we fail here, we expect to fallback to a global reset */ - DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n", - engine->i915->guc.execbuf_client ? "GuC " : "", - engine->name, ret); - goto out; - } - - /* - * The request that caused the hang is stuck on elsp, we know the - * active request and can drop it, adjust head to skip the offending - * request to resume executing remaining requests in the queue. - */ - i915_gem_reset_engine(engine, active_request, true); - - /* - * The engine and its registers (and workarounds in case of render) - * have been reset to their default values. Follow the init_ring - * process to program RING_MODE, HWSP and re-enable submission. - */ - ret = engine->init_hw(engine); - if (ret) - goto out; - -out: - intel_engine_cancel_stop_cs(engine); - i915_gem_reset_finish_engine(engine); - return ret; -} - static int i915_pm_prepare(struct device *kdev) { struct pci_dev *pdev = to_pci_dev(kdev); @@ -2744,6 +2545,10 @@ static void vlv_restore_gunit_s0ix_state(struct drm_i915_private *dev_priv) static int vlv_wait_for_pw_status(struct drm_i915_private *dev_priv, u32 mask, u32 val) { + i915_reg_t reg = VLV_GTLC_PW_STATUS; + u32 reg_value; + int ret; + /* The HW does not like us polling for PW_STATUS frequently, so * use the sleeping loop rather than risk the busy spin within * intel_wait_for_register(). @@ -2751,8 +2556,12 @@ static int vlv_wait_for_pw_status(struct drm_i915_private *dev_priv, * Transitioning between RC6 states should be at most 2ms (see * valleyview_enable_rps) so use a 3ms timeout. */ - return wait_for((I915_READ_NOTRACE(VLV_GTLC_PW_STATUS) & mask) == val, - 3); + ret = wait_for(((reg_value = I915_READ_NOTRACE(reg)) & mask) == val, 3); + + /* just trace the final value */ + trace_i915_reg_rw(false, reg, reg_value, sizeof(reg_value), true); + + return ret; } int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool force_on) @@ -2967,7 +2776,7 @@ static int intel_runtime_suspend(struct device *kdev) } enable_rpm_wakeref_asserts(dev_priv); - WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count)); + intel_runtime_pm_cleanup(dev_priv); if (intel_uncore_arm_unclaimed_mmio_detection(dev_priv)) DRM_ERROR("Unclaimed access detected prior to suspending\n"); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5df26ccda8a4..9adc7bb9e69c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -45,6 +45,7 @@ #include <linux/pm_qos.h> #include <linux/reservation.h> #include <linux/shmem_fs.h> +#include <linux/stackdepot.h> #include <drm/intel-gtt.h> #include <drm/drm_legacy.h> /* for struct drm_dma_handle */ @@ -90,8 +91,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20190110" -#define DRIVER_TIMESTAMP 1547162337 +#define DRIVER_DATE "20190207" +#define DRIVER_TIMESTAMP 1549572331 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions @@ -130,6 +131,8 @@ bool i915_error_injected(void); __i915_printk(i915, i915_error_injected() ? KERN_DEBUG : KERN_ERR, \ fmt, ##__VA_ARGS__) +typedef depot_stack_handle_t intel_wakeref_t; + enum hpd_pin { HPD_NONE = 0, HPD_TV = HPD_NONE, /* TV is known to be unreliable */ @@ -320,8 +323,20 @@ struct drm_i915_display_funcs { /* display clock increase/decrease */ /* pll clock increase/decrease */ - void (*load_csc_matrix)(struct intel_crtc_state *crtc_state); - void (*load_luts)(struct intel_crtc_state *crtc_state); + /* + * Program double buffered color management registers during + * vblank evasion. The registers should then latch during the + * next vblank start, alongside any other double buffered registers + * involved with the same commit. + */ + void (*color_commit)(const struct intel_crtc_state *crtc_state); + /* + * Load LUTs (and other single buffered color management + * registers). Will (hopefully) be called during the vblank + * following the latching of any double buffered registers + * involved with the same commit. + */ + void (*load_luts)(const struct intel_crtc_state *crtc_state); }; #define CSR_VERSION(major, minor) ((major) << 16 | (minor)) @@ -331,16 +346,17 @@ struct drm_i915_display_funcs { struct intel_csr { struct work_struct work; const char *fw_path; - uint32_t required_version; - uint32_t max_fw_size; /* bytes */ - uint32_t *dmc_payload; - uint32_t dmc_fw_size; /* dwords */ - uint32_t version; - uint32_t mmio_count; + u32 required_version; + u32 max_fw_size; /* bytes */ + u32 *dmc_payload; + u32 dmc_fw_size; /* dwords */ + u32 version; + u32 mmio_count; i915_reg_t mmioaddr[8]; - uint32_t mmiodata[8]; - uint32_t dc_state; - uint32_t allowed_dc_mask; + u32 mmiodata[8]; + u32 dc_state; + u32 allowed_dc_mask; + intel_wakeref_t wakeref; }; enum i915_cache_level { @@ -396,7 +412,7 @@ struct intel_fbc { struct { unsigned int mode_flags; - uint32_t hsw_bdw_pixel_rate; + u32 hsw_bdw_pixel_rate; } crtc; struct { @@ -415,7 +431,7 @@ struct intel_fbc { int y; - uint16_t pixel_blend_mode; + u16 pixel_blend_mode; } plane; struct { @@ -555,7 +571,7 @@ struct i915_suspend_saved_registers { u32 saveSWF0[16]; u32 saveSWF1[16]; u32 saveSWF3[3]; - uint64_t saveFENCE[I915_MAX_NUM_FENCES]; + u64 saveFENCE[I915_MAX_NUM_FENCES]; u32 savePCH_PORT_HOTPLUG; u16 saveGCDGMBUS; }; @@ -818,6 +834,8 @@ struct i915_power_domains { bool display_core_suspended; int power_well_count; + intel_wakeref_t wakeref; + struct mutex lock; int domain_use_count[POWER_DOMAIN_NUM]; struct i915_power_well *power_wells; @@ -900,9 +918,9 @@ struct i915_gem_mm { atomic_t bsd_engine_dispatch_index; /** Bit 6 swizzling required for X tiling */ - uint32_t bit_6_swizzle_x; + u32 bit_6_swizzle_x; /** Bit 6 swizzling required for Y tiling */ - uint32_t bit_6_swizzle_y; + u32 bit_6_swizzle_y; /* accounting, useful for userland debugging */ spinlock_t object_stat_lock; @@ -929,20 +947,20 @@ struct ddi_vbt_port_info { * populate this field. */ #define HDMI_LEVEL_SHIFT_UNKNOWN 0xff - uint8_t hdmi_level_shift; + u8 hdmi_level_shift; - uint8_t supports_dvi:1; - uint8_t supports_hdmi:1; - uint8_t supports_dp:1; - uint8_t supports_edp:1; - uint8_t supports_typec_usb:1; - uint8_t supports_tbt:1; + u8 supports_dvi:1; + u8 supports_hdmi:1; + u8 supports_dp:1; + u8 supports_edp:1; + u8 supports_typec_usb:1; + u8 supports_tbt:1; - uint8_t alternate_aux_channel; - uint8_t alternate_ddc_pin; + u8 alternate_aux_channel; + u8 alternate_ddc_pin; - uint8_t dp_boost_level; - uint8_t hdmi_boost_level; + u8 dp_boost_level; + u8 hdmi_boost_level; int dp_max_link_rate; /* 0 for not limited by VBT */ }; @@ -1033,41 +1051,41 @@ enum intel_ddb_partitioning { struct intel_wm_level { bool enable; - uint32_t pri_val; - uint32_t spr_val; - uint32_t cur_val; - uint32_t fbc_val; + u32 pri_val; + u32 spr_val; + u32 cur_val; + u32 fbc_val; }; struct ilk_wm_values { - uint32_t wm_pipe[3]; - uint32_t wm_lp[3]; - uint32_t wm_lp_spr[3]; - uint32_t wm_linetime[3]; + u32 wm_pipe[3]; + u32 wm_lp[3]; + u32 wm_lp_spr[3]; + u32 wm_linetime[3]; bool enable_fbc_wm; enum intel_ddb_partitioning partitioning; }; struct g4x_pipe_wm { - uint16_t plane[I915_MAX_PLANES]; - uint16_t fbc; + u16 plane[I915_MAX_PLANES]; + u16 fbc; }; struct g4x_sr_wm { - uint16_t plane; - uint16_t cursor; - uint16_t fbc; + u16 plane; + u16 cursor; + u16 fbc; }; struct vlv_wm_ddl_values { - uint8_t plane[I915_MAX_PLANES]; + u8 plane[I915_MAX_PLANES]; }; struct vlv_wm_values { struct g4x_pipe_wm pipe[3]; struct g4x_sr_wm sr; struct vlv_wm_ddl_values ddl[3]; - uint8_t level; + u8 level; bool cxsr; }; @@ -1081,10 +1099,10 @@ struct g4x_wm_values { }; struct skl_ddb_entry { - uint16_t start, end; /* in number of blocks, 'end' is exclusive */ + u16 start, end; /* in number of blocks, 'end' is exclusive */ }; -static inline uint16_t skl_ddb_entry_size(const struct skl_ddb_entry *entry) +static inline u16 skl_ddb_entry_size(const struct skl_ddb_entry *entry) { return entry->end - entry->start; } @@ -1108,8 +1126,9 @@ struct skl_ddb_values { }; struct skl_wm_level { - uint16_t plane_res_b; - uint8_t plane_res_l; + u16 min_ddb_alloc; + u16 plane_res_b; + u8 plane_res_l; bool plane_en; }; @@ -1118,15 +1137,15 @@ struct skl_wm_params { bool x_tiled, y_tiled; bool rc_surface; bool is_planar; - uint32_t width; - uint8_t cpp; - uint32_t plane_pixel_rate; - uint32_t y_min_scanlines; - uint32_t plane_bytes_per_line; + u32 width; + u8 cpp; + u32 plane_pixel_rate; + u32 y_min_scanlines; + u32 plane_bytes_per_line; uint_fixed_16_16_t plane_blocks_per_line; uint_fixed_16_16_t y_tile_minimum; - uint32_t linetime_us; - uint32_t dbuf_block_size; + u32 linetime_us; + u32 dbuf_block_size; }; /* @@ -1156,6 +1175,25 @@ struct i915_runtime_pm { atomic_t wakeref_count; bool suspended; bool irqs_enabled; + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) + /* + * To aide detection of wakeref leaks and general misuse, we + * track all wakeref holders. With manual markup (i.e. returning + * a cookie to each rpm_get caller which they then supply to their + * paired rpm_put) we can remove corresponding pairs of and keep + * the array trimmed to active wakerefs. + */ + struct intel_runtime_pm_debug { + spinlock_t lock; + + depot_stack_handle_t last_acquire; + depot_stack_handle_t last_release; + + depot_stack_handle_t *owners; + unsigned long count; + } debug; +#endif }; enum intel_pipe_crc_source { @@ -1312,6 +1350,12 @@ struct i915_perf_stream { struct list_head link; /** + * @wakeref: As we keep the device awake while the perf stream is + * active, we track our runtime pm reference for later release. + */ + intel_wakeref_t wakeref; + + /** * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` * properties given when opening a stream, representing the contents * of a single sample as read() by userspace. @@ -1484,14 +1528,14 @@ struct drm_i915_private { * Base address of where the gmbus and gpio blocks are located (either * on PCH or on SoC for platforms without PCH). */ - uint32_t gpio_mmio_base; + u32 gpio_mmio_base; /* MMIO base address for MIPI regs */ - uint32_t mipi_mmio_base; + u32 mipi_mmio_base; - uint32_t psr_mmio_base; + u32 psr_mmio_base; - uint32_t pps_mmio_base; + u32 pps_mmio_base; wait_queue_head_t gmbus_wait_queue; @@ -1746,17 +1790,17 @@ struct drm_i915_private { * in 0.5us units for WM1+. */ /* primary */ - uint16_t pri_latency[5]; + u16 pri_latency[5]; /* sprite */ - uint16_t spr_latency[5]; + u16 spr_latency[5]; /* cursor */ - uint16_t cur_latency[5]; + u16 cur_latency[5]; /* * Raw watermark memory latency values * for SKL for all 8 levels * in 1us units. */ - uint16_t skl_latency[8]; + u16 skl_latency[8]; /* current hardware state */ union { @@ -1766,7 +1810,7 @@ struct drm_i915_private { struct g4x_wm_values g4x; }; - uint8_t max_level; + u8 max_level; /* * Should be held around atomic WM register writing; also @@ -1944,7 +1988,14 @@ struct drm_i915_private { void (*resume)(struct drm_i915_private *); void (*cleanup_engine)(struct intel_engine_cs *engine); - struct list_head timelines; + struct i915_gt_timelines { + struct mutex mutex; /* protects list, tainted by GPU */ + struct list_head active_list; + + /* Pack multiple timelines' seqnos into the same page */ + spinlock_t hwsp_lock; + struct list_head hwsp_free_list; + } timelines; struct list_head active_rings; struct list_head closed_vma; @@ -1957,7 +2008,7 @@ struct drm_i915_private { * In order to reduce the effect on performance, there * is a slight delay before we do so. */ - bool awake; + intel_wakeref_t awake; /** * The number of times we have woken up. @@ -2314,6 +2365,8 @@ static inline unsigned int i915_sg_segment_size(void) INTEL_INFO(dev_priv)->gt == 3) #define IS_CNL_WITH_PORT_F(dev_priv) (IS_CANNONLAKE(dev_priv) && \ (INTEL_DEVID(dev_priv) & 0x0004) == 0x0004) +#define IS_ICL_WITH_PORT_F(dev_priv) (IS_ICELAKE(dev_priv) && \ + INTEL_DEVID(dev_priv) != 0x8A51) #define IS_ALPHA_SUPPORT(intel_info) ((intel_info)->is_alpha_support) @@ -2449,7 +2502,7 @@ static inline unsigned int i915_sg_segment_size(void) #define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2) #define HAS_FBC(dev_priv) (INTEL_INFO(dev_priv)->display.has_fbc) -#define HAS_CUR_FBC(dev_priv) (!HAS_GMCH_DISPLAY(dev_priv) && INTEL_GEN(dev_priv) >= 7) +#define HAS_CUR_FBC(dev_priv) (!HAS_GMCH(dev_priv) && INTEL_GEN(dev_priv) >= 7) #define HAS_IPS(dev_priv) (IS_HSW_ULT(dev_priv) || IS_BROADWELL(dev_priv)) @@ -2529,7 +2582,7 @@ static inline unsigned int i915_sg_segment_size(void) #define HAS_PCH_NOP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_NOP) #define HAS_PCH_SPLIT(dev_priv) (INTEL_PCH_TYPE(dev_priv) != PCH_NONE) -#define HAS_GMCH_DISPLAY(dev_priv) (INTEL_INFO(dev_priv)->display.has_gmch_display) +#define HAS_GMCH(dev_priv) (INTEL_INFO(dev_priv)->display.has_gmch) #define HAS_LSPCON(dev_priv) (INTEL_GEN(dev_priv) >= 9) @@ -2584,19 +2637,7 @@ extern const struct dev_pm_ops i915_pm_ops; extern int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent); extern void i915_driver_unload(struct drm_device *dev); -extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask); -extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv); - -extern void i915_reset(struct drm_i915_private *i915, - unsigned int stalled_mask, - const char *reason); -extern int i915_reset_engine(struct intel_engine_cs *engine, - const char *reason); - -extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv); -extern int intel_reset_guc(struct drm_i915_private *dev_priv); -extern int intel_guc_reset_engine(struct intel_guc *guc, - struct intel_engine_cs *engine); + extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine); extern void intel_hangcheck_init(struct drm_i915_private *dev_priv); extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv); @@ -2639,20 +2680,11 @@ static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv) &dev_priv->gpu_error.hangcheck_work, delay); } -__printf(4, 5) -void i915_handle_error(struct drm_i915_private *dev_priv, - u32 engine_mask, - unsigned long flags, - const char *fmt, ...); -#define I915_ERROR_CAPTURE BIT(0) - extern void intel_irq_init(struct drm_i915_private *dev_priv); extern void intel_irq_fini(struct drm_i915_private *dev_priv); int intel_irq_install(struct drm_i915_private *dev_priv); void intel_irq_uninstall(struct drm_i915_private *dev_priv); -void i915_clear_error_registers(struct drm_i915_private *dev_priv); - static inline bool intel_gvt_active(struct drm_i915_private *dev_priv) { return dev_priv->gvt; @@ -2676,45 +2708,45 @@ i915_disable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe, void valleyview_enable_display_irqs(struct drm_i915_private *dev_priv); void valleyview_disable_display_irqs(struct drm_i915_private *dev_priv); void i915_hotplug_interrupt_update(struct drm_i915_private *dev_priv, - uint32_t mask, - uint32_t bits); + u32 mask, + u32 bits); void ilk_update_display_irq(struct drm_i915_private *dev_priv, - uint32_t interrupt_mask, - uint32_t enabled_irq_mask); + u32 interrupt_mask, + u32 enabled_irq_mask); static inline void -ilk_enable_display_irq(struct drm_i915_private *dev_priv, uint32_t bits) +ilk_enable_display_irq(struct drm_i915_private *dev_priv, u32 bits) { ilk_update_display_irq(dev_priv, bits, bits); } static inline void -ilk_disable_display_irq(struct drm_i915_private *dev_priv, uint32_t bits) +ilk_disable_display_irq(struct drm_i915_private *dev_priv, u32 bits) { ilk_update_display_irq(dev_priv, bits, 0); } void bdw_update_pipe_irq(struct drm_i915_private *dev_priv, enum pipe pipe, - uint32_t interrupt_mask, - uint32_t enabled_irq_mask); + u32 interrupt_mask, + u32 enabled_irq_mask); static inline void bdw_enable_pipe_irq(struct drm_i915_private *dev_priv, - enum pipe pipe, uint32_t bits) + enum pipe pipe, u32 bits) { bdw_update_pipe_irq(dev_priv, pipe, bits, bits); } static inline void bdw_disable_pipe_irq(struct drm_i915_private *dev_priv, - enum pipe pipe, uint32_t bits) + enum pipe pipe, u32 bits) { bdw_update_pipe_irq(dev_priv, pipe, bits, 0); } void ibx_display_interrupt_update(struct drm_i915_private *dev_priv, - uint32_t interrupt_mask, - uint32_t enabled_irq_mask); + u32 interrupt_mask, + u32 enabled_irq_mask); static inline void -ibx_enable_display_interrupt(struct drm_i915_private *dev_priv, uint32_t bits) +ibx_enable_display_interrupt(struct drm_i915_private *dev_priv, u32 bits) { ibx_display_interrupt_update(dev_priv, bits, bits); } static inline void -ibx_disable_display_interrupt(struct drm_i915_private *dev_priv, uint32_t bits) +ibx_disable_display_interrupt(struct drm_i915_private *dev_priv, u32 bits) { ibx_display_interrupt_update(dev_priv, bits, 0); } @@ -2904,8 +2936,8 @@ enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */ I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */ }; -void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, - enum i915_mm_subclass subclass); +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, + enum i915_mm_subclass subclass); void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj); enum i915_map_type { @@ -2974,7 +3006,7 @@ int i915_gem_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev, - uint32_t handle, uint64_t *offset); + u32 handle, u64 *offset); int i915_gem_mmap_gtt_version(void); void i915_gem_track_fb(struct drm_i915_gem_object *old, @@ -2991,11 +3023,6 @@ static inline bool i915_reset_backoff(struct i915_gpu_error *error) return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags)); } -static inline bool i915_reset_handoff(struct i915_gpu_error *error) -{ - return unlikely(test_bit(I915_RESET_HANDOFF, &error->flags)); -} - static inline bool i915_terminally_wedged(struct i915_gpu_error *error) { return unlikely(test_bit(I915_WEDGED, &error->flags)); @@ -3017,18 +3044,8 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, return READ_ONCE(error->reset_engine_count[engine->id]); } -struct i915_request * -i915_gem_reset_prepare_engine(struct intel_engine_cs *engine); -int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); -void i915_gem_reset(struct drm_i915_private *dev_priv, - unsigned int stalled_mask); -void i915_gem_reset_finish_engine(struct intel_engine_cs *engine); -void i915_gem_reset_finish(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv); bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv); -void i915_gem_reset_engine(struct intel_engine_cs *engine, - struct i915_request *request, - bool stalled); void i915_gem_init_mmio(struct drm_i915_private *i915); int __must_check i915_gem_init(struct drm_i915_private *dev_priv); @@ -3125,7 +3142,7 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, struct drm_file *file); void i915_oa_init_reg_state(struct intel_engine_cs *engine, struct i915_gem_context *ctx, - uint32_t *reg_state); + u32 *reg_state); /* i915_gem_evict.c */ int __must_check i915_gem_evict_something(struct i915_address_space *vm, @@ -3300,6 +3317,20 @@ mkwrite_device_info(struct drm_i915_private *dev_priv) return (struct intel_device_info *)INTEL_INFO(dev_priv); } +static inline struct intel_sseu +intel_device_default_sseu(struct drm_i915_private *i915) +{ + const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; + struct intel_sseu value = { + .slice_mask = sseu->slice_mask, + .subslice_mask = sseu->subslice_mask[0], + .min_eus_per_subslice = sseu->max_eus_per_subslice, + .max_eus_per_subslice = sseu->max_eus_per_subslice, + }; + + return value; +} + /* modesetting */ extern void intel_modeset_init_hw(struct drm_device *dev); extern int intel_modeset_init(struct drm_device *dev); @@ -3377,10 +3408,10 @@ bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, enum dpio_phy phy); bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv, enum dpio_phy phy); -uint8_t bxt_ddi_phy_calc_lane_lat_optim_mask(uint8_t lane_count); +u8 bxt_ddi_phy_calc_lane_lat_optim_mask(u8 lane_count); void bxt_ddi_phy_set_lane_optim_mask(struct intel_encoder *encoder, - uint8_t lane_lat_optim_mask); -uint8_t bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder); + u8 lane_lat_optim_mask); +u8 bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder); void chv_set_phy_signal_level(struct intel_encoder *encoder, u32 deemph_reg_value, u32 margin_reg_value, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 04aef3f64c45..6728ea5c71d4 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -28,15 +28,6 @@ #include <drm/drm_vma_manager.h> #include <drm/drm_pci.h> #include <drm/i915_drm.h> -#include "i915_drv.h" -#include "i915_gem_clflush.h" -#include "i915_vgpu.h" -#include "i915_trace.h" -#include "intel_drv.h" -#include "intel_frontbuffer.h" -#include "intel_mocs.h" -#include "intel_workarounds.h" -#include "i915_gemfs.h" #include <linux/dma-fence-array.h> #include <linux/kthread.h> #include <linux/reservation.h> @@ -48,6 +39,18 @@ #include <linux/dma-buf.h> #include <linux/mman.h> +#include "i915_drv.h" +#include "i915_gem_clflush.h" +#include "i915_gemfs.h" +#include "i915_reset.h" +#include "i915_trace.h" +#include "i915_vgpu.h" + +#include "intel_drv.h" +#include "intel_frontbuffer.h" +#include "intel_mocs.h" +#include "intel_workarounds.h" + static void i915_gem_flush_free_objects(struct drm_i915_private *i915); static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) @@ -140,6 +143,8 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) static u32 __i915_gem_park(struct drm_i915_private *i915) { + intel_wakeref_t wakeref; + GEM_TRACE("\n"); lockdep_assert_held(&i915->drm.struct_mutex); @@ -170,14 +175,13 @@ static u32 __i915_gem_park(struct drm_i915_private *i915) i915_pmu_gt_parked(i915); i915_vma_parked(i915); - i915->gt.awake = false; + wakeref = fetch_and_zero(&i915->gt.awake); + GEM_BUG_ON(!wakeref); if (INTEL_GEN(i915) >= 6) gen6_rps_idle(i915); - intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ); - - intel_runtime_pm_put(i915); + intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref); return i915->gt.epoch; } @@ -202,12 +206,11 @@ void i915_gem_unpark(struct drm_i915_private *i915) lockdep_assert_held(&i915->drm.struct_mutex); GEM_BUG_ON(!i915->gt.active_requests); + assert_rpm_wakelock_held(i915); if (i915->gt.awake) return; - intel_runtime_pm_get_noresume(i915); - /* * It seems that the DMC likes to transition between the DC states a lot * when there are no connected displays (no active power domains) during @@ -219,9 +222,9 @@ void i915_gem_unpark(struct drm_i915_private *i915) * Work around it by grabbing a GT IRQ power domain whilst there is any * GT activity, preventing any DC state transitions. */ - intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + GEM_BUG_ON(!i915->gt.awake); - i915->gt.awake = true; if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */ i915->gt.epoch = 1; @@ -244,21 +247,19 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct i915_ggtt *ggtt = &to_i915(dev)->ggtt; struct drm_i915_gem_get_aperture *args = data; struct i915_vma *vma; u64 pinned; + mutex_lock(&ggtt->vm.mutex); + pinned = ggtt->vm.reserved; - mutex_lock(&dev->struct_mutex); - list_for_each_entry(vma, &ggtt->vm.active_list, vm_link) + list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) if (i915_vma_is_pinned(vma)) pinned += vma->node.size; - list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link) - if (i915_vma_is_pinned(vma)) - pinned += vma->node.size; - mutex_unlock(&dev->struct_mutex); + + mutex_unlock(&ggtt->vm.mutex); args->aper_size = ggtt->vm.total; args->aper_available_size = args->aper_size - pinned; @@ -438,15 +439,19 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj) if (ret) return ret; - while ((vma = list_first_entry_or_null(&obj->vma_list, - struct i915_vma, - obj_link))) { + spin_lock(&obj->vma.lock); + while (!ret && (vma = list_first_entry_or_null(&obj->vma.list, + struct i915_vma, + obj_link))) { list_move_tail(&vma->obj_link, &still_in_list); + spin_unlock(&obj->vma.lock); + ret = i915_vma_unbind(vma); - if (ret) - break; + + spin_lock(&obj->vma.lock); } - list_splice(&still_in_list, &obj->vma_list); + list_splice(&still_in_list, &obj->vma.list); + spin_unlock(&obj->vma.lock); return ret; } @@ -656,11 +661,6 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj, struct intel_rps_client *rps_client) { might_sleep(); -#if IS_ENABLED(CONFIG_LOCKDEP) - GEM_BUG_ON(debug_locks && - !!lockdep_is_held(&obj->base.dev->struct_mutex) != - !!(flags & I915_WAIT_LOCKED)); -#endif GEM_BUG_ON(timeout < 0); timeout = i915_gem_object_wait_reservation(obj->resv, @@ -712,8 +712,8 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj) static int i915_gem_create(struct drm_file *file, struct drm_i915_private *dev_priv, - uint64_t size, - uint32_t *handle_p) + u64 size, + u32 *handle_p) { struct drm_i915_gem_object *obj; int ret; @@ -784,6 +784,8 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) { + intel_wakeref_t wakeref; + /* * No actual flushing is required for the GTT write domain for reads * from the GTT domain. Writes to it "immediately" go to main memory @@ -810,13 +812,13 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) i915_gem_chipset_flush(dev_priv); - intel_runtime_pm_get(dev_priv); - spin_lock_irq(&dev_priv->uncore.lock); + with_intel_runtime_pm(dev_priv, wakeref) { + spin_lock_irq(&dev_priv->uncore.lock); - POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE)); + POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE)); - spin_unlock_irq(&dev_priv->uncore.lock); - intel_runtime_pm_put(dev_priv); + spin_unlock_irq(&dev_priv->uncore.lock); + } } static void @@ -1068,6 +1070,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, { struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_ggtt *ggtt = &i915->ggtt; + intel_wakeref_t wakeref; struct drm_mm_node node; struct i915_vma *vma; void __user *user_data; @@ -1078,7 +1081,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, if (ret) return ret; - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE | PIN_NONFAULT | @@ -1151,7 +1154,7 @@ out_unpin: i915_vma_unpin(vma); } out_unlock: - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); return ret; @@ -1252,6 +1255,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, { struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_ggtt *ggtt = &i915->ggtt; + intel_wakeref_t wakeref; struct drm_mm_node node; struct i915_vma *vma; u64 remain, offset; @@ -1270,13 +1274,14 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, * This easily dwarfs any performance advantage from * using the cache bypass of indirect GGTT access. */ - if (!intel_runtime_pm_get_if_in_use(i915)) { + wakeref = intel_runtime_pm_get_if_in_use(i915); + if (!wakeref) { ret = -EFAULT; goto out_unlock; } } else { /* No backing pages, no fallback, we must force GGTT access */ - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); } vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, @@ -1358,7 +1363,7 @@ out_unpin: i915_vma_unpin(vma); } out_rpm: - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); out_unlock: mutex_unlock(&i915->drm.struct_mutex); return ret; @@ -1531,23 +1536,21 @@ err: static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915; + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct list_head *list; struct i915_vma *vma; GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + mutex_lock(&i915->ggtt.vm.mutex); for_each_ggtt_vma(vma, obj) { - if (i915_vma_is_active(vma)) - continue; - if (!drm_mm_node_allocated(&vma->node)) continue; - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + list_move_tail(&vma->vm_link, &vma->vm->bound_list); } + mutex_unlock(&i915->ggtt.vm.mutex); - i915 = to_i915(obj->base.dev); spin_lock(&i915->mm.obj_lock); list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list; list_move_tail(&obj->mm.link, list); @@ -1567,8 +1570,8 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_set_domain *args = data; struct drm_i915_gem_object *obj; - uint32_t read_domains = args->read_domains; - uint32_t write_domain = args->write_domain; + u32 read_domains = args->read_domains; + u32 write_domain = args->write_domain; int err; /* Only handle setting domains to types used by the CPU. */ @@ -1678,6 +1681,16 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, return 0; } +static inline bool +__vma_matches(struct vm_area_struct *vma, struct file *filp, + unsigned long addr, unsigned long size) +{ + if (vma->vm_file != filp) + return false; + + return vma->vm_start == addr && (vma->vm_end - vma->vm_start) == size; +} + /** * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address * it is mapped to. @@ -1727,6 +1740,9 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, addr = vm_mmap(obj->base.filp, 0, args->size, PROT_READ | PROT_WRITE, MAP_SHARED, args->offset); + if (IS_ERR_VALUE(addr)) + goto err; + if (args->flags & I915_MMAP_WC) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; @@ -1736,23 +1752,28 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, return -EINTR; } vma = find_vma(mm, addr); - if (vma) + if (vma && __vma_matches(vma, obj->base.filp, addr, args->size)) vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); else addr = -ENOMEM; up_write(&mm->mmap_sem); + if (IS_ERR_VALUE(addr)) + goto err; /* This may race, but that's ok, it only gets set */ WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU); } i915_gem_object_put(obj); - if (IS_ERR((void *)addr)) - return addr; - args->addr_ptr = (uint64_t) addr; + args->addr_ptr = (u64)addr; return 0; + +err: + i915_gem_object_put(obj); + + return addr; } static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) @@ -1863,6 +1884,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; bool write = area->vm_flags & VM_WRITE; + intel_wakeref_t wakeref; struct i915_vma *vma; pgoff_t page_offset; int ret; @@ -1892,7 +1914,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) if (ret) goto err; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); ret = i915_mutex_lock_interruptible(dev); if (ret) @@ -1970,7 +1992,7 @@ err_unpin: err_unlock: mutex_unlock(&dev->struct_mutex); err_rpm: - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); i915_gem_object_unpin_pages(obj); err: switch (ret) { @@ -2043,6 +2065,7 @@ void i915_gem_release_mmap(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); + intel_wakeref_t wakeref; /* Serialisation between user GTT access and our code depends upon * revoking the CPU's PTE whilst the mutex is held. The next user @@ -2053,7 +2076,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) * wakeref. */ lockdep_assert_held(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); if (!obj->userfault_count) goto out; @@ -2070,7 +2093,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) wmb(); out: - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); } void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) @@ -2150,8 +2173,8 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) int i915_gem_mmap_gtt(struct drm_file *file, struct drm_device *dev, - uint32_t handle, - uint64_t *offset) + u32 handle, + u64 *offset) { struct drm_i915_gem_object *obj; int ret; @@ -2298,8 +2321,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) struct sg_table *pages; pages = fetch_and_zero(&obj->mm.pages); - if (!pages) - return NULL; + if (IS_ERR_OR_NULL(pages)) + return pages; spin_lock(&i915->mm.obj_lock); list_del(&obj->mm.link); @@ -2323,22 +2346,23 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) return pages; } -void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, - enum i915_mm_subclass subclass) +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, + enum i915_mm_subclass subclass) { struct sg_table *pages; + int ret; if (i915_gem_object_has_pinned_pages(obj)) - return; + return -EBUSY; GEM_BUG_ON(obj->bind_count); - if (!i915_gem_object_has_pages(obj)) - return; /* May be called by shrinker from within get_pages() (on another bo) */ mutex_lock_nested(&obj->mm.lock, subclass); - if (unlikely(atomic_read(&obj->mm.pages_pin_count))) + if (unlikely(atomic_read(&obj->mm.pages_pin_count))) { + ret = -EBUSY; goto unlock; + } /* * ->put_pages might need to allocate memory for the bit17 swizzle @@ -2346,11 +2370,24 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, * lists early. */ pages = __i915_gem_object_unset_pages(obj); + + /* + * XXX Temporary hijinx to avoid updating all backends to handle + * NULL pages. In the future, when we have more asynchronous + * get_pages backends we should be better able to handle the + * cancellation of the async task in a more uniform manner. + */ + if (!pages && !i915_gem_object_needs_async_cancel(obj)) + pages = ERR_PTR(-EINVAL); + if (!IS_ERR(pages)) obj->ops->put_pages(obj, pages); + ret = 0; unlock: mutex_unlock(&obj->mm.lock); + + return ret; } bool i915_sg_trim(struct sg_table *orig_st) @@ -2854,59 +2891,12 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, return 0; } -static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv, - const struct i915_gem_context *ctx) +static bool match_ring(struct i915_request *rq) { - unsigned int score; - unsigned long prev_hang; - - if (i915_gem_context_is_banned(ctx)) - score = I915_CLIENT_SCORE_CONTEXT_BAN; - else - score = 0; - - prev_hang = xchg(&file_priv->hang_timestamp, jiffies); - if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES)) - score += I915_CLIENT_SCORE_HANG_FAST; + struct drm_i915_private *dev_priv = rq->i915; + u32 ring = I915_READ(RING_START(rq->engine->mmio_base)); - if (score) { - atomic_add(score, &file_priv->ban_score); - - DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n", - ctx->name, score, - atomic_read(&file_priv->ban_score)); - } -} - -static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) -{ - unsigned int score; - bool banned, bannable; - - atomic_inc(&ctx->guilty_count); - - bannable = i915_gem_context_is_bannable(ctx); - score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score); - banned = score >= CONTEXT_SCORE_BAN_THRESHOLD; - - /* Cool contexts don't accumulate client ban score */ - if (!bannable) - return; - - if (banned) { - DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n", - ctx->name, atomic_read(&ctx->guilty_count), - score); - i915_gem_context_set_banned(ctx); - } - - if (!IS_ERR_OR_NULL(ctx->file_priv)) - i915_gem_client_mark_guilty(ctx->file_priv, ctx); -} - -static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) -{ - atomic_inc(&ctx->active_count); + return ring == i915_ggtt_offset(rq->ring->vma); } struct i915_request * @@ -2928,9 +2918,16 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) */ spin_lock_irqsave(&engine->timeline.lock, flags); list_for_each_entry(request, &engine->timeline.requests, link) { - if (__i915_request_completed(request, request->global_seqno)) + if (i915_request_completed(request)) continue; + if (!i915_request_started(request)) + break; + + /* More than one preemptible request may match! */ + if (!match_ring(request)) + break; + active = request; break; } @@ -2939,361 +2936,6 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) return active; } -/* - * Ensure irq handler finishes, and not run again. - * Also return the active request so that we only search for it once. - */ -struct i915_request * -i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) -{ - struct i915_request *request; - - /* - * During the reset sequence, we must prevent the engine from - * entering RC6. As the context state is undefined until we restart - * the engine, if it does enter RC6 during the reset, the state - * written to the powercontext is undefined and so we may lose - * GPU state upon resume, i.e. fail to restart after a reset. - */ - intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); - - request = engine->reset.prepare(engine); - if (request && request->fence.error == -EIO) - request = ERR_PTR(-EIO); /* Previous reset failed! */ - - return request; -} - -int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - struct i915_request *request; - enum intel_engine_id id; - int err = 0; - - for_each_engine(engine, dev_priv, id) { - request = i915_gem_reset_prepare_engine(engine); - if (IS_ERR(request)) { - err = PTR_ERR(request); - continue; - } - - engine->hangcheck.active_request = request; - } - - i915_gem_revoke_fences(dev_priv); - intel_uc_sanitize(dev_priv); - - return err; -} - -static void engine_skip_context(struct i915_request *request) -{ - struct intel_engine_cs *engine = request->engine; - struct i915_gem_context *hung_ctx = request->gem_context; - struct i915_timeline *timeline = request->timeline; - unsigned long flags; - - GEM_BUG_ON(timeline == &engine->timeline); - - spin_lock_irqsave(&engine->timeline.lock, flags); - spin_lock(&timeline->lock); - - list_for_each_entry_continue(request, &engine->timeline.requests, link) - if (request->gem_context == hung_ctx) - i915_request_skip(request, -EIO); - - list_for_each_entry(request, &timeline->requests, link) - i915_request_skip(request, -EIO); - - spin_unlock(&timeline->lock); - spin_unlock_irqrestore(&engine->timeline.lock, flags); -} - -/* Returns the request if it was guilty of the hang */ -static struct i915_request * -i915_gem_reset_request(struct intel_engine_cs *engine, - struct i915_request *request, - bool stalled) -{ - /* The guilty request will get skipped on a hung engine. - * - * Users of client default contexts do not rely on logical - * state preserved between batches so it is safe to execute - * queued requests following the hang. Non default contexts - * rely on preserved state, so skipping a batch loses the - * evolution of the state and it needs to be considered corrupted. - * Executing more queued batches on top of corrupted state is - * risky. But we take the risk by trying to advance through - * the queued requests in order to make the client behaviour - * more predictable around resets, by not throwing away random - * amount of batches it has prepared for execution. Sophisticated - * clients can use gem_reset_stats_ioctl and dma fence status - * (exported via sync_file info ioctl on explicit fences) to observe - * when it loses the context state and should rebuild accordingly. - * - * The context ban, and ultimately the client ban, mechanism are safety - * valves if client submission ends up resulting in nothing more than - * subsequent hangs. - */ - - if (i915_request_completed(request)) { - GEM_TRACE("%s pardoned global=%d (fence %llx:%lld), current %d\n", - engine->name, request->global_seqno, - request->fence.context, request->fence.seqno, - intel_engine_get_seqno(engine)); - stalled = false; - } - - if (stalled) { - i915_gem_context_mark_guilty(request->gem_context); - i915_request_skip(request, -EIO); - - /* If this context is now banned, skip all pending requests. */ - if (i915_gem_context_is_banned(request->gem_context)) - engine_skip_context(request); - } else { - /* - * Since this is not the hung engine, it may have advanced - * since the hang declaration. Double check by refinding - * the active request at the time of the reset. - */ - request = i915_gem_find_active_request(engine); - if (request) { - unsigned long flags; - - i915_gem_context_mark_innocent(request->gem_context); - dma_fence_set_error(&request->fence, -EAGAIN); - - /* Rewind the engine to replay the incomplete rq */ - spin_lock_irqsave(&engine->timeline.lock, flags); - request = list_prev_entry(request, link); - if (&request->link == &engine->timeline.requests) - request = NULL; - spin_unlock_irqrestore(&engine->timeline.lock, flags); - } - } - - return request; -} - -void i915_gem_reset_engine(struct intel_engine_cs *engine, - struct i915_request *request, - bool stalled) -{ - if (request) - request = i915_gem_reset_request(engine, request, stalled); - - /* Setup the CS to resume from the breadcrumb of the hung request */ - engine->reset.reset(engine, request); -} - -void i915_gem_reset(struct drm_i915_private *dev_priv, - unsigned int stalled_mask) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - i915_retire_requests(dev_priv); - - for_each_engine(engine, dev_priv, id) { - struct intel_context *ce; - - i915_gem_reset_engine(engine, - engine->hangcheck.active_request, - stalled_mask & ENGINE_MASK(id)); - ce = fetch_and_zero(&engine->last_retired_context); - if (ce) - intel_context_unpin(ce); - - /* - * Ostensibily, we always want a context loaded for powersaving, - * so if the engine is idle after the reset, send a request - * to load our scratch kernel_context. - * - * More mysteriously, if we leave the engine idle after a reset, - * the next userspace batch may hang, with what appears to be - * an incoherent read by the CS (presumably stale TLB). An - * empty request appears sufficient to paper over the glitch. - */ - if (intel_engine_is_idle(engine)) { - struct i915_request *rq; - - rq = i915_request_alloc(engine, - dev_priv->kernel_context); - if (!IS_ERR(rq)) - i915_request_add(rq); - } - } - - i915_gem_restore_fences(dev_priv); -} - -void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) -{ - engine->reset.finish(engine); - - intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); -} - -void i915_gem_reset_finish(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - for_each_engine(engine, dev_priv, id) { - engine->hangcheck.active_request = NULL; - i915_gem_reset_finish_engine(engine); - } -} - -static void nop_submit_request(struct i915_request *request) -{ - unsigned long flags; - - GEM_TRACE("%s fence %llx:%lld -> -EIO\n", - request->engine->name, - request->fence.context, request->fence.seqno); - dma_fence_set_error(&request->fence, -EIO); - - spin_lock_irqsave(&request->engine->timeline.lock, flags); - __i915_request_submit(request); - intel_engine_write_global_seqno(request->engine, request->global_seqno); - spin_unlock_irqrestore(&request->engine->timeline.lock, flags); -} - -void i915_gem_set_wedged(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - GEM_TRACE("start\n"); - - if (GEM_SHOW_DEBUG()) { - struct drm_printer p = drm_debug_printer(__func__); - - for_each_engine(engine, i915, id) - intel_engine_dump(engine, &p, "%s\n", engine->name); - } - - if (test_and_set_bit(I915_WEDGED, &i915->gpu_error.flags)) - goto out; - - /* - * First, stop submission to hw, but do not yet complete requests by - * rolling the global seqno forward (since this would complete requests - * for which we haven't set the fence error to EIO yet). - */ - for_each_engine(engine, i915, id) - i915_gem_reset_prepare_engine(engine); - - /* Even if the GPU reset fails, it should still stop the engines */ - if (INTEL_GEN(i915) >= 5) - intel_gpu_reset(i915, ALL_ENGINES); - - for_each_engine(engine, i915, id) { - engine->submit_request = nop_submit_request; - engine->schedule = NULL; - } - i915->caps.scheduler = 0; - - /* - * Make sure no request can slip through without getting completed by - * either this call here to intel_engine_write_global_seqno, or the one - * in nop_submit_request. - */ - synchronize_rcu(); - - /* Mark all executing requests as skipped */ - for_each_engine(engine, i915, id) - engine->cancel_requests(engine); - - for_each_engine(engine, i915, id) { - i915_gem_reset_finish_engine(engine); - intel_engine_wakeup(engine); - } - -out: - GEM_TRACE("end\n"); - - wake_up_all(&i915->gpu_error.reset_queue); -} - -bool i915_gem_unset_wedged(struct drm_i915_private *i915) -{ - struct i915_timeline *tl; - - lockdep_assert_held(&i915->drm.struct_mutex); - if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) - return true; - - if (!i915->gt.scratch) /* Never full initialised, recovery impossible */ - return false; - - GEM_TRACE("start\n"); - - /* - * Before unwedging, make sure that all pending operations - * are flushed and errored out - we may have requests waiting upon - * third party fences. We marked all inflight requests as EIO, and - * every execbuf since returned EIO, for consistency we want all - * the currently pending requests to also be marked as EIO, which - * is done inside our nop_submit_request - and so we must wait. - * - * No more can be submitted until we reset the wedged bit. - */ - list_for_each_entry(tl, &i915->gt.timelines, link) { - struct i915_request *rq; - - rq = i915_gem_active_peek(&tl->last_request, - &i915->drm.struct_mutex); - if (!rq) - continue; - - /* - * We can't use our normal waiter as we want to - * avoid recursively trying to handle the current - * reset. The basic dma_fence_default_wait() installs - * a callback for dma_fence_signal(), which is - * triggered by our nop handler (indirectly, the - * callback enables the signaler thread which is - * woken by the nop_submit_request() advancing the seqno - * and when the seqno passes the fence, the signaler - * then signals the fence waking us up). - */ - if (dma_fence_default_wait(&rq->fence, true, - MAX_SCHEDULE_TIMEOUT) < 0) - return false; - } - i915_retire_requests(i915); - GEM_BUG_ON(i915->gt.active_requests); - - intel_engines_sanitize(i915, false); - - /* - * Undo nop_submit_request. We prevent all new i915 requests from - * being queued (by disallowing execbuf whilst wedged) so having - * waited for all active requests above, we know the system is idle - * and do not have to worry about a thread being inside - * engine->submit_request() as we swap over. So unlike installing - * the nop_submit_request on reset, we can do this from normal - * context and do not require stop_machine(). - */ - intel_engines_reset_default_submission(i915); - i915_gem_contexts_lost(i915); - - GEM_TRACE("end\n"); - - smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ - clear_bit(I915_WEDGED, &i915->gpu_error.flags); - - return true; -} - static void i915_gem_retire_work_handler(struct work_struct *work) { @@ -3396,7 +3038,7 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915) GEM_BUG_ON(i915->gt.active_requests); for_each_engine(engine, i915, id) { - GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request)); + GEM_BUG_ON(__i915_active_request_peek(&engine->timeline.last_request)); GEM_BUG_ON(engine->last_retired_context != to_intel_context(i915->kernel_context, engine)); } @@ -3615,33 +3257,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return ret; } -static long wait_for_timeline(struct i915_timeline *tl, - unsigned int flags, long timeout) -{ - struct i915_request *rq; - - rq = i915_gem_active_get_unlocked(&tl->last_request); - if (!rq) - return timeout; - - /* - * "Race-to-idle". - * - * Switching to the kernel context is often used a synchronous - * step prior to idling, e.g. in suspend for flushing all - * current operations to memory before sleeping. These we - * want to complete as quickly as possible to avoid prolonged - * stalls, so allow the gpu to boost to maximum clocks. - */ - if (flags & I915_WAIT_FOR_IDLE_BOOST) - gen6_rps_boost(rq, NULL); - - timeout = i915_request_wait(rq, flags, timeout); - i915_request_put(rq); - - return timeout; -} - static int wait_for_engines(struct drm_i915_private *i915) { if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { @@ -3655,6 +3270,52 @@ static int wait_for_engines(struct drm_i915_private *i915) return 0; } +static long +wait_for_timelines(struct drm_i915_private *i915, + unsigned int flags, long timeout) +{ + struct i915_gt_timelines *gt = &i915->gt.timelines; + struct i915_timeline *tl; + + if (!READ_ONCE(i915->gt.active_requests)) + return timeout; + + mutex_lock(>->mutex); + list_for_each_entry(tl, >->active_list, link) { + struct i915_request *rq; + + rq = i915_active_request_get_unlocked(&tl->last_request); + if (!rq) + continue; + + mutex_unlock(>->mutex); + + /* + * "Race-to-idle". + * + * Switching to the kernel context is often used a synchronous + * step prior to idling, e.g. in suspend for flushing all + * current operations to memory before sleeping. These we + * want to complete as quickly as possible to avoid prolonged + * stalls, so allow the gpu to boost to maximum clocks. + */ + if (flags & I915_WAIT_FOR_IDLE_BOOST) + gen6_rps_boost(rq, NULL); + + timeout = i915_request_wait(rq, flags, timeout); + i915_request_put(rq); + if (timeout < 0) + return timeout; + + /* restart after reacquiring the lock */ + mutex_lock(>->mutex); + tl = list_entry(>->active_list, typeof(*tl), link); + } + mutex_unlock(>->mutex); + + return timeout; +} + int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags, long timeout) { @@ -3666,17 +3327,15 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, if (!READ_ONCE(i915->gt.awake)) return 0; + timeout = wait_for_timelines(i915, flags, timeout); + if (timeout < 0) + return timeout; + if (flags & I915_WAIT_LOCKED) { - struct i915_timeline *tl; int err; lockdep_assert_held(&i915->drm.struct_mutex); - list_for_each_entry(tl, &i915->gt.timelines, link) { - timeout = wait_for_timeline(tl, flags, timeout); - if (timeout < 0) - return timeout; - } if (GEM_SHOW_DEBUG() && !timeout) { /* Presume that timeout was non-zero to begin with! */ dev_warn(&i915->drm.pdev->dev, @@ -3690,17 +3349,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, i915_retire_requests(i915); GEM_BUG_ON(i915->gt.active_requests); - } else { - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) { - struct i915_timeline *tl = &engine->timeline; - - timeout = wait_for_timeline(tl, flags, timeout); - if (timeout < 0) - return timeout; - } } return 0; @@ -3886,7 +3534,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, * reading an invalid PTE on older architectures. */ restart: - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -3964,7 +3612,7 @@ restart: */ } - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -3974,7 +3622,7 @@ restart: } } - list_for_each_entry(vma, &obj->vma_list, obj_link) + list_for_each_entry(vma, &obj->vma.list, obj_link) vma->node.color = cache_level; i915_gem_object_set_cache_coherency(obj, cache_level); obj->cache_dirty = true; /* Always invalidate stale cachelines */ @@ -4537,7 +4185,8 @@ out: } static void -frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request) +frontbuffer_retire(struct i915_active_request *active, + struct i915_request *request) { struct drm_i915_gem_object *obj = container_of(active, typeof(*obj), frontbuffer_write); @@ -4550,7 +4199,9 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, { mutex_init(&obj->mm.lock); - INIT_LIST_HEAD(&obj->vma_list); + spin_lock_init(&obj->vma.lock); + INIT_LIST_HEAD(&obj->vma.list); + INIT_LIST_HEAD(&obj->lut_list); INIT_LIST_HEAD(&obj->batch_pool_link); @@ -4562,7 +4213,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, obj->resv = &obj->__builtin_resv; obj->frontbuffer_ggtt_origin = ORIGIN_GTT; - init_request_active(&obj->frontbuffer_write, frontbuffer_retire); + i915_active_request_init(&obj->frontbuffer_write, + NULL, frontbuffer_retire); obj->mm.madv = I915_MADV_WILLNEED; INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); @@ -4705,8 +4357,9 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, struct llist_node *freed) { struct drm_i915_gem_object *obj, *on; + intel_wakeref_t wakeref; - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); llist_for_each_entry_safe(obj, on, freed, freed) { struct i915_vma *vma, *vn; @@ -4715,14 +4368,13 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, mutex_lock(&i915->drm.struct_mutex); GEM_BUG_ON(i915_gem_object_is_active(obj)); - list_for_each_entry_safe(vma, vn, - &obj->vma_list, obj_link) { + list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) { GEM_BUG_ON(i915_vma_is_active(vma)); vma->flags &= ~I915_VMA_PIN_MASK; i915_vma_destroy(vma); } - GEM_BUG_ON(!list_empty(&obj->vma_list)); - GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); + GEM_BUG_ON(!list_empty(&obj->vma.list)); + GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree)); /* This serializes freeing with the shrinker. Since the free * is delayed, first by RCU then by the workqueue, we want the @@ -4767,7 +4419,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, if (on) cond_resched(); } - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); } static void i915_gem_flush_free_objects(struct drm_i915_private *i915) @@ -4876,11 +4528,11 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) void i915_gem_sanitize(struct drm_i915_private *i915) { - GEM_TRACE("\n"); + intel_wakeref_t wakeref; - mutex_lock(&i915->drm.struct_mutex); + GEM_TRACE("\n"); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); /* @@ -4903,21 +4555,25 @@ void i915_gem_sanitize(struct drm_i915_private *i915) intel_engines_sanitize(i915, false); intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); + mutex_lock(&i915->drm.struct_mutex); i915_gem_contexts_lost(i915); mutex_unlock(&i915->drm.struct_mutex); } int i915_gem_suspend(struct drm_i915_private *i915) { + intel_wakeref_t wakeref; int ret; GEM_TRACE("\n"); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); intel_suspend_gt_powersave(i915); + flush_workqueue(i915->wq); + mutex_lock(&i915->drm.struct_mutex); /* @@ -4947,11 +4603,9 @@ int i915_gem_suspend(struct drm_i915_private *i915) i915_retire_requests(i915); /* ensure we flush after wedging */ mutex_unlock(&i915->drm.struct_mutex); + i915_reset_flush(i915); - intel_uc_suspend(i915); - - cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); - cancel_delayed_work_sync(&i915->gt.retire_work); + drain_delayed_work(&i915->gt.retire_work); /* * As the idle_work is rearming if it detects a race, play safe and @@ -4959,6 +4613,8 @@ int i915_gem_suspend(struct drm_i915_private *i915) */ drain_delayed_work(&i915->gt.idle_work); + intel_uc_suspend(i915); + /* * Assert that we successfully flushed all the work and * reset the GPU back to its idle, low power state. @@ -4967,12 +4623,12 @@ int i915_gem_suspend(struct drm_i915_private *i915) if (WARN_ON(!intel_engines_are_idle(i915))) i915_gem_set_wedged(i915); /* no hope, discard everything */ - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); return 0; err_unlock: mutex_unlock(&i915->drm.struct_mutex); - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); return ret; } @@ -5395,6 +5051,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv) dev_priv->gt.cleanup_engine = intel_engine_cleanup; } + i915_timelines_init(dev_priv); + ret = i915_gem_init_userptr(dev_priv); if (ret) return ret; @@ -5517,8 +5175,10 @@ err_unlock: err_uc_misc: intel_uc_fini_misc(dev_priv); - if (ret != -EIO) + if (ret != -EIO) { i915_gem_cleanup_userptr(dev_priv); + i915_timelines_fini(dev_priv); + } if (ret == -EIO) { mutex_lock(&dev_priv->drm.struct_mutex); @@ -5569,6 +5229,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv) intel_uc_fini_misc(dev_priv); i915_gem_cleanup_userptr(dev_priv); + i915_timelines_fini(dev_priv); i915_gem_drain_freed_objects(dev_priv); @@ -5671,7 +5332,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) if (!dev_priv->priorities) goto err_dependencies; - INIT_LIST_HEAD(&dev_priv->gt.timelines); INIT_LIST_HEAD(&dev_priv->gt.active_rings); INIT_LIST_HEAD(&dev_priv->gt.closed_vma); @@ -5683,6 +5343,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) i915_gem_idle_work_handler); init_waitqueue_head(&dev_priv->gpu_error.wait_queue); init_waitqueue_head(&dev_priv->gpu_error.reset_queue); + mutex_init(&dev_priv->gpu_error.wedge_mutex); atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); @@ -5714,7 +5375,6 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); WARN_ON(dev_priv->mm.object_count); - WARN_ON(!list_empty(&dev_priv->gt.timelines)); kmem_cache_destroy(dev_priv->priorities); kmem_cache_destroy(dev_priv->dependencies); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 5933adbe3d99..280813a4bf82 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -89,6 +89,7 @@ #include <drm/i915_drm.h> #include "i915_drv.h" #include "i915_trace.h" +#include "intel_lrc_reg.h" #include "intel_workarounds.h" #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 @@ -321,6 +322,32 @@ static u32 default_desc_template(const struct drm_i915_private *i915, return desc; } +static void intel_context_retire(struct i915_active_request *active, + struct i915_request *rq) +{ + struct intel_context *ce = + container_of(active, typeof(*ce), active_tracker); + + intel_context_unpin(ce); +} + +void +intel_context_init(struct intel_context *ce, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + ce->gem_context = ctx; + + INIT_LIST_HEAD(&ce->signal_link); + INIT_LIST_HEAD(&ce->signals); + + /* Use the whole device by default */ + ce->sseu = intel_device_default_sseu(ctx->i915); + + i915_active_request_init(&ce->active_tracker, + NULL, intel_context_retire); +} + static struct i915_gem_context * __create_hw_context(struct drm_i915_private *dev_priv, struct drm_i915_file_private *file_priv) @@ -338,11 +365,8 @@ __create_hw_context(struct drm_i915_private *dev_priv, ctx->i915 = dev_priv; ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL); - for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) { - struct intel_context *ce = &ctx->__engine[n]; - - ce->gem_context = ctx; - } + for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) + intel_context_init(&ctx->__engine[n], ctx, dev_priv->engine[n]); INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); INIT_LIST_HEAD(&ctx->handles_list); @@ -645,8 +669,8 @@ last_request_on_engine(struct i915_timeline *timeline, GEM_BUG_ON(timeline == &engine->timeline); - rq = i915_gem_active_raw(&timeline->last_request, - &engine->i915->drm.struct_mutex); + rq = i915_active_request_raw(&timeline->last_request, + &engine->i915->drm.struct_mutex); if (rq && rq->engine == engine) { GEM_TRACE("last request for %s on engine %s: %llx:%llu\n", timeline->name, engine->name, @@ -839,6 +863,56 @@ out: return 0; } +static int get_sseu(struct i915_gem_context *ctx, + struct drm_i915_gem_context_param *args) +{ + struct drm_i915_gem_context_param_sseu user_sseu; + struct intel_engine_cs *engine; + struct intel_context *ce; + int ret; + + if (args->size == 0) + goto out; + else if (args->size < sizeof(user_sseu)) + return -EINVAL; + + if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value), + sizeof(user_sseu))) + return -EFAULT; + + if (user_sseu.flags || user_sseu.rsvd) + return -EINVAL; + + engine = intel_engine_lookup_user(ctx->i915, + user_sseu.engine_class, + user_sseu.engine_instance); + if (!engine) + return -EINVAL; + + /* Only use for mutex here is to serialize get_param and set_param. */ + ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); + if (ret) + return ret; + + ce = to_intel_context(ctx, engine); + + user_sseu.slice_mask = ce->sseu.slice_mask; + user_sseu.subslice_mask = ce->sseu.subslice_mask; + user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice; + user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice; + + mutex_unlock(&ctx->i915->drm.struct_mutex); + + if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu, + sizeof(user_sseu))) + return -EFAULT; + +out: + args->size = sizeof(user_sseu); + + return 0; +} + int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { @@ -851,15 +925,17 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, if (!ctx) return -ENOENT; - args->size = 0; switch (args->param) { case I915_CONTEXT_PARAM_BAN_PERIOD: ret = -EINVAL; break; case I915_CONTEXT_PARAM_NO_ZEROMAP: + args->size = 0; args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); break; case I915_CONTEXT_PARAM_GTT_SIZE: + args->size = 0; + if (ctx->ppgtt) args->value = ctx->ppgtt->vm.total; else if (to_i915(dev)->mm.aliasing_ppgtt) @@ -868,14 +944,20 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, args->value = to_i915(dev)->ggtt.vm.total; break; case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: + args->size = 0; args->value = i915_gem_context_no_error_capture(ctx); break; case I915_CONTEXT_PARAM_BANNABLE: + args->size = 0; args->value = i915_gem_context_is_bannable(ctx); break; case I915_CONTEXT_PARAM_PRIORITY: + args->size = 0; args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT; break; + case I915_CONTEXT_PARAM_SSEU: + ret = get_sseu(ctx, args); + break; default: ret = -EINVAL; break; @@ -885,6 +967,281 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, return ret; } +static int gen8_emit_rpcs_config(struct i915_request *rq, + struct intel_context *ce, + struct intel_sseu sseu) +{ + u64 offset; + u32 *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + offset = i915_ggtt_offset(ce->state) + + LRC_STATE_PN * PAGE_SIZE + + (CTX_R_PWR_CLK_STATE + 1) * 4; + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + *cs++ = gen8_make_rpcs(rq->i915, &sseu); + + intel_ring_advance(rq, cs); + + return 0; +} + +static int +gen8_modify_rpcs_gpu(struct intel_context *ce, + struct intel_engine_cs *engine, + struct intel_sseu sseu) +{ + struct drm_i915_private *i915 = engine->i915; + struct i915_request *rq, *prev; + intel_wakeref_t wakeref; + int ret; + + GEM_BUG_ON(!ce->pin_count); + + lockdep_assert_held(&i915->drm.struct_mutex); + + /* Submitting requests etc needs the hw awake. */ + wakeref = intel_runtime_pm_get(i915); + + rq = i915_request_alloc(engine, i915->kernel_context); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + goto out_put; + } + + /* Queue this switch after all other activity by this context. */ + prev = i915_active_request_raw(&ce->ring->timeline->last_request, + &i915->drm.struct_mutex); + if (prev && !i915_request_completed(prev)) { + ret = i915_request_await_dma_fence(rq, &prev->fence); + if (ret < 0) + goto out_add; + } + + /* Order all following requests to be after. */ + ret = i915_timeline_set_barrier(ce->ring->timeline, rq); + if (ret) + goto out_add; + + ret = gen8_emit_rpcs_config(rq, ce, sseu); + if (ret) + goto out_add; + + /* + * Guarantee context image and the timeline remains pinned until the + * modifying request is retired by setting the ce activity tracker. + * + * But we only need to take one pin on the account of it. Or in other + * words transfer the pinned ce object to tracked active request. + */ + if (!i915_active_request_isset(&ce->active_tracker)) + __intel_context_pin(ce); + __i915_active_request_set(&ce->active_tracker, rq); + +out_add: + i915_request_add(rq); +out_put: + intel_runtime_pm_put(i915, wakeref); + + return ret; +} + +static int +__i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct intel_sseu sseu) +{ + struct intel_context *ce = to_intel_context(ctx, engine); + int ret = 0; + + GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8); + GEM_BUG_ON(engine->id != RCS); + + /* Nothing to do if unmodified. */ + if (!memcmp(&ce->sseu, &sseu, sizeof(sseu))) + return 0; + + /* + * If context is not idle we have to submit an ordered request to modify + * its context image via the kernel context. Pristine and idle contexts + * will be configured on pinning. + */ + if (ce->pin_count) + ret = gen8_modify_rpcs_gpu(ce, engine, sseu); + + if (!ret) + ce->sseu = sseu; + + return ret; +} + +static int +i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct intel_sseu sseu) +{ + int ret; + + ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); + if (ret) + return ret; + + ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu); + + mutex_unlock(&ctx->i915->drm.struct_mutex); + + return ret; +} + +static int +user_to_context_sseu(struct drm_i915_private *i915, + const struct drm_i915_gem_context_param_sseu *user, + struct intel_sseu *context) +{ + const struct sseu_dev_info *device = &RUNTIME_INFO(i915)->sseu; + + /* No zeros in any field. */ + if (!user->slice_mask || !user->subslice_mask || + !user->min_eus_per_subslice || !user->max_eus_per_subslice) + return -EINVAL; + + /* Max > min. */ + if (user->max_eus_per_subslice < user->min_eus_per_subslice) + return -EINVAL; + + /* + * Some future proofing on the types since the uAPI is wider than the + * current internal implementation. + */ + if (overflows_type(user->slice_mask, context->slice_mask) || + overflows_type(user->subslice_mask, context->subslice_mask) || + overflows_type(user->min_eus_per_subslice, + context->min_eus_per_subslice) || + overflows_type(user->max_eus_per_subslice, + context->max_eus_per_subslice)) + return -EINVAL; + + /* Check validity against hardware. */ + if (user->slice_mask & ~device->slice_mask) + return -EINVAL; + + if (user->subslice_mask & ~device->subslice_mask[0]) + return -EINVAL; + + if (user->max_eus_per_subslice > device->max_eus_per_subslice) + return -EINVAL; + + context->slice_mask = user->slice_mask; + context->subslice_mask = user->subslice_mask; + context->min_eus_per_subslice = user->min_eus_per_subslice; + context->max_eus_per_subslice = user->max_eus_per_subslice; + + /* Part specific restrictions. */ + if (IS_GEN(i915, 11)) { + unsigned int hw_s = hweight8(device->slice_mask); + unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]); + unsigned int req_s = hweight8(context->slice_mask); + unsigned int req_ss = hweight8(context->subslice_mask); + + /* + * Only full subslice enablement is possible if more than one + * slice is turned on. + */ + if (req_s > 1 && req_ss != hw_ss_per_s) + return -EINVAL; + + /* + * If more than four (SScount bitfield limit) subslices are + * requested then the number has to be even. + */ + if (req_ss > 4 && (req_ss & 1)) + return -EINVAL; + + /* + * If only one slice is enabled and subslice count is below the + * device full enablement, it must be at most half of the all + * available subslices. + */ + if (req_s == 1 && req_ss < hw_ss_per_s && + req_ss > (hw_ss_per_s / 2)) + return -EINVAL; + + /* ABI restriction - VME use case only. */ + + /* All slices or one slice only. */ + if (req_s != 1 && req_s != hw_s) + return -EINVAL; + + /* + * Half subslices or full enablement only when one slice is + * enabled. + */ + if (req_s == 1 && + (req_ss != hw_ss_per_s && req_ss != (hw_ss_per_s / 2))) + return -EINVAL; + + /* No EU configuration changes. */ + if ((user->min_eus_per_subslice != + device->max_eus_per_subslice) || + (user->max_eus_per_subslice != + device->max_eus_per_subslice)) + return -EINVAL; + } + + return 0; +} + +static int set_sseu(struct i915_gem_context *ctx, + struct drm_i915_gem_context_param *args) +{ + struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_gem_context_param_sseu user_sseu; + struct intel_engine_cs *engine; + struct intel_sseu sseu; + int ret; + + if (args->size < sizeof(user_sseu)) + return -EINVAL; + + if (!IS_GEN(i915, 11)) + return -ENODEV; + + if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value), + sizeof(user_sseu))) + return -EFAULT; + + if (user_sseu.flags || user_sseu.rsvd) + return -EINVAL; + + engine = intel_engine_lookup_user(i915, + user_sseu.engine_class, + user_sseu.engine_instance); + if (!engine) + return -EINVAL; + + /* Only render engine supports RPCS configuration. */ + if (engine->class != RENDER_CLASS) + return -ENODEV; + + ret = user_to_context_sseu(i915, &user_sseu, &sseu); + if (ret) + return ret; + + ret = i915_gem_context_reconfigure_sseu(ctx, engine, sseu); + if (ret) + return ret; + + args->size = sizeof(user_sseu); + + return 0; +} + int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { @@ -947,7 +1304,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, I915_USER_PRIORITY(priority); } break; - + case I915_CONTEXT_PARAM_SSEU: + ret = set_sseu(ctx, args); + break; default: ret = -EINVAL; break; diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index f6d870b1f73e..ca150a764c24 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -31,6 +31,7 @@ #include "i915_gem.h" #include "i915_scheduler.h" +#include "intel_device_info.h" struct pid; @@ -53,6 +54,16 @@ struct intel_context_ops { void (*destroy)(struct intel_context *ce); }; +/* + * Powergating configuration for a particular (context,engine). + */ +struct intel_sseu { + u8 slice_mask; + u8 subslice_mask; + u8 min_eus_per_subslice; + u8 max_eus_per_subslice; +}; + /** * struct i915_gem_context - client state * @@ -164,13 +175,24 @@ struct i915_gem_context { struct intel_context { struct i915_gem_context *gem_context; struct intel_engine_cs *active; + struct list_head signal_link; + struct list_head signals; struct i915_vma *state; struct intel_ring *ring; u32 *lrc_reg_state; u64 lrc_desc; int pin_count; + /** + * active_tracker: Active tracker for the external rq activity + * on this intel_context object. + */ + struct i915_active_request active_tracker; + const struct intel_context_ops *ops; + + /** sseu: Control eu/slice partitioning */ + struct intel_sseu sseu; } __engine[I915_NUM_ENGINES]; /** ring_size: size for allocating the per-engine ring buffer */ @@ -364,4 +386,8 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx) kref_put(&ctx->ref, i915_gem_context_release); } +void intel_context_init(struct intel_context *ce, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine); + #endif /* !__I915_GEM_CONTEXT_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index f6855401f247..68d74c50ac39 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -126,31 +126,25 @@ i915_gem_evict_something(struct i915_address_space *vm, struct drm_i915_private *dev_priv = vm->i915; struct drm_mm_scan scan; struct list_head eviction_list; - struct list_head *phases[] = { - &vm->inactive_list, - &vm->active_list, - NULL, - }, **phase; struct i915_vma *vma, *next; struct drm_mm_node *node; enum drm_mm_insert_mode mode; + struct i915_vma *active; int ret; lockdep_assert_held(&vm->i915->drm.struct_mutex); trace_i915_gem_evict(vm, min_size, alignment, flags); /* - * The goal is to evict objects and amalgamate space in LRU order. - * The oldest idle objects reside on the inactive list, which is in - * retirement order. The next objects to retire are those in flight, - * on the active list, again in retirement order. + * The goal is to evict objects and amalgamate space in rough LRU order. + * Since both active and inactive objects reside on the same list, + * in a mix of creation and last scanned order, as we process the list + * we sort it into inactive/active, which keeps the active portion + * in a rough MRU order. * * The retirement sequence is thus: - * 1. Inactive objects (already retired) - * 2. Active objects (will stall on unbinding) - * - * On each list, the oldest objects lie at the HEAD with the freshest - * object on the TAIL. + * 1. Inactive objects (already retired, random order) + * 2. Active objects (will stall on unbinding, oldest scanned first) */ mode = DRM_MM_INSERT_BEST; if (flags & PIN_HIGH) @@ -169,17 +163,46 @@ i915_gem_evict_something(struct i915_address_space *vm, */ if (!(flags & PIN_NONBLOCK)) i915_retire_requests(dev_priv); - else - phases[1] = NULL; search_again: + active = NULL; INIT_LIST_HEAD(&eviction_list); - phase = phases; - do { - list_for_each_entry(vma, *phase, vm_link) - if (mark_free(&scan, vma, flags, &eviction_list)) - goto found; - } while (*++phase); + list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) { + /* + * We keep this list in a rough least-recently scanned order + * of active elements (inactive elements are cheap to reap). + * New entries are added to the end, and we move anything we + * scan to the end. The assumption is that the working set + * of applications is either steady state (and thanks to the + * userspace bo cache it almost always is) or volatile and + * frequently replaced after a frame, which are self-evicting! + * Given that assumption, the MRU order of the scan list is + * fairly static, and keeping it in least-recently scan order + * is suitable. + * + * To notice when we complete one full cycle, we record the + * first active element seen, before moving it to the tail. + */ + if (i915_vma_is_active(vma)) { + if (vma == active) { + if (flags & PIN_NONBLOCK) + break; + + active = ERR_PTR(-EAGAIN); + } + + if (active != ERR_PTR(-EAGAIN)) { + if (!active) + active = vma; + + list_move_tail(&vma->vm_link, &vm->bound_list); + continue; + } + } + + if (mark_free(&scan, vma, flags, &eviction_list)) + goto found; + } /* Nothing found, clean up and bail out! */ list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { @@ -388,11 +411,6 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, */ int i915_gem_evict_vm(struct i915_address_space *vm) { - struct list_head *phases[] = { - &vm->inactive_list, - &vm->active_list, - NULL - }, **phase; struct list_head eviction_list; struct i915_vma *vma, *next; int ret; @@ -412,16 +430,15 @@ int i915_gem_evict_vm(struct i915_address_space *vm) } INIT_LIST_HEAD(&eviction_list); - phase = phases; - do { - list_for_each_entry(vma, *phase, vm_link) { - if (i915_vma_is_pinned(vma)) - continue; + mutex_lock(&vm->mutex); + list_for_each_entry(vma, &vm->bound_list, vm_link) { + if (i915_vma_is_pinned(vma)) + continue; - __i915_vma_pin(vma); - list_add(&vma->evict_link, &eviction_list); - } - } while (*++phase); + __i915_vma_pin(vma); + list_add(&vma->evict_link, &eviction_list); + } + mutex_unlock(&vm->mutex); ret = 0; list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index e7994505d850..02adcaf6ebea 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -753,6 +753,68 @@ static int eb_select_context(struct i915_execbuffer *eb) return 0; } +static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring) +{ + struct i915_request *rq; + + /* + * Completely unscientific finger-in-the-air estimates for suitable + * maximum user request size (to avoid blocking) and then backoff. + */ + if (intel_ring_update_space(ring) >= PAGE_SIZE) + return NULL; + + /* + * Find a request that after waiting upon, there will be at least half + * the ring available. The hysteresis allows us to compete for the + * shared ring and should mean that we sleep less often prior to + * claiming our resources, but not so long that the ring completely + * drains before we can submit our next request. + */ + list_for_each_entry(rq, &ring->request_list, ring_link) { + if (__intel_ring_space(rq->postfix, + ring->emit, ring->size) > ring->size / 2) + break; + } + if (&rq->ring_link == &ring->request_list) + return NULL; /* weird, we will check again later for real */ + + return i915_request_get(rq); +} + +static int eb_wait_for_ring(const struct i915_execbuffer *eb) +{ + const struct intel_context *ce; + struct i915_request *rq; + int ret = 0; + + /* + * Apply a light amount of backpressure to prevent excessive hogs + * from blocking waiting for space whilst holding struct_mutex and + * keeping all of their resources pinned. + */ + + ce = to_intel_context(eb->ctx, eb->engine); + if (!ce->ring) /* first use, assume empty! */ + return 0; + + rq = __eb_wait_for_ring(ce->ring); + if (rq) { + mutex_unlock(&eb->i915->drm.struct_mutex); + + if (i915_request_wait(rq, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT) < 0) + ret = -EINTR; + + i915_request_put(rq); + + mutex_lock(&eb->i915->drm.struct_mutex); + } + + return ret; +} + static int eb_lookup_vmas(struct i915_execbuffer *eb) { struct radix_tree_root *handles_vma = &eb->ctx->handles_vma; @@ -1976,6 +2038,18 @@ static int eb_submit(struct i915_execbuffer *eb) return err; } + /* + * After we completed waiting for other engines (using HW semaphores) + * then we can signal that this request/batch is ready to run. This + * allows us to determine if the batch is still waiting on the GPU + * or actually running by checking the breadcrumb. + */ + if (eb->engine->emit_init_breadcrumb) { + err = eb->engine->emit_init_breadcrumb(eb->request); + if (err) + return err; + } + err = eb->engine->emit_bb_start(eb->request, eb->batch->node.start + eb->batch_start_offset, @@ -2202,6 +2276,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, struct i915_execbuffer eb; struct dma_fence *in_fence = NULL; struct sync_file *out_fence = NULL; + intel_wakeref_t wakeref; int out_fence_fd = -1; int err; @@ -2272,12 +2347,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, * wakeref that we hold until the GPU has been idle for at least * 100ms. */ - intel_runtime_pm_get(eb.i915); + wakeref = intel_runtime_pm_get(eb.i915); err = i915_mutex_lock_interruptible(dev); if (err) goto err_rpm; + err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */ + if (unlikely(err)) + goto err_unlock; + err = eb_relocate(&eb); if (err) { /* @@ -2422,9 +2501,10 @@ err_batch_unpin: err_vma: if (eb.exec) eb_release_vmas(&eb); +err_unlock: mutex_unlock(&dev->struct_mutex); err_rpm: - intel_runtime_pm_put(eb.i915); + intel_runtime_pm_put(eb.i915, wakeref); i915_gem_context_put(eb.ctx); err_destroy: eb_destroy(&eb); diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index d67c07cdd0b8..e037e94792f3 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -209,6 +209,7 @@ static void fence_write(struct drm_i915_fence_reg *fence, static int fence_update(struct drm_i915_fence_reg *fence, struct i915_vma *vma) { + intel_wakeref_t wakeref; int ret; if (vma) { @@ -222,7 +223,7 @@ static int fence_update(struct drm_i915_fence_reg *fence, i915_gem_object_get_tiling(vma->obj))) return -EINVAL; - ret = i915_gem_active_retire(&vma->last_fence, + ret = i915_active_request_retire(&vma->last_fence, &vma->obj->base.dev->struct_mutex); if (ret) return ret; @@ -231,7 +232,7 @@ static int fence_update(struct drm_i915_fence_reg *fence, if (fence->vma) { struct i915_vma *old = fence->vma; - ret = i915_gem_active_retire(&old->last_fence, + ret = i915_active_request_retire(&old->last_fence, &old->obj->base.dev->struct_mutex); if (ret) return ret; @@ -256,9 +257,10 @@ static int fence_update(struct drm_i915_fence_reg *fence, * If the device is currently powered down, we will defer the write * to the runtime resume, see i915_gem_restore_fences(). */ - if (intel_runtime_pm_get_if_in_use(fence->i915)) { + wakeref = intel_runtime_pm_get_if_in_use(fence->i915); + if (wakeref) { fence_write(fence, vma); - intel_runtime_pm_put(fence->i915); + intel_runtime_pm_put(fence->i915, wakeref); } if (vma) { @@ -553,8 +555,8 @@ void i915_gem_restore_fences(struct drm_i915_private *dev_priv) void i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv) { - uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; - uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + u32 swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; + u32 swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; if (INTEL_GEN(dev_priv) >= 8 || IS_VALLEYVIEW(dev_priv)) { /* @@ -577,7 +579,7 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv) swizzle_y = I915_BIT_6_SWIZZLE_NONE; } } else { - uint32_t dimm_c0, dimm_c1; + u32 dimm_c0, dimm_c1; dimm_c0 = I915_READ(MAD_DIMM_C0); dimm_c1 = I915_READ(MAD_DIMM_C1); dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; @@ -609,7 +611,7 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv) swizzle_y = I915_BIT_6_SWIZZLE_NONE; } else if (IS_MOBILE(dev_priv) || IS_I915G(dev_priv) || IS_I945G(dev_priv)) { - uint32_t dcc; + u32 dcc; /* On 9xx chipsets, channel interleave by the CPU is * determined by DCC. For single-channel, neither the CPU diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h index 99a31ded4dfd..09dcaf14121b 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h @@ -50,4 +50,3 @@ struct drm_i915_fence_reg { }; #endif - diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index a8807fbed0aa..d646d37eec2f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -37,6 +37,7 @@ #include "i915_drv.h" #include "i915_vgpu.h" +#include "i915_reset.h" #include "i915_trace.h" #include "intel_drv.h" #include "intel_frontbuffer.h" @@ -473,8 +474,7 @@ static void vm_free_page(struct i915_address_space *vm, struct page *page) spin_unlock(&vm->free_pages.lock); } -static void i915_address_space_init(struct i915_address_space *vm, - struct drm_i915_private *dev_priv) +static void i915_address_space_init(struct i915_address_space *vm, int subclass) { /* * The vm->mutex must be reclaim safe (for use in the shrinker). @@ -482,6 +482,7 @@ static void i915_address_space_init(struct i915_address_space *vm, * attempt holding the lock is immediately reported by lockdep. */ mutex_init(&vm->mutex); + lockdep_set_subclass(&vm->mutex, subclass); i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex); GEM_BUG_ON(!vm->total); @@ -490,9 +491,8 @@ static void i915_address_space_init(struct i915_address_space *vm, stash_init(&vm->free_pages); - INIT_LIST_HEAD(&vm->active_list); - INIT_LIST_HEAD(&vm->inactive_list); INIT_LIST_HEAD(&vm->unbound_list); + INIT_LIST_HEAD(&vm->bound_list); } static void i915_address_space_fini(struct i915_address_space *vm) @@ -1547,7 +1547,7 @@ static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) /* From bdw, there is support for read-only pages in the PPGTT. */ ppgtt->vm.has_read_only = true; - i915_address_space_init(&ppgtt->vm, i915); + i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT); /* There are only few exceptions for gen >=6. chv and bxt. * And we are not sure about the latter so play safe for now. @@ -1917,21 +1917,23 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size) if (!vma) return ERR_PTR(-ENOMEM); - init_request_active(&vma->last_fence, NULL); + i915_active_init(i915, &vma->active, NULL); + INIT_ACTIVE_REQUEST(&vma->last_fence); vma->vm = &ggtt->vm; vma->ops = &pd_vma_ops; vma->private = ppgtt; - vma->active = RB_ROOT; - vma->size = size; vma->fence_size = size; vma->flags = I915_VMA_GGTT; vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */ INIT_LIST_HEAD(&vma->obj_link); + + mutex_lock(&vma->vm->mutex); list_add(&vma->vm_link, &vma->vm->unbound_list); + mutex_unlock(&vma->vm->mutex); return vma; } @@ -1996,7 +1998,7 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) ppgtt->base.vm.total = I915_PDES * GEN6_PTES * I915_GTT_PAGE_SIZE; - i915_address_space_init(&ppgtt->base.vm, i915); + i915_address_space_init(&ppgtt->base.vm, VM_CLASS_PPGTT); ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range; ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range; @@ -2110,8 +2112,7 @@ void i915_ppgtt_close(struct i915_address_space *vm) static void ppgtt_destroy_vma(struct i915_address_space *vm) { struct list_head *phases[] = { - &vm->active_list, - &vm->inactive_list, + &vm->bound_list, &vm->unbound_list, NULL, }, **phase; @@ -2134,8 +2135,7 @@ void i915_ppgtt_release(struct kref *kref) ppgtt_destroy_vma(&ppgtt->vm); - GEM_BUG_ON(!list_empty(&ppgtt->vm.active_list)); - GEM_BUG_ON(!list_empty(&ppgtt->vm.inactive_list)); + GEM_BUG_ON(!list_empty(&ppgtt->vm.bound_list)); GEM_BUG_ON(!list_empty(&ppgtt->vm.unbound_list)); ppgtt->vm.cleanup(&ppgtt->vm); @@ -2527,6 +2527,7 @@ static int ggtt_bind_vma(struct i915_vma *vma, { struct drm_i915_private *i915 = vma->vm->i915; struct drm_i915_gem_object *obj = vma->obj; + intel_wakeref_t wakeref; u32 pte_flags; /* Applicable to VLV (gen8+ do not support RO in the GGTT) */ @@ -2534,9 +2535,8 @@ static int ggtt_bind_vma(struct i915_vma *vma, if (i915_gem_object_is_readonly(obj)) pte_flags |= PTE_READ_ONLY; - intel_runtime_pm_get(i915); - vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); - intel_runtime_pm_put(i915); + with_intel_runtime_pm(i915, wakeref) + vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; @@ -2553,10 +2553,10 @@ static int ggtt_bind_vma(struct i915_vma *vma, static void ggtt_unbind_vma(struct i915_vma *vma) { struct drm_i915_private *i915 = vma->vm->i915; + intel_wakeref_t wakeref; - intel_runtime_pm_get(i915); - vma->vm->clear_range(vma->vm, vma->node.start, vma->size); - intel_runtime_pm_put(i915); + with_intel_runtime_pm(i915, wakeref) + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); } static int aliasing_gtt_bind_vma(struct i915_vma *vma, @@ -2588,9 +2588,12 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, } if (flags & I915_VMA_GLOBAL_BIND) { - intel_runtime_pm_get(i915); - vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); - intel_runtime_pm_put(i915); + intel_wakeref_t wakeref; + + with_intel_runtime_pm(i915, wakeref) { + vma->vm->insert_entries(vma->vm, vma, + cache_level, pte_flags); + } } return 0; @@ -2601,9 +2604,11 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma) struct drm_i915_private *i915 = vma->vm->i915; if (vma->flags & I915_VMA_GLOBAL_BIND) { - intel_runtime_pm_get(i915); - vma->vm->clear_range(vma->vm, vma->node.start, vma->size); - intel_runtime_pm_put(i915); + struct i915_address_space *vm = vma->vm; + intel_wakeref_t wakeref; + + with_intel_runtime_pm(i915, wakeref) + vm->clear_range(vm, vma->node.start, vma->size); } if (vma->flags & I915_VMA_LOCAL_BIND) { @@ -2795,8 +2800,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); i915_gem_fini_aliasing_ppgtt(dev_priv); - GEM_BUG_ON(!list_empty(&ggtt->vm.active_list)); - list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link) + list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) WARN_ON(i915_vma_unbind(vma)); if (drm_mm_node_allocated(&ggtt->error_capture)) @@ -3227,7 +3231,8 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.insert_entries = gen8_ggtt_insert_entries; /* Serialize GTT updates with aperture access on BXT if VT-d is on. */ - if (intel_ggtt_update_needs_vtd_wa(dev_priv)) { + if (intel_ggtt_update_needs_vtd_wa(dev_priv) || + IS_CHERRYVIEW(dev_priv) /* fails with concurrent use/update */) { ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL; if (ggtt->vm.clear_range != nop_clear_range) @@ -3428,7 +3433,7 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) * and beyond the end of the GTT if we do not provide a guard. */ mutex_lock(&dev_priv->drm.struct_mutex); - i915_address_space_init(&ggtt->vm, dev_priv); + i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT); ggtt->vm.is_ggtt = true; @@ -3501,32 +3506,39 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) i915_check_and_clear_faults(dev_priv); + mutex_lock(&ggtt->vm.mutex); + /* First fill our portion of the GTT with scratch pages */ ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); - ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */ /* clflush objects bound into the GGTT and rebind them. */ - GEM_BUG_ON(!list_empty(&ggtt->vm.active_list)); - list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link) { + list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) { struct drm_i915_gem_object *obj = vma->obj; if (!(vma->flags & I915_VMA_GLOBAL_BIND)) continue; + mutex_unlock(&ggtt->vm.mutex); + if (!i915_vma_unbind(vma)) - continue; + goto lock; WARN_ON(i915_vma_bind(vma, obj ? obj->cache_level : 0, PIN_UPDATE)); if (obj) WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); + +lock: + mutex_lock(&ggtt->vm.mutex); } ggtt->vm.closed = false; i915_ggtt_invalidate(dev_priv); + mutex_unlock(&ggtt->vm.mutex); + if (INTEL_GEN(dev_priv) >= 8) { struct intel_ppat *ppat = &dev_priv->ppat; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index e2360f16427a..03ade71b8d9a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -39,6 +39,7 @@ #include <linux/pagevec.h> #include "i915_request.h" +#include "i915_reset.h" #include "i915_selftest.h" #include "i915_timeline.h" @@ -288,6 +289,8 @@ struct i915_address_space { bool closed; struct mutex mutex; /* protects vma and our lists */ +#define VM_CLASS_GGTT 0 +#define VM_CLASS_PPGTT 1 u64 scratch_pte; struct i915_page_dma scratch_page; @@ -296,32 +299,12 @@ struct i915_address_space { struct i915_page_directory_pointer *scratch_pdp; /* GEN8+ & 48b PPGTT */ /** - * List of objects currently involved in rendering. - * - * Includes buffers having the contents of their GPU caches - * flushed, not necessarily primitives. last_read_req - * represents when the rendering involved will be completed. - * - * A reference is held on the buffer while on this list. + * List of vma currently bound. */ - struct list_head active_list; + struct list_head bound_list; /** - * LRU list of objects which are not in the ringbuffer and - * are ready to unbind, but are still in the GTT. - * - * last_read_req is NULL while an object is in this list. - * - * A reference is not held on the buffer while on this list, - * as merely being GTT-bound shouldn't prevent its being - * freed, and we'll pull it off the list in the free path. - */ - struct list_head inactive_list; - - /** - * List of vma that have been unbound. - * - * A reference is not held on the buffer while on this list. + * List of vma that are not unbound. */ struct list_head unbound_list; @@ -659,19 +642,19 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, /* Flags used by pin/bind&friends. */ #define PIN_NONBLOCK BIT_ULL(0) -#define PIN_MAPPABLE BIT_ULL(1) -#define PIN_ZONE_4G BIT_ULL(2) -#define PIN_NONFAULT BIT_ULL(3) -#define PIN_NOEVICT BIT_ULL(4) - -#define PIN_MBZ BIT_ULL(5) /* I915_VMA_PIN_OVERFLOW */ -#define PIN_GLOBAL BIT_ULL(6) /* I915_VMA_GLOBAL_BIND */ -#define PIN_USER BIT_ULL(7) /* I915_VMA_LOCAL_BIND */ -#define PIN_UPDATE BIT_ULL(8) - -#define PIN_HIGH BIT_ULL(9) -#define PIN_OFFSET_BIAS BIT_ULL(10) -#define PIN_OFFSET_FIXED BIT_ULL(11) +#define PIN_NONFAULT BIT_ULL(1) +#define PIN_NOEVICT BIT_ULL(2) +#define PIN_MAPPABLE BIT_ULL(3) +#define PIN_ZONE_4G BIT_ULL(4) +#define PIN_HIGH BIT_ULL(5) +#define PIN_OFFSET_BIAS BIT_ULL(6) +#define PIN_OFFSET_FIXED BIT_ULL(7) + +#define PIN_MBZ BIT_ULL(8) /* I915_VMA_PIN_OVERFLOW */ +#define PIN_GLOBAL BIT_ULL(9) /* I915_VMA_GLOBAL_BIND */ +#define PIN_USER BIT_ULL(10) /* I915_VMA_LOCAL_BIND */ +#define PIN_UPDATE BIT_ULL(11) + #define PIN_OFFSET_MASK (-I915_GTT_PAGE_SIZE) #endif diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index ff3da64470dd..fab040331cdb 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -57,6 +57,7 @@ struct drm_i915_gem_object_ops { #define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0) #define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) #define I915_GEM_OBJECT_IS_PROXY BIT(2) +#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(3) /* Interface between the GEM object and its backing storage. * get_pages() is called once prior to the use of the associated set @@ -86,24 +87,33 @@ struct drm_i915_gem_object { const struct drm_i915_gem_object_ops *ops; - /** - * @vma_list: List of VMAs backed by this object - * - * The VMA on this list are ordered by type, all GGTT vma are placed - * at the head and all ppGTT vma are placed at the tail. The different - * types of GGTT vma are unordered between themselves, use the - * @vma_tree (which has a defined order between all VMA) to find an - * exact match. - */ - struct list_head vma_list; - /** - * @vma_tree: Ordered tree of VMAs backed by this object - * - * All VMA created for this object are placed in the @vma_tree for - * fast retrieval via a binary search in i915_vma_instance(). - * They are also added to @vma_list for easy iteration. - */ - struct rb_root vma_tree; + struct { + /** + * @vma.lock: protect the list/tree of vmas + */ + spinlock_t lock; + + /** + * @vma.list: List of VMAs backed by this object + * + * The VMA on this list are ordered by type, all GGTT vma are + * placed at the head and all ppGTT vma are placed at the tail. + * The different types of GGTT vma are unordered between + * themselves, use the @vma.tree (which has a defined order + * between all VMA) to quickly find an exact match. + */ + struct list_head list; + + /** + * @vma.tree: Ordered tree of VMAs backed by this object + * + * All VMA created for this object are placed in the @vma.tree + * for fast retrieval via a binary search in + * i915_vma_instance(). They are also added to @vma.list for + * easy iteration. + */ + struct rb_root tree; + } vma; /** * @lut_list: List of vma lookup entries in use for this object. @@ -165,7 +175,7 @@ struct drm_i915_gem_object { atomic_t frontbuffer_bits; unsigned int frontbuffer_ggtt_origin; /* write once */ - struct i915_gem_active frontbuffer_write; + struct i915_active_request frontbuffer_write; /** Current tiling stride for the object, if it's tiled. */ unsigned int tiling_and_stride; @@ -388,6 +398,12 @@ i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj) } static inline bool +i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj) +{ + return obj->ops->flags & I915_GEM_OBJECT_ASYNC_CANCEL; +} + +static inline bool i915_gem_object_is_active(const struct drm_i915_gem_object *obj) { return obj->active_count; diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 6cc2b964c955..6da795c7e62e 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -153,6 +153,7 @@ i915_gem_shrink(struct drm_i915_private *i915, { &i915->mm.bound_list, I915_SHRINK_BOUND }, { NULL, 0 }, }, *phase; + intel_wakeref_t wakeref = 0; unsigned long count = 0; unsigned long scanned = 0; bool unlock; @@ -182,9 +183,11 @@ i915_gem_shrink(struct drm_i915_private *i915, * device just to recover a little memory. If absolutely necessary, * we will force the wake during oom-notifier. */ - if ((flags & I915_SHRINK_BOUND) && - !intel_runtime_pm_get_if_in_use(i915)) - flags &= ~I915_SHRINK_BOUND; + if (flags & I915_SHRINK_BOUND) { + wakeref = intel_runtime_pm_get_if_in_use(i915); + if (!wakeref) + flags &= ~I915_SHRINK_BOUND; + } /* * As we may completely rewrite the (un)bound list whilst unbinding @@ -265,7 +268,7 @@ i915_gem_shrink(struct drm_i915_private *i915, } if (flags & I915_SHRINK_BOUND) - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); i915_retire_requests(i915); @@ -292,14 +295,15 @@ i915_gem_shrink(struct drm_i915_private *i915, */ unsigned long i915_gem_shrink_all(struct drm_i915_private *i915) { - unsigned long freed; - - intel_runtime_pm_get(i915); - freed = i915_gem_shrink(i915, -1UL, NULL, - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_ACTIVE); - intel_runtime_pm_put(i915); + intel_wakeref_t wakeref; + unsigned long freed = 0; + + with_intel_runtime_pm(i915, wakeref) { + freed = i915_gem_shrink(i915, -1UL, NULL, + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND | + I915_SHRINK_ACTIVE); + } return freed; } @@ -370,14 +374,16 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) I915_SHRINK_BOUND | I915_SHRINK_UNBOUND); if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) { - intel_runtime_pm_get(i915); - freed += i915_gem_shrink(i915, - sc->nr_to_scan - sc->nr_scanned, - &sc->nr_scanned, - I915_SHRINK_ACTIVE | - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND); - intel_runtime_pm_put(i915); + intel_wakeref_t wakeref; + + with_intel_runtime_pm(i915, wakeref) { + freed += i915_gem_shrink(i915, + sc->nr_to_scan - sc->nr_scanned, + &sc->nr_scanned, + I915_SHRINK_ACTIVE | + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND); + } } shrinker_unlock(i915, unlock); @@ -392,12 +398,13 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) container_of(nb, struct drm_i915_private, mm.oom_notifier); struct drm_i915_gem_object *obj; unsigned long unevictable, bound, unbound, freed_pages; + intel_wakeref_t wakeref; - intel_runtime_pm_get(i915); - freed_pages = i915_gem_shrink(i915, -1UL, NULL, - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND); - intel_runtime_pm_put(i915); + freed_pages = 0; + with_intel_runtime_pm(i915, wakeref) + freed_pages += i915_gem_shrink(i915, -1UL, NULL, + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND); /* Because we may be allocating inside our own driver, we cannot * assert that there are no objects with pinned pages that are not @@ -435,6 +442,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr container_of(nb, struct drm_i915_private, mm.vmap_notifier); struct i915_vma *vma, *next; unsigned long freed_pages = 0; + intel_wakeref_t wakeref; bool unlock; if (!shrinker_lock(i915, 0, &unlock)) @@ -446,20 +454,27 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr MAX_SCHEDULE_TIMEOUT)) goto out; - intel_runtime_pm_get(i915); - freed_pages += i915_gem_shrink(i915, -1UL, NULL, - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_VMAPS); - intel_runtime_pm_put(i915); + with_intel_runtime_pm(i915, wakeref) + freed_pages += i915_gem_shrink(i915, -1UL, NULL, + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND | + I915_SHRINK_VMAPS); /* We also want to clear any cached iomaps as they wrap vmap */ + mutex_lock(&i915->ggtt.vm.mutex); list_for_each_entry_safe(vma, next, - &i915->ggtt.vm.inactive_list, vm_link) { + &i915->ggtt.vm.bound_list, vm_link) { unsigned long count = vma->node.size >> PAGE_SHIFT; - if (vma->iomap && i915_vma_unbind(vma) == 0) + + if (!vma->iomap || i915_vma_is_active(vma)) + continue; + + mutex_unlock(&i915->ggtt.vm.mutex); + if (i915_vma_unbind(vma) == 0) freed_pages += count; + mutex_lock(&i915->ggtt.vm.mutex); } + mutex_unlock(&i915->ggtt.vm.mutex); out: shrinker_unlock(i915, unlock); diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 9df615eea2d8..74a9661479ca 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -701,7 +701,10 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv vma->pages = obj->mm.pages; vma->flags |= I915_VMA_GLOBAL_BIND; __i915_vma_set_map_and_fenceable(vma); - list_move_tail(&vma->vm_link, &ggtt->vm.inactive_list); + + mutex_lock(&ggtt->vm.mutex); + list_move_tail(&vma->vm_link, &ggtt->vm.bound_list); + mutex_unlock(&ggtt->vm.mutex); spin_lock(&dev_priv->mm.obj_lock); list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 1fb6a7bb5054..1d3f9a31ad61 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -49,77 +49,67 @@ struct i915_mmu_notifier { struct hlist_node node; struct mmu_notifier mn; struct rb_root_cached objects; - struct workqueue_struct *wq; + struct i915_mm_struct *mm; }; struct i915_mmu_object { struct i915_mmu_notifier *mn; struct drm_i915_gem_object *obj; struct interval_tree_node it; - struct list_head link; - struct work_struct work; - bool attached; }; -static void cancel_userptr(struct work_struct *work) +static void add_object(struct i915_mmu_object *mo) { - struct i915_mmu_object *mo = container_of(work, typeof(*mo), work); - struct drm_i915_gem_object *obj = mo->obj; - struct work_struct *active; - - /* Cancel any active worker and force us to re-evaluate gup */ - mutex_lock(&obj->mm.lock); - active = fetch_and_zero(&obj->userptr.work); - mutex_unlock(&obj->mm.lock); - if (active) - goto out; - - i915_gem_object_wait(obj, I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT, NULL); - - mutex_lock(&obj->base.dev->struct_mutex); - - /* We are inside a kthread context and can't be interrupted */ - if (i915_gem_object_unbind(obj) == 0) - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - WARN_ONCE(i915_gem_object_has_pages(obj), - "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_global=%d\n", - obj->bind_count, - atomic_read(&obj->mm.pages_pin_count), - obj->pin_global); - - mutex_unlock(&obj->base.dev->struct_mutex); - -out: - i915_gem_object_put(obj); + GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb)); + interval_tree_insert(&mo->it, &mo->mn->objects); } -static void add_object(struct i915_mmu_object *mo) +static void del_object(struct i915_mmu_object *mo) { - if (mo->attached) + if (RB_EMPTY_NODE(&mo->it.rb)) return; - interval_tree_insert(&mo->it, &mo->mn->objects); - mo->attached = true; + interval_tree_remove(&mo->it, &mo->mn->objects); + RB_CLEAR_NODE(&mo->it.rb); } -static void del_object(struct i915_mmu_object *mo) +static void +__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value) { - if (!mo->attached) + struct i915_mmu_object *mo = obj->userptr.mmu_object; + + /* + * During mm_invalidate_range we need to cancel any userptr that + * overlaps the range being invalidated. Doing so requires the + * struct_mutex, and that risks recursion. In order to cause + * recursion, the user must alias the userptr address space with + * a GTT mmapping (possible with a MAP_FIXED) - then when we have + * to invalidate that mmaping, mm_invalidate_range is called with + * the userptr address *and* the struct_mutex held. To prevent that + * we set a flag under the i915_mmu_notifier spinlock to indicate + * whether this object is valid. + */ + if (!mo) return; - interval_tree_remove(&mo->it, &mo->mn->objects); - mo->attached = false; + spin_lock(&mo->mn->lock); + if (value) + add_object(mo); + else + del_object(mo); + spin_unlock(&mo->mn->lock); } -static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, - const struct mmu_notifier_range *range) +static int +userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, + const struct mmu_notifier_range *range) { struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn); - struct i915_mmu_object *mo; struct interval_tree_node *it; - LIST_HEAD(cancelled); + struct mutex *unlock = NULL; unsigned long end; + int ret = 0; if (RB_EMPTY_ROOT(&mn->objects.rb_root)) return 0; @@ -130,11 +120,15 @@ static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, spin_lock(&mn->lock); it = interval_tree_iter_first(&mn->objects, range->start, end); while (it) { + struct drm_i915_gem_object *obj; + if (!range->blockable) { - spin_unlock(&mn->lock); - return -EAGAIN; + ret = -EAGAIN; + break; } - /* The mmu_object is released late when destroying the + + /* + * The mmu_object is released late when destroying the * GEM object so it is entirely possible to gain a * reference on an object in the process of being freed * since our serialisation is via the spinlock and not @@ -143,29 +137,65 @@ static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, * use-after-free we only acquire a reference on the * object if it is not in the process of being destroyed. */ - mo = container_of(it, struct i915_mmu_object, it); - if (kref_get_unless_zero(&mo->obj->base.refcount)) - queue_work(mn->wq, &mo->work); + obj = container_of(it, struct i915_mmu_object, it)->obj; + if (!kref_get_unless_zero(&obj->base.refcount)) { + it = interval_tree_iter_next(it, range->start, end); + continue; + } + spin_unlock(&mn->lock); + + if (!unlock) { + unlock = &mn->mm->i915->drm.struct_mutex; + + switch (mutex_trylock_recursive(unlock)) { + default: + case MUTEX_TRYLOCK_FAILED: + if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) { + i915_gem_object_put(obj); + return -EINTR; + } + /* fall through */ + case MUTEX_TRYLOCK_SUCCESS: + break; + + case MUTEX_TRYLOCK_RECURSIVE: + unlock = ERR_PTR(-EEXIST); + break; + } + } + + ret = i915_gem_object_unbind(obj); + if (ret == 0) + ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER); + i915_gem_object_put(obj); + if (ret) + goto unlock; - list_add(&mo->link, &cancelled); - it = interval_tree_iter_next(it, range->start, end); + spin_lock(&mn->lock); + + /* + * As we do not (yet) protect the mmu from concurrent insertion + * over this range, there is no guarantee that this search will + * terminate given a pathologic workload. + */ + it = interval_tree_iter_first(&mn->objects, range->start, end); } - list_for_each_entry(mo, &cancelled, link) - del_object(mo); spin_unlock(&mn->lock); - if (!list_empty(&cancelled)) - flush_workqueue(mn->wq); +unlock: + if (!IS_ERR_OR_NULL(unlock)) + mutex_unlock(unlock); + + return ret; - return 0; } static const struct mmu_notifier_ops i915_gem_userptr_notifier = { - .invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start, + .invalidate_range_start = userptr_mn_invalidate_range_start, }; static struct i915_mmu_notifier * -i915_mmu_notifier_create(struct mm_struct *mm) +i915_mmu_notifier_create(struct i915_mm_struct *mm) { struct i915_mmu_notifier *mn; @@ -176,13 +206,7 @@ i915_mmu_notifier_create(struct mm_struct *mm) spin_lock_init(&mn->lock); mn->mn.ops = &i915_gem_userptr_notifier; mn->objects = RB_ROOT_CACHED; - mn->wq = alloc_workqueue("i915-userptr-release", - WQ_UNBOUND | WQ_MEM_RECLAIM, - 0); - if (mn->wq == NULL) { - kfree(mn); - return ERR_PTR(-ENOMEM); - } + mn->mm = mm; return mn; } @@ -192,16 +216,14 @@ i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) { struct i915_mmu_object *mo; - mo = obj->userptr.mmu_object; - if (mo == NULL) + mo = fetch_and_zero(&obj->userptr.mmu_object); + if (!mo) return; spin_lock(&mo->mn->lock); del_object(mo); spin_unlock(&mo->mn->lock); kfree(mo); - - obj->userptr.mmu_object = NULL; } static struct i915_mmu_notifier * @@ -214,7 +236,7 @@ i915_mmu_notifier_find(struct i915_mm_struct *mm) if (mn) return mn; - mn = i915_mmu_notifier_create(mm->mm); + mn = i915_mmu_notifier_create(mm); if (IS_ERR(mn)) err = PTR_ERR(mn); @@ -237,10 +259,8 @@ i915_mmu_notifier_find(struct i915_mm_struct *mm) mutex_unlock(&mm->i915->mm_lock); up_write(&mm->mm->mmap_sem); - if (mn && !IS_ERR(mn)) { - destroy_workqueue(mn->wq); + if (mn && !IS_ERR(mn)) kfree(mn); - } return err ? ERR_PTR(err) : mm->mn; } @@ -263,14 +283,14 @@ i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj, return PTR_ERR(mn); mo = kzalloc(sizeof(*mo), GFP_KERNEL); - if (mo == NULL) + if (!mo) return -ENOMEM; mo->mn = mn; mo->obj = obj; mo->it.start = obj->userptr.ptr; mo->it.last = obj->userptr.ptr + obj->base.size - 1; - INIT_WORK(&mo->work, cancel_userptr); + RB_CLEAR_NODE(&mo->it.rb); obj->userptr.mmu_object = mo; return 0; @@ -284,13 +304,17 @@ i915_mmu_notifier_free(struct i915_mmu_notifier *mn, return; mmu_notifier_unregister(&mn->mn, mm); - destroy_workqueue(mn->wq); kfree(mn); } #else static void +__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value) +{ +} + +static void i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) { } @@ -458,42 +482,6 @@ alloc_table: return st; } -static int -__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, - bool value) -{ - int ret = 0; - - /* During mm_invalidate_range we need to cancel any userptr that - * overlaps the range being invalidated. Doing so requires the - * struct_mutex, and that risks recursion. In order to cause - * recursion, the user must alias the userptr address space with - * a GTT mmapping (possible with a MAP_FIXED) - then when we have - * to invalidate that mmaping, mm_invalidate_range is called with - * the userptr address *and* the struct_mutex held. To prevent that - * we set a flag under the i915_mmu_notifier spinlock to indicate - * whether this object is valid. - */ -#if defined(CONFIG_MMU_NOTIFIER) - if (obj->userptr.mmu_object == NULL) - return 0; - - spin_lock(&obj->userptr.mmu_object->mn->lock); - /* In order to serialise get_pages with an outstanding - * cancel_userptr, we must drop the struct_mutex and try again. - */ - if (!value) - del_object(obj->userptr.mmu_object); - else if (!work_pending(&obj->userptr.mmu_object->work)) - add_object(obj->userptr.mmu_object); - else - ret = -EAGAIN; - spin_unlock(&obj->userptr.mmu_object->mn->lock); -#endif - - return ret; -} - static void __i915_gem_userptr_get_pages_worker(struct work_struct *_work) { @@ -679,8 +667,11 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, struct sgt_iter sgt_iter; struct page *page; - BUG_ON(obj->userptr.work != NULL); + /* Cancel any inflight work and force them to restart their gup */ + obj->userptr.work = NULL; __i915_gem_userptr_set_active(obj, false); + if (!pages) + return; if (obj->mm.madv != I915_MADV_WILLNEED) obj->mm.dirty = false; @@ -718,7 +709,8 @@ i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj) static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | - I915_GEM_OBJECT_IS_SHRINKABLE, + I915_GEM_OBJECT_IS_SHRINKABLE | + I915_GEM_OBJECT_ASYNC_CANCEL, .get_pages = i915_gem_userptr_get_pages, .put_pages = i915_gem_userptr_put_pages, .dmabuf_export = i915_gem_userptr_dmabuf_export, diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 5eaf586c4d48..9a65341fec09 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -447,9 +447,14 @@ static void error_print_request(struct drm_i915_error_state_buf *m, if (!erq->seqno) return; - err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n", + err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x%s%s, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n", prefix, erq->pid, erq->ban_score, - erq->context, erq->seqno, erq->sched_attr.priority, + erq->context, erq->seqno, + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &erq->flags) ? "!" : "", + test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &erq->flags) ? "+" : "", + erq->sched_attr.priority, jiffies_to_msecs(erq->jiffies - epoch), erq->start, erq->head, erq->tail); } @@ -530,13 +535,9 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, } err_printf(m, " seqno: 0x%08x\n", ee->seqno); err_printf(m, " last_seqno: 0x%08x\n", ee->last_seqno); - err_printf(m, " waiting: %s\n", yesno(ee->waiting)); err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head); err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail); - err_printf(m, " hangcheck stall: %s\n", yesno(ee->hangcheck_stalled)); - err_printf(m, " hangcheck action: %s\n", - hangcheck_action_to_str(ee->hangcheck_action)); - err_printf(m, " hangcheck action timestamp: %dms (%lu%s)\n", + err_printf(m, " hangcheck timestamp: %dms (%lu%s)\n", jiffies_to_msecs(ee->hangcheck_timestamp - epoch), ee->hangcheck_timestamp, ee->hangcheck_timestamp == epoch ? "; epoch" : ""); @@ -684,15 +685,15 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, jiffies_to_msecs(error->capture - error->epoch)); for (i = 0; i < ARRAY_SIZE(error->engine); i++) { - if (error->engine[i].hangcheck_stalled && - error->engine[i].context.pid) { - err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n", - engine_name(m->i915, i), - error->engine[i].context.comm, - error->engine[i].context.pid, - error->engine[i].context.ban_score, - bannable(&error->engine[i].context)); - } + if (!error->engine[i].context.pid) + continue; + + err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n", + engine_name(m->i915, i), + error->engine[i].context.comm, + error->engine[i].context.pid, + error->engine[i].context.ban_score, + bannable(&error->engine[i].context)); } err_printf(m, "Reset count: %u\n", error->reset_count); err_printf(m, "Suspend count: %u\n", error->suspend_count); @@ -722,8 +723,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake); err_printf(m, "DERRMR: 0x%08x\n", error->derrmr); err_printf(m, "CCID: 0x%08x\n", error->ccid); - err_printf(m, "Missed interrupts: 0x%08lx\n", - m->i915->gpu_error.missed_irq_rings); for (i = 0; i < error->nfence; i++) err_printf(m, " fence[%d] = %08llx\n", i, error->fence[i]); @@ -807,21 +806,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, error->epoch); } - if (IS_ERR(ee->waiters)) { - err_printf(m, "%s --- ? waiters [unable to acquire spinlock]\n", - m->i915->engine[i]->name); - } else if (ee->num_waiters) { - err_printf(m, "%s --- %d waiters\n", - m->i915->engine[i]->name, - ee->num_waiters); - for (j = 0; j < ee->num_waiters; j++) { - err_printf(m, " seqno 0x%08x for %s [%d]\n", - ee->waiters[j].seqno, - ee->waiters[j].comm, - ee->waiters[j].pid); - } - } - print_error_obj(m, m->i915->engine[i], "ringbuffer", ee->ringbuffer); @@ -1003,8 +987,6 @@ void __i915_gpu_state_free(struct kref *error_ref) i915_error_object_free(ee->wa_ctx); kfree(ee->requests); - if (!IS_ERR_OR_NULL(ee->waiters)) - kfree(ee->waiters); } for (i = 0; i < ARRAY_SIZE(error->active_bo); i++) @@ -1080,23 +1062,23 @@ i915_error_object_create(struct drm_i915_private *i915, } /* The error capture is special as tries to run underneath the normal - * locking rules - so we use the raw version of the i915_gem_active lookup. + * locking rules - so we use the raw version of the i915_active_request lookup. */ -static inline uint32_t -__active_get_seqno(struct i915_gem_active *active) +static inline u32 +__active_get_seqno(struct i915_active_request *active) { struct i915_request *request; - request = __i915_gem_active_peek(active); + request = __i915_active_request_peek(active); return request ? request->global_seqno : 0; } static inline int -__active_get_engine_id(struct i915_gem_active *active) +__active_get_engine_id(struct i915_active_request *active) { struct i915_request *request; - request = __i915_gem_active_peek(active); + request = __i915_active_request_peek(active); return request ? request->engine->id : -1; } @@ -1124,7 +1106,9 @@ static void capture_bo(struct drm_i915_error_buffer *err, static u32 capture_error_bo(struct drm_i915_error_buffer *err, int count, struct list_head *head, - bool pinned_only) + unsigned int flags) +#define ACTIVE_ONLY BIT(0) +#define PINNED_ONLY BIT(1) { struct i915_vma *vma; int i = 0; @@ -1133,7 +1117,10 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err, if (!vma->obj) continue; - if (pinned_only && !i915_vma_is_pinned(vma)) + if (flags & ACTIVE_ONLY && !i915_vma_is_active(vma)) + continue; + + if (flags & PINNED_ONLY && !i915_vma_is_pinned(vma)) continue; capture_bo(err++, vma); @@ -1144,7 +1131,8 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err, return i; } -/* Generate a semi-unique error code. The code is not meant to have meaning, The +/* + * Generate a semi-unique error code. The code is not meant to have meaning, The * code's only purpose is to try to prevent false duplicated bug reports by * grossly estimating a GPU error state. * @@ -1153,29 +1141,23 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err, * * It's only a small step better than a random number in its current form. */ -static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error, - int *engine_id) +static u32 i915_error_generate_code(struct i915_gpu_state *error, + unsigned long engine_mask) { - uint32_t error_code = 0; - int i; - - /* IPEHR would be an ideal way to detect errors, as it's the gross + /* + * IPEHR would be an ideal way to detect errors, as it's the gross * measure of "the command that hung." However, has some very common * synchronization commands which almost always appear in the case * strictly a client bug. Use instdone to differentiate those some. */ - for (i = 0; i < I915_NUM_ENGINES; i++) { - if (error->engine[i].hangcheck_stalled) { - if (engine_id) - *engine_id = i; + if (engine_mask) { + struct drm_i915_error_engine *ee = + &error->engine[ffs(engine_mask)]; - return error->engine[i].ipehr ^ - error->engine[i].instdone.instdone; - } + return ee->ipehr ^ ee->instdone.instdone; } - return error_code; + return 0; } static void gem_record_fences(struct i915_gpu_state *error) @@ -1208,59 +1190,6 @@ static void gen6_record_semaphore_state(struct intel_engine_cs *engine, I915_READ(RING_SYNC_2(engine->mmio_base)); } -static void error_record_engine_waiters(struct intel_engine_cs *engine, - struct drm_i915_error_engine *ee) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct drm_i915_error_waiter *waiter; - struct rb_node *rb; - int count; - - ee->num_waiters = 0; - ee->waiters = NULL; - - if (RB_EMPTY_ROOT(&b->waiters)) - return; - - if (!spin_trylock_irq(&b->rb_lock)) { - ee->waiters = ERR_PTR(-EDEADLK); - return; - } - - count = 0; - for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb)) - count++; - spin_unlock_irq(&b->rb_lock); - - waiter = NULL; - if (count) - waiter = kmalloc_array(count, - sizeof(struct drm_i915_error_waiter), - GFP_ATOMIC); - if (!waiter) - return; - - if (!spin_trylock_irq(&b->rb_lock)) { - kfree(waiter); - ee->waiters = ERR_PTR(-EDEADLK); - return; - } - - ee->waiters = waiter; - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = rb_entry(rb, typeof(*w), node); - - strcpy(waiter->comm, w->tsk->comm); - waiter->pid = w->tsk->pid; - waiter->seqno = w->seqno; - waiter++; - - if (++ee->num_waiters == count) - break; - } - spin_unlock_irq(&b->rb_lock); -} - static void error_record_engine_registers(struct i915_gpu_state *error, struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) @@ -1296,7 +1225,6 @@ static void error_record_engine_registers(struct i915_gpu_state *error, intel_engine_get_instdone(engine, &ee->instdone); - ee->waiting = intel_engine_has_waiter(engine); ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); ee->acthd = intel_engine_get_active_head(engine); ee->seqno = intel_engine_get_seqno(engine); @@ -1338,9 +1266,8 @@ static void error_record_engine_registers(struct i915_gpu_state *error, } ee->idle = intel_engine_is_idle(engine); - ee->hangcheck_timestamp = engine->hangcheck.action_timestamp; - ee->hangcheck_action = engine->hangcheck.action; - ee->hangcheck_stalled = engine->hangcheck.stalled; + if (!ee->idle) + ee->hangcheck_timestamp = engine->hangcheck.action_timestamp; ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error, engine); @@ -1371,6 +1298,7 @@ static void record_request(struct i915_request *request, { struct i915_gem_context *ctx = request->gem_context; + erq->flags = request->fence.flags; erq->context = ctx->hw_id; erq->sched_attr = request->sched.attr; erq->ban_score = atomic_read(&ctx->ban_score); @@ -1546,7 +1474,6 @@ static void gem_record_rings(struct i915_gpu_state *error) ee->engine_id = i; error_record_engine_registers(error, engine, ee); - error_record_engine_waiters(engine, ee); error_record_engine_execlists(engine, ee); request = i915_gem_find_active_request(engine); @@ -1610,14 +1537,17 @@ static void gem_capture_vm(struct i915_gpu_state *error, int count; count = 0; - list_for_each_entry(vma, &vm->active_list, vm_link) - count++; + list_for_each_entry(vma, &vm->bound_list, vm_link) + if (i915_vma_is_active(vma)) + count++; active_bo = NULL; if (count) active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC); if (active_bo) - count = capture_error_bo(active_bo, count, &vm->active_list, false); + count = capture_error_bo(active_bo, + count, &vm->bound_list, + ACTIVE_ONLY); else count = 0; @@ -1655,28 +1585,20 @@ static void capture_pinned_buffers(struct i915_gpu_state *error) struct i915_address_space *vm = &error->i915->ggtt.vm; struct drm_i915_error_buffer *bo; struct i915_vma *vma; - int count_inactive, count_active; - - count_inactive = 0; - list_for_each_entry(vma, &vm->inactive_list, vm_link) - count_inactive++; + int count; - count_active = 0; - list_for_each_entry(vma, &vm->active_list, vm_link) - count_active++; + count = 0; + list_for_each_entry(vma, &vm->bound_list, vm_link) + count++; bo = NULL; - if (count_inactive + count_active) - bo = kcalloc(count_inactive + count_active, - sizeof(*bo), GFP_ATOMIC); + if (count) + bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC); if (!bo) return; - count_inactive = capture_error_bo(bo, count_inactive, - &vm->active_list, true); - count_active = capture_error_bo(bo + count_inactive, count_active, - &vm->inactive_list, true); - error->pinned_bo_count = count_inactive + count_active; + error->pinned_bo_count = + capture_error_bo(bo, count, &vm->bound_list, PINNED_ONLY); error->pinned_bo = bo; } @@ -1783,31 +1705,35 @@ static void capture_reg_state(struct i915_gpu_state *error) error->pgtbl_er = I915_READ(PGTBL_ER); } -static void i915_error_capture_msg(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error, - u32 engine_mask, - const char *error_msg) +static const char * +error_msg(struct i915_gpu_state *error, unsigned long engines, const char *msg) { - u32 ecode; - int engine_id = -1, len; + int len; + int i; - ecode = i915_error_generate_code(dev_priv, error, &engine_id); + for (i = 0; i < ARRAY_SIZE(error->engine); i++) + if (!error->engine[i].context.pid) + engines &= ~BIT(i); len = scnprintf(error->error_msg, sizeof(error->error_msg), - "GPU HANG: ecode %d:%d:0x%08x", - INTEL_GEN(dev_priv), engine_id, ecode); - - if (engine_id != -1 && error->engine[engine_id].context.pid) + "GPU HANG: ecode %d:%lx:0x%08x", + INTEL_GEN(error->i915), engines, + i915_error_generate_code(error, engines)); + if (engines) { + /* Just show the first executing process, more is confusing */ + i = ffs(engines); len += scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, ", in %s [%d]", - error->engine[engine_id].context.comm, - error->engine[engine_id].context.pid); + error->engine[i].context.comm, + error->engine[i].context.pid); + } + if (msg) + len += scnprintf(error->error_msg + len, + sizeof(error->error_msg) - len, + ", %s", msg); - scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, - ", reason: %s, action: %s", - error_msg, - engine_mask ? "reset" : "continue"); + return error->error_msg; } static void capture_gen_state(struct i915_gpu_state *error) @@ -1847,7 +1773,7 @@ static unsigned long capture_find_epoch(const struct i915_gpu_state *error) for (i = 0; i < ARRAY_SIZE(error->engine); i++) { const struct drm_i915_error_engine *ee = &error->engine[i]; - if (ee->hangcheck_stalled && + if (ee->hangcheck_timestamp && time_before(ee->hangcheck_timestamp, epoch)) epoch = ee->hangcheck_timestamp; } @@ -1921,7 +1847,7 @@ i915_capture_gpu_state(struct drm_i915_private *i915) * i915_capture_error_state - capture an error record for later analysis * @i915: i915 device * @engine_mask: the mask of engines triggering the hang - * @error_msg: a message to insert into the error capture header + * @msg: a message to insert into the error capture header * * Should be called when an error is detected (either a hang or an error * interrupt) to capture error state from the time of the error. Fills @@ -1929,8 +1855,8 @@ i915_capture_gpu_state(struct drm_i915_private *i915) * to pick up. */ void i915_capture_error_state(struct drm_i915_private *i915, - u32 engine_mask, - const char *error_msg) + unsigned long engine_mask, + const char *msg) { static bool warned; struct i915_gpu_state *error; @@ -1946,8 +1872,7 @@ void i915_capture_error_state(struct drm_i915_private *i915, if (IS_ERR(error)) return; - i915_error_capture_msg(i915, error, engine_mask, error_msg); - DRM_INFO("%s\n", error->error_msg); + dev_info(i915->drm.dev, "%s\n", error_msg(error, engine_mask, msg)); if (!error->simulated) { spin_lock_irqsave(&i915->gpu_error.lock, flags); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 6d9f45468ac1..53b1f22dd365 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -82,11 +82,7 @@ struct i915_gpu_state { int engine_id; /* Software tracked state */ bool idle; - bool waiting; - int num_waiters; unsigned long hangcheck_timestamp; - bool hangcheck_stalled; - enum intel_engine_hangcheck_action hangcheck_action; struct i915_address_space *vm; int num_requests; u32 reset_count; @@ -149,6 +145,7 @@ struct i915_gpu_state { struct drm_i915_error_object *default_state; struct drm_i915_error_request { + unsigned long flags; long jiffies; pid_t pid; u32 context; @@ -161,12 +158,6 @@ struct i915_gpu_state { } *requests, execlist[EXECLIST_MAX_PORTS]; unsigned int num_ports; - struct drm_i915_error_waiter { - char comm[TASK_COMM_LEN]; - pid_t pid; - u32 seqno; - } *waiters; - struct { u32 gfx_mode; union { @@ -197,6 +188,8 @@ struct i915_gpu_state { struct scatterlist *sgl, *fit; }; +struct i915_gpu_restart; + struct i915_gpu_error { /* For hangcheck timer */ #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ @@ -211,8 +204,6 @@ struct i915_gpu_error { atomic_t pending_fb_pin; - unsigned long missed_irq_rings; - /** * State variable controlling the reset flow and count * @@ -247,15 +238,6 @@ struct i915_gpu_error { * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a * secondary role in preventing two concurrent global reset attempts. * - * #I915_RESET_HANDOFF - To perform the actual GPU reset, we need the - * struct_mutex. We try to acquire the struct_mutex in the reset worker, - * but it may be held by some long running waiter (that we cannot - * interrupt without causing trouble). Once we are ready to do the GPU - * reset, we set the I915_RESET_HANDOFF bit and wakeup any waiters. If - * they already hold the struct_mutex and want to participate they can - * inspect the bit and do the reset directly, otherwise the worker - * waits for the struct_mutex. - * * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to * acquire the struct_mutex to reset an engine, we need an explicit * flag to prevent two concurrent reset attempts in the same engine. @@ -269,19 +251,14 @@ struct i915_gpu_error { */ unsigned long flags; #define I915_RESET_BACKOFF 0 -#define I915_RESET_HANDOFF 1 -#define I915_RESET_MODESET 2 +#define I915_RESET_MODESET 1 +#define I915_RESET_ENGINE 2 #define I915_WEDGED (BITS_PER_LONG - 1) -#define I915_RESET_ENGINE (I915_WEDGED - I915_NUM_ENGINES) /** Number of times an engine has been reset */ u32 reset_engine_count[I915_NUM_ENGINES]; - /** Set of stalled engines with guilty requests, in the current reset */ - u32 stalled_mask; - - /** Reason for the current *global* reset */ - const char *reason; + struct mutex wedge_mutex; /* serialises wedging/unwedging */ /** * Waitqueue to signal when a hang is detected. Used to for waiters @@ -295,8 +272,7 @@ struct i915_gpu_error { */ wait_queue_head_t reset_queue; - /* For missed irq/seqno simulation. */ - unsigned long test_irq_rings; + struct i915_gpu_restart *restart; }; struct drm_i915_error_state_buf { @@ -318,7 +294,7 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915); void i915_capture_error_state(struct drm_i915_private *dev_priv, - u32 engine_mask, + unsigned long engine_mask, const char *error_msg); static inline struct i915_gpu_state * diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c91d18707475..441d2674b272 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -225,10 +225,10 @@ static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir); /* For display hotplug interrupt */ static inline void i915_hotplug_interrupt_update_locked(struct drm_i915_private *dev_priv, - uint32_t mask, - uint32_t bits) + u32 mask, + u32 bits) { - uint32_t val; + u32 val; lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(bits & ~mask); @@ -252,8 +252,8 @@ i915_hotplug_interrupt_update_locked(struct drm_i915_private *dev_priv, * version is also available. */ void i915_hotplug_interrupt_update(struct drm_i915_private *dev_priv, - uint32_t mask, - uint32_t bits) + u32 mask, + u32 bits) { spin_lock_irq(&dev_priv->irq_lock); i915_hotplug_interrupt_update_locked(dev_priv, mask, bits); @@ -302,10 +302,10 @@ static bool gen11_reset_one_iir(struct drm_i915_private * const i915, * @enabled_irq_mask: mask of interrupt bits to enable */ void ilk_update_display_irq(struct drm_i915_private *dev_priv, - uint32_t interrupt_mask, - uint32_t enabled_irq_mask) + u32 interrupt_mask, + u32 enabled_irq_mask) { - uint32_t new_val; + u32 new_val; lockdep_assert_held(&dev_priv->irq_lock); @@ -332,8 +332,8 @@ void ilk_update_display_irq(struct drm_i915_private *dev_priv, * @enabled_irq_mask: mask of interrupt bits to enable */ static void ilk_update_gt_irq(struct drm_i915_private *dev_priv, - uint32_t interrupt_mask, - uint32_t enabled_irq_mask) + u32 interrupt_mask, + u32 enabled_irq_mask) { lockdep_assert_held(&dev_priv->irq_lock); @@ -347,13 +347,13 @@ static void ilk_update_gt_irq(struct drm_i915_private *dev_priv, I915_WRITE(GTIMR, dev_priv->gt_irq_mask); } -void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask) +void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, u32 mask) { ilk_update_gt_irq(dev_priv, mask, mask); POSTING_READ_FW(GTIMR); } -void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask) +void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, u32 mask) { ilk_update_gt_irq(dev_priv, mask, 0); } @@ -392,10 +392,10 @@ static i915_reg_t gen6_pm_ier(struct drm_i915_private *dev_priv) * @enabled_irq_mask: mask of interrupt bits to enable */ static void snb_update_pm_irq(struct drm_i915_private *dev_priv, - uint32_t interrupt_mask, - uint32_t enabled_irq_mask) + u32 interrupt_mask, + u32 enabled_irq_mask) { - uint32_t new_val; + u32 new_val; WARN_ON(enabled_irq_mask & ~interrupt_mask); @@ -579,11 +579,11 @@ void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv) * @enabled_irq_mask: mask of interrupt bits to enable */ static void bdw_update_port_irq(struct drm_i915_private *dev_priv, - uint32_t interrupt_mask, - uint32_t enabled_irq_mask) + u32 interrupt_mask, + u32 enabled_irq_mask) { - uint32_t new_val; - uint32_t old_val; + u32 new_val; + u32 old_val; lockdep_assert_held(&dev_priv->irq_lock); @@ -613,10 +613,10 @@ static void bdw_update_port_irq(struct drm_i915_private *dev_priv, */ void bdw_update_pipe_irq(struct drm_i915_private *dev_priv, enum pipe pipe, - uint32_t interrupt_mask, - uint32_t enabled_irq_mask) + u32 interrupt_mask, + u32 enabled_irq_mask) { - uint32_t new_val; + u32 new_val; lockdep_assert_held(&dev_priv->irq_lock); @@ -643,10 +643,10 @@ void bdw_update_pipe_irq(struct drm_i915_private *dev_priv, * @enabled_irq_mask: mask of interrupt bits to enable */ void ibx_display_interrupt_update(struct drm_i915_private *dev_priv, - uint32_t interrupt_mask, - uint32_t enabled_irq_mask) + u32 interrupt_mask, + u32 enabled_irq_mask) { - uint32_t sdeimr = I915_READ(SDEIMR); + u32 sdeimr = I915_READ(SDEIMR); sdeimr &= ~interrupt_mask; sdeimr |= (~enabled_irq_mask & interrupt_mask); @@ -823,11 +823,26 @@ static void i915_enable_asle_pipestat(struct drm_i915_private *dev_priv) static u32 i915_get_vblank_counter(struct drm_device *dev, unsigned int pipe) { struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; + const struct drm_display_mode *mode = &vblank->hwmode; i915_reg_t high_frame, low_frame; u32 high1, high2, low, pixel, vbl_start, hsync_start, htotal; - const struct drm_display_mode *mode = &dev->vblank[pipe].hwmode; unsigned long irqflags; + /* + * On i965gm TV output the frame counter only works up to + * the point when we enable the TV encoder. After that the + * frame counter ceases to work and reads zero. We need a + * vblank wait before enabling the TV encoder and so we + * have to enable vblank interrupts while the frame counter + * is still in a working state. However the core vblank code + * does not like us returning non-zero frame counter values + * when we've told it that we don't have a working frame + * counter. Thus we must stop non-zero values leaking out. + */ + if (!vblank->max_vblank_count) + return 0; + htotal = mode->crtc_htotal; hsync_start = mode->crtc_hsync_start; vbl_start = mode->crtc_vblank_start; @@ -999,6 +1014,9 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, int position; int vbl_start, vbl_end, hsync_start, htotal, vtotal; unsigned long irqflags; + bool use_scanline_counter = INTEL_GEN(dev_priv) >= 5 || + IS_G4X(dev_priv) || IS_GEN(dev_priv, 2) || + mode->private_flags & I915_MODE_FLAG_USE_SCANLINE_COUNTER; if (WARN_ON(!mode->crtc_clock)) { DRM_DEBUG_DRIVER("trying to get scanoutpos for disabled " @@ -1031,7 +1049,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, if (stime) *stime = ktime_get(); - if (IS_GEN(dev_priv, 2) || IS_G4X(dev_priv) || INTEL_GEN(dev_priv) >= 5) { + if (use_scanline_counter) { /* No obvious pixelcount register. Only query vertical * scanout position from Display scan line register. */ @@ -1091,7 +1109,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, else position += vtotal - vbl_end; - if (IS_GEN(dev_priv, 2) || IS_G4X(dev_priv) || INTEL_GEN(dev_priv) >= 5) { + if (use_scanline_counter) { *vpos = position; *hpos = 0; } else { @@ -1153,69 +1171,6 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) return; } -static void notify_ring(struct intel_engine_cs *engine) -{ - const u32 seqno = intel_engine_get_seqno(engine); - struct i915_request *rq = NULL; - struct task_struct *tsk = NULL; - struct intel_wait *wait; - - if (unlikely(!engine->breadcrumbs.irq_armed)) - return; - - rcu_read_lock(); - - spin_lock(&engine->breadcrumbs.irq_lock); - wait = engine->breadcrumbs.irq_wait; - if (wait) { - /* - * We use a callback from the dma-fence to submit - * requests after waiting on our own requests. To - * ensure minimum delay in queuing the next request to - * hardware, signal the fence now rather than wait for - * the signaler to be woken up. We still wake up the - * waiter in order to handle the irq-seqno coherency - * issues (we may receive the interrupt before the - * seqno is written, see __i915_request_irq_complete()) - * and to handle coalescing of multiple seqno updates - * and many waiters. - */ - if (i915_seqno_passed(seqno, wait->seqno)) { - struct i915_request *waiter = wait->request; - - if (waiter && - !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &waiter->fence.flags) && - intel_wait_check_request(wait, waiter)) - rq = i915_request_get(waiter); - - tsk = wait->tsk; - } - - engine->breadcrumbs.irq_count++; - } else { - if (engine->breadcrumbs.irq_armed) - __intel_engine_disarm_breadcrumbs(engine); - } - spin_unlock(&engine->breadcrumbs.irq_lock); - - if (rq) { - spin_lock(&rq->lock); - dma_fence_signal_locked(&rq->fence); - GEM_BUG_ON(!i915_request_completed(rq)); - spin_unlock(&rq->lock); - - i915_request_put(rq); - } - - if (tsk && tsk->state & TASK_NORMAL) - wake_up_process(tsk); - - rcu_read_unlock(); - - trace_intel_engine_notify(engine, wait); -} - static void vlv_c0_read(struct drm_i915_private *dev_priv, struct intel_rps_ei *ei) { @@ -1370,8 +1325,8 @@ static void ivybridge_parity_work(struct work_struct *work) container_of(work, typeof(*dev_priv), l3_parity.error_work); u32 error_status, row, bank, subbank; char *parity_event[6]; - uint32_t misccpctl; - uint8_t slice = 0; + u32 misccpctl; + u8 slice = 0; /* We must turn off DOP level clock gating to access the L3 registers. * In order to prevent a get/put style interface, acquire struct mutex @@ -1460,20 +1415,20 @@ static void ilk_gt_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) { if (gt_iir & GT_RENDER_USER_INTERRUPT) - notify_ring(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); if (gt_iir & ILK_BSD_USER_INTERRUPT) - notify_ring(dev_priv->engine[VCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]); } static void snb_gt_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) { if (gt_iir & GT_RENDER_USER_INTERRUPT) - notify_ring(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); if (gt_iir & GT_BSD_USER_INTERRUPT) - notify_ring(dev_priv->engine[VCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]); if (gt_iir & GT_BLT_USER_INTERRUPT) - notify_ring(dev_priv->engine[BCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[BCS]); if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT | GT_BSD_CS_ERROR_INTERRUPT | @@ -1493,7 +1448,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) tasklet = true; if (iir & GT_RENDER_USER_INTERRUPT) { - notify_ring(engine); + intel_engine_breadcrumbs_irq(engine); tasklet |= USES_GUC_SUBMISSION(engine->i915); } @@ -1732,13 +1687,13 @@ static void dp_aux_irq_handler(struct drm_i915_private *dev_priv) #if defined(CONFIG_DEBUG_FS) static void display_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, enum pipe pipe, - uint32_t crc0, uint32_t crc1, - uint32_t crc2, uint32_t crc3, - uint32_t crc4) + u32 crc0, u32 crc1, + u32 crc2, u32 crc3, + u32 crc4) { struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[pipe]; struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - uint32_t crcs[5]; + u32 crcs[5]; spin_lock(&pipe_crc->lock); /* @@ -1770,9 +1725,9 @@ static void display_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, static inline void display_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, enum pipe pipe, - uint32_t crc0, uint32_t crc1, - uint32_t crc2, uint32_t crc3, - uint32_t crc4) {} + u32 crc0, u32 crc1, + u32 crc2, u32 crc3, + u32 crc4) {} #endif @@ -1798,7 +1753,7 @@ static void ivb_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, static void i9xx_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, enum pipe pipe) { - uint32_t res1, res2; + u32 res1, res2; if (INTEL_GEN(dev_priv) >= 3) res1 = I915_READ(PIPE_CRC_RES_RES1_I915(pipe)); @@ -1839,7 +1794,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) if (HAS_VEBOX(dev_priv)) { if (pm_iir & PM_VEBOX_USER_INTERRUPT) - notify_ring(dev_priv->engine[VECS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[VECS]); if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); @@ -2932,46 +2887,6 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg) return IRQ_HANDLED; } -struct wedge_me { - struct delayed_work work; - struct drm_i915_private *i915; - const char *name; -}; - -static void wedge_me(struct work_struct *work) -{ - struct wedge_me *w = container_of(work, typeof(*w), work.work); - - dev_err(w->i915->drm.dev, - "%s timed out, cancelling all in-flight rendering.\n", - w->name); - i915_gem_set_wedged(w->i915); -} - -static void __init_wedge(struct wedge_me *w, - struct drm_i915_private *i915, - long timeout, - const char *name) -{ - w->i915 = i915; - w->name = name; - - INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me); - schedule_delayed_work(&w->work, timeout); -} - -static void __fini_wedge(struct wedge_me *w) -{ - cancel_delayed_work_sync(&w->work); - destroy_delayed_work_on_stack(&w->work); - w->i915 = NULL; -} - -#define i915_wedge_on_timeout(W, DEV, TIMEOUT) \ - for (__init_wedge((W), (DEV), (TIMEOUT), __func__); \ - (W)->i915; \ - __fini_wedge((W))) - static u32 gen11_gt_engine_identity(struct drm_i915_private * const i915, const unsigned int bank, const unsigned int bit) @@ -3182,203 +3097,6 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg) return IRQ_HANDLED; } -static void i915_reset_device(struct drm_i915_private *dev_priv, - u32 engine_mask, - const char *reason) -{ - struct i915_gpu_error *error = &dev_priv->gpu_error; - struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj; - char *error_event[] = { I915_ERROR_UEVENT "=1", NULL }; - char *reset_event[] = { I915_RESET_UEVENT "=1", NULL }; - char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL }; - struct wedge_me w; - - kobject_uevent_env(kobj, KOBJ_CHANGE, error_event); - - DRM_DEBUG_DRIVER("resetting chip\n"); - kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event); - - /* Use a watchdog to ensure that our reset completes */ - i915_wedge_on_timeout(&w, dev_priv, 5*HZ) { - intel_prepare_reset(dev_priv); - - error->reason = reason; - error->stalled_mask = engine_mask; - - /* Signal that locked waiters should reset the GPU */ - smp_mb__before_atomic(); - set_bit(I915_RESET_HANDOFF, &error->flags); - wake_up_all(&error->wait_queue); - - /* Wait for anyone holding the lock to wakeup, without - * blocking indefinitely on struct_mutex. - */ - do { - if (mutex_trylock(&dev_priv->drm.struct_mutex)) { - i915_reset(dev_priv, engine_mask, reason); - mutex_unlock(&dev_priv->drm.struct_mutex); - } - } while (wait_on_bit_timeout(&error->flags, - I915_RESET_HANDOFF, - TASK_UNINTERRUPTIBLE, - 1)); - - error->stalled_mask = 0; - error->reason = NULL; - - intel_finish_reset(dev_priv); - } - - if (!test_bit(I915_WEDGED, &error->flags)) - kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event); -} - -void i915_clear_error_registers(struct drm_i915_private *dev_priv) -{ - u32 eir; - - if (!IS_GEN(dev_priv, 2)) - I915_WRITE(PGTBL_ER, I915_READ(PGTBL_ER)); - - if (INTEL_GEN(dev_priv) < 4) - I915_WRITE(IPEIR, I915_READ(IPEIR)); - else - I915_WRITE(IPEIR_I965, I915_READ(IPEIR_I965)); - - I915_WRITE(EIR, I915_READ(EIR)); - eir = I915_READ(EIR); - if (eir) { - /* - * some errors might have become stuck, - * mask them. - */ - DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir); - I915_WRITE(EMR, I915_READ(EMR) | eir); - I915_WRITE(IIR, I915_MASTER_ERROR_INTERRUPT); - } - - if (INTEL_GEN(dev_priv) >= 8) { - I915_WRITE(GEN8_RING_FAULT_REG, - I915_READ(GEN8_RING_FAULT_REG) & ~RING_FAULT_VALID); - POSTING_READ(GEN8_RING_FAULT_REG); - } else if (INTEL_GEN(dev_priv) >= 6) { - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, dev_priv, id) { - I915_WRITE(RING_FAULT_REG(engine), - I915_READ(RING_FAULT_REG(engine)) & - ~RING_FAULT_VALID); - } - POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS])); - } -} - -/** - * i915_handle_error - handle a gpu error - * @dev_priv: i915 device private - * @engine_mask: mask representing engines that are hung - * @flags: control flags - * @fmt: Error message format string - * - * Do some basic checking of register state at error time and - * dump it to the syslog. Also call i915_capture_error_state() to make - * sure we get a record and make it available in debugfs. Fire a uevent - * so userspace knows something bad happened (should trigger collection - * of a ring dump etc.). - */ -void i915_handle_error(struct drm_i915_private *dev_priv, - u32 engine_mask, - unsigned long flags, - const char *fmt, ...) -{ - struct intel_engine_cs *engine; - unsigned int tmp; - char error_msg[80]; - char *msg = NULL; - - if (fmt) { - va_list args; - - va_start(args, fmt); - vscnprintf(error_msg, sizeof(error_msg), fmt, args); - va_end(args); - - msg = error_msg; - } - - /* - * In most cases it's guaranteed that we get here with an RPM - * reference held, for example because there is a pending GPU - * request that won't finish until the reset is done. This - * isn't the case at least when we get here by doing a - * simulated reset via debugfs, so get an RPM reference. - */ - intel_runtime_pm_get(dev_priv); - - engine_mask &= INTEL_INFO(dev_priv)->ring_mask; - - if (flags & I915_ERROR_CAPTURE) { - i915_capture_error_state(dev_priv, engine_mask, msg); - i915_clear_error_registers(dev_priv); - } - - /* - * Try engine reset when available. We fall back to full reset if - * single reset fails. - */ - if (intel_has_reset_engine(dev_priv) && - !i915_terminally_wedged(&dev_priv->gpu_error)) { - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { - BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE); - if (test_and_set_bit(I915_RESET_ENGINE + engine->id, - &dev_priv->gpu_error.flags)) - continue; - - if (i915_reset_engine(engine, msg) == 0) - engine_mask &= ~intel_engine_flag(engine); - - clear_bit(I915_RESET_ENGINE + engine->id, - &dev_priv->gpu_error.flags); - wake_up_bit(&dev_priv->gpu_error.flags, - I915_RESET_ENGINE + engine->id); - } - } - - if (!engine_mask) - goto out; - - /* Full reset needs the mutex, stop any other user trying to do so. */ - if (test_and_set_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags)) { - wait_event(dev_priv->gpu_error.reset_queue, - !test_bit(I915_RESET_BACKOFF, - &dev_priv->gpu_error.flags)); - goto out; - } - - /* Prevent any other reset-engine attempt. */ - for_each_engine(engine, dev_priv, tmp) { - while (test_and_set_bit(I915_RESET_ENGINE + engine->id, - &dev_priv->gpu_error.flags)) - wait_on_bit(&dev_priv->gpu_error.flags, - I915_RESET_ENGINE + engine->id, - TASK_UNINTERRUPTIBLE); - } - - i915_reset_device(dev_priv, engine_mask, msg); - - for_each_engine(engine, dev_priv, tmp) { - clear_bit(I915_RESET_ENGINE + engine->id, - &dev_priv->gpu_error.flags); - } - - clear_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags); - wake_up_all(&dev_priv->gpu_error.reset_queue); - -out: - intel_runtime_pm_put(dev_priv); -} - /* Called from drm generic code, passed 'crtc' which * we use as a pipe index */ @@ -3411,7 +3129,7 @@ static int ironlake_enable_vblank(struct drm_device *dev, unsigned int pipe) { struct drm_i915_private *dev_priv = to_i915(dev); unsigned long irqflags; - uint32_t bit = INTEL_GEN(dev_priv) >= 7 ? + u32 bit = INTEL_GEN(dev_priv) >= 7 ? DE_PIPE_VBLANK_IVB(pipe) : DE_PIPE_VBLANK(pipe); spin_lock_irqsave(&dev_priv->irq_lock, irqflags); @@ -3473,7 +3191,7 @@ static void ironlake_disable_vblank(struct drm_device *dev, unsigned int pipe) { struct drm_i915_private *dev_priv = to_i915(dev); unsigned long irqflags; - uint32_t bit = INTEL_GEN(dev_priv) >= 7 ? + u32 bit = INTEL_GEN(dev_priv) >= 7 ? DE_PIPE_VBLANK_IVB(pipe) : DE_PIPE_VBLANK(pipe); spin_lock_irqsave(&dev_priv->irq_lock, irqflags); @@ -3691,7 +3409,7 @@ static void gen11_irq_reset(struct drm_device *dev) void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv, u8 pipe_mask) { - uint32_t extra_ier = GEN8_PIPE_VBLANK | GEN8_PIPE_FIFO_UNDERRUN; + u32 extra_ier = GEN8_PIPE_VBLANK | GEN8_PIPE_FIFO_UNDERRUN; enum pipe pipe; spin_lock_irq(&dev_priv->irq_lock); @@ -4160,7 +3878,7 @@ static int valleyview_irq_postinstall(struct drm_device *dev) static void gen8_gt_irq_postinstall(struct drm_i915_private *dev_priv) { /* These are interrupts we'll toggle with the ring mask register */ - uint32_t gt_interrupts[] = { + u32 gt_interrupts[] = { GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT | GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT | @@ -4188,8 +3906,8 @@ static void gen8_gt_irq_postinstall(struct drm_i915_private *dev_priv) static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) { - uint32_t de_pipe_masked = GEN8_PIPE_CDCLK_CRC_DONE; - uint32_t de_pipe_enables; + u32 de_pipe_masked = GEN8_PIPE_CDCLK_CRC_DONE; + u32 de_pipe_enables; u32 de_port_masked = GEN8_AUX_CHANNEL_A; u32 de_port_enables; u32 de_misc_masked = GEN8_DE_EDP_PSR; @@ -4329,6 +4047,7 @@ static int gen11_irq_postinstall(struct drm_device *dev) I915_WRITE(GEN11_DISPLAY_INT_CTL, GEN11_DISPLAY_IRQ_ENABLE); gen11_master_intr_enable(dev_priv->regs); + POSTING_READ(GEN11_GFX_MSTR_IRQ); return 0; } @@ -4499,7 +4218,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) I915_WRITE16(IIR, iir); if (iir & I915_USER_INTERRUPT) - notify_ring(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); if (iir & I915_MASTER_ERROR_INTERRUPT) i8xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4607,7 +4326,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) I915_WRITE(IIR, iir); if (iir & I915_USER_INTERRUPT) - notify_ring(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); if (iir & I915_MASTER_ERROR_INTERRUPT) i9xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4752,10 +4471,10 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) I915_WRITE(IIR, iir); if (iir & I915_USER_INTERRUPT) - notify_ring(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); if (iir & I915_BSD_USER_INTERRUPT) - notify_ring(dev_priv->engine[VCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]); if (iir & I915_MASTER_ERROR_INTERRUPT) i9xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4818,16 +4537,10 @@ void intel_irq_init(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) >= 8) rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; - if (IS_GEN(dev_priv, 2)) { - /* Gen2 doesn't have a hardware frame counter */ - dev->max_vblank_count = 0; - } else if (IS_G4X(dev_priv) || INTEL_GEN(dev_priv) >= 5) { - dev->max_vblank_count = 0xffffffff; /* full 32 bit counter */ + if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) dev->driver->get_vblank_counter = g4x_get_vblank_counter; - } else { + else if (INTEL_GEN(dev_priv) >= 3) dev->driver->get_vblank_counter = i915_get_vblank_counter; - dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */ - } /* * Opt out of the vblank disable timer on everything except gen2. diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 9f0539bdaa39..b5be0abbba35 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -97,8 +97,10 @@ i915_param_named_unsafe(disable_power_well, int, 0400, i915_param_named_unsafe(enable_ips, int, 0600, "Enable IPS (default: true)"); -i915_param_named(fastboot, bool, 0600, - "Try to skip unnecessary mode sets at boot time (default: false)"); +i915_param_named(fastboot, int, 0600, + "Try to skip unnecessary mode sets at boot time " + "(0=disabled, 1=enabled) " + "Default: -1 (use per-chip default)"); i915_param_named_unsafe(prefault_disable, bool, 0600, "Disable page prefaulting for pread/pwrite/reloc (default:false). " diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 6efcf330bdab..3f14e9881a0d 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -63,10 +63,10 @@ struct drm_printer; param(int, edp_vswing, 0) \ param(int, reset, 2) \ param(unsigned int, inject_load_failure, 0) \ + param(int, fastboot, -1) \ /* leave bools at the end to not create holes */ \ param(bool, alpha_support, IS_ENABLED(CONFIG_DRM_I915_ALPHA_SUPPORT)) \ param(bool, enable_hangcheck, true) \ - param(bool, fastboot, false) \ param(bool, prefault_disable, false) \ param(bool, load_detect_test, false) \ param(bool, force_reset_modeset_test, false) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 44c23ac60347..66f82f3f050f 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -28,6 +28,7 @@ #include <drm/drm_drv.h> +#include "i915_active.h" #include "i915_drv.h" #include "i915_selftest.h" @@ -69,9 +70,15 @@ #define BDW_COLORS \ .color = { .degamma_lut_size = 512, .gamma_lut_size = 512 } #define CHV_COLORS \ - .color = { .degamma_lut_size = 65, .gamma_lut_size = 257 } + .color = { .degamma_lut_size = 65, .gamma_lut_size = 257, \ + .degamma_lut_tests = DRM_COLOR_LUT_NON_DECREASING, \ + .gamma_lut_tests = DRM_COLOR_LUT_NON_DECREASING, \ + } #define GLK_COLORS \ - .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } + .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024, \ + .degamma_lut_tests = DRM_COLOR_LUT_NON_DECREASING | \ + DRM_COLOR_LUT_EQUAL_CHANNELS, \ + } /* Keep in gen based order, and chronological order within a gen */ @@ -83,7 +90,7 @@ .num_pipes = 1, \ .display.has_overlay = 1, \ .display.overlay_needs_physical = 1, \ - .display.has_gmch_display = 1, \ + .display.has_gmch = 1, \ .gpu_reset_clobbers_display = true, \ .hws_needs_physical = 1, \ .unfenced_needs_alignment = 1, \ @@ -124,7 +131,7 @@ static const struct intel_device_info intel_i865g_info = { #define GEN3_FEATURES \ GEN(3), \ .num_pipes = 2, \ - .display.has_gmch_display = 1, \ + .display.has_gmch = 1, \ .gpu_reset_clobbers_display = true, \ .ring_mask = RENDER_RING, \ .has_snoop = true, \ @@ -201,7 +208,7 @@ static const struct intel_device_info intel_pineview_info = { GEN(4), \ .num_pipes = 2, \ .display.has_hotplug = 1, \ - .display.has_gmch_display = 1, \ + .display.has_gmch = 1, \ .gpu_reset_clobbers_display = true, \ .ring_mask = RENDER_RING, \ .has_snoop = true, \ @@ -377,7 +384,7 @@ static const struct intel_device_info intel_valleyview_info = { .num_pipes = 2, .has_runtime_pm = 1, .has_rc6 = 1, - .display.has_gmch_display = 1, + .display.has_gmch = 1, .display.has_hotplug = 1, .ppgtt = INTEL_PPGTT_FULL, .has_snoop = true, @@ -469,7 +476,7 @@ static const struct intel_device_info intel_cherryview_info = { .has_runtime_pm = 1, .has_rc6 = 1, .has_logical_ring_contexts = 1, - .display.has_gmch_display = 1, + .display.has_gmch = 1, .ppgtt = INTEL_PPGTT_FULL, .has_reset_engine = 1, .has_snoop = true, @@ -707,6 +714,7 @@ static const struct pci_device_id pciidlist[] = { INTEL_AML_KBL_GT2_IDS(&intel_kabylake_gt2_info), INTEL_CFL_S_GT1_IDS(&intel_coffeelake_gt1_info), INTEL_CFL_S_GT2_IDS(&intel_coffeelake_gt2_info), + INTEL_CFL_H_GT1_IDS(&intel_coffeelake_gt1_info), INTEL_CFL_H_GT2_IDS(&intel_coffeelake_gt2_info), INTEL_CFL_U_GT2_IDS(&intel_coffeelake_gt2_info), INTEL_CFL_U_GT3_IDS(&intel_coffeelake_gt3_info), @@ -793,6 +801,8 @@ static int __init i915_init(void) bool use_kms = true; int err; + i915_global_active_init(); + err = i915_mock_selftests(); if (err) return err > 0 ? 0 : err; @@ -824,6 +834,7 @@ static void __exit i915_exit(void) return; pci_unregister_driver(&i915_pci_driver); + i915_global_active_exit(); } module_init(i915_init); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 5b1ae5ed97b3..9ebf99f3d8d3 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1365,7 +1365,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) free_oa_buffer(dev_priv); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, stream->wakeref); if (stream->ctx) oa_put_render_ctx_id(stream); @@ -1677,6 +1677,11 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, CTX_REG(reg_state, state_offset, flex_regs[i], value); } + + CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, + gen8_make_rpcs(dev_priv, + &to_intel_context(ctx, + dev_priv->engine[RCS])->sseu)); } /* @@ -2087,7 +2092,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, * In our case we are expecting that taking pm + FORCEWAKE * references will effectively disable RC6. */ - intel_runtime_pm_get(dev_priv); + stream->wakeref = intel_runtime_pm_get(dev_priv); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); ret = alloc_oa_buffer(dev_priv); @@ -2098,21 +2103,21 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, if (ret) goto err_lock; + stream->ops = &i915_oa_stream_ops; + dev_priv->perf.oa.exclusive_stream = stream; + ret = dev_priv->perf.oa.ops.enable_metric_set(stream); if (ret) { DRM_DEBUG("Unable to enable metric set\n"); goto err_enable; } - stream->ops = &i915_oa_stream_ops; - - dev_priv->perf.oa.exclusive_stream = stream; - mutex_unlock(&dev_priv->drm.struct_mutex); return 0; err_enable: + dev_priv->perf.oa.exclusive_stream = NULL; dev_priv->perf.oa.ops.disable_metric_set(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); @@ -2123,7 +2128,7 @@ err_oa_buf_alloc: put_oa_config(dev_priv, stream->oa_config); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, stream->wakeref); err_config: if (stream->ctx) @@ -3021,7 +3026,7 @@ static bool chv_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) (addr >= 0x182300 && addr <= 0x1823A4); } -static uint32_t mask_reg_value(u32 reg, u32 val) +static u32 mask_reg_value(u32 reg, u32 val) { /* HALF_SLICE_CHICKEN2 is programmed with a the * WaDisableSTUnitPowerOptimization workaround. Make sure the value diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index d6c8f8fdfda5..13d70b90dd0f 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -167,6 +167,7 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns) { struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; bool fw = false; if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0) @@ -175,7 +176,8 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns) if (!dev_priv->gt.awake) return; - if (!intel_runtime_pm_get_if_in_use(dev_priv)) + wakeref = intel_runtime_pm_get_if_in_use(dev_priv); + if (!wakeref) return; for_each_engine(engine, dev_priv, id) { @@ -210,7 +212,7 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns) if (fw) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); } static void @@ -227,11 +229,12 @@ frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns) u32 val; val = dev_priv->gt_pm.rps.cur_freq; - if (dev_priv->gt.awake && - intel_runtime_pm_get_if_in_use(dev_priv)) { - val = intel_get_cagf(dev_priv, - I915_READ_NOTRACE(GEN6_RPSTAT1)); - intel_runtime_pm_put(dev_priv); + if (dev_priv->gt.awake) { + intel_wakeref_t wakeref; + + with_intel_runtime_pm_if_in_use(dev_priv, wakeref) + val = intel_get_cagf(dev_priv, + I915_READ_NOTRACE(GEN6_RPSTAT1)); } add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT], @@ -443,12 +446,14 @@ static u64 __get_rc6(struct drm_i915_private *i915) static u64 get_rc6(struct drm_i915_private *i915) { #if IS_ENABLED(CONFIG_PM) + intel_wakeref_t wakeref; unsigned long flags; u64 val; - if (intel_runtime_pm_get_if_in_use(i915)) { + wakeref = intel_runtime_pm_get_if_in_use(i915); + if (wakeref) { val = __get_rc6(i915); - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); /* * If we are coming back from being runtime suspended we must @@ -594,7 +599,8 @@ static void i915_pmu_enable(struct perf_event *event) * Update the bitmask of enabled events and increment * the event reference counter. */ - GEM_BUG_ON(bit >= I915_PMU_MASK_BITS); + BUILD_BUG_ON(ARRAY_SIZE(i915->pmu.enable_count) != I915_PMU_MASK_BITS); + GEM_BUG_ON(bit >= ARRAY_SIZE(i915->pmu.enable_count)); GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0); i915->pmu.enable |= BIT_ULL(bit); i915->pmu.enable_count[bit]++; @@ -615,11 +621,16 @@ static void i915_pmu_enable(struct perf_event *event) engine = intel_engine_lookup_user(i915, engine_event_class(event), engine_event_instance(event)); - GEM_BUG_ON(!engine); - engine->pmu.enable |= BIT(sample); - GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS); + BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) != + I915_ENGINE_SAMPLE_COUNT); + BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) != + I915_ENGINE_SAMPLE_COUNT); + GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count)); + GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample)); GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0); + + engine->pmu.enable |= BIT(sample); engine->pmu.enable_count[sample]++; } @@ -649,9 +660,11 @@ static void i915_pmu_disable(struct perf_event *event) engine = intel_engine_lookup_user(i915, engine_event_class(event), engine_event_instance(event)); - GEM_BUG_ON(!engine); - GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS); + + GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count)); + GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample)); GEM_BUG_ON(engine->pmu.enable_count[sample] == 0); + /* * Decrement the reference count and clear the enabled * bitmask when the last listener on an event goes away. @@ -660,7 +673,7 @@ static void i915_pmu_disable(struct perf_event *event) engine->pmu.enable &= ~BIT(sample); } - GEM_BUG_ON(bit >= I915_PMU_MASK_BITS); + GEM_BUG_ON(bit >= ARRAY_SIZE(i915->pmu.enable_count)); GEM_BUG_ON(i915->pmu.enable_count[bit] == 0); /* * Decrement the reference count and clear the enabled diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 7f164ca3db12..b3728c5f13e7 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -31,6 +31,8 @@ enum { ((1 << I915_PMU_SAMPLE_BITS) + \ (I915_PMU_LAST + 1 - __I915_PMU_OTHER(0))) +#define I915_ENGINE_SAMPLE_COUNT (I915_SAMPLE_SEMA + 1) + struct i915_pmu_sample { u64 cur; }; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 44958d994bfa..638a586469f9 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -117,14 +117,14 @@ */ typedef struct { - uint32_t reg; + u32 reg; } i915_reg_t; #define _MMIO(r) ((const i915_reg_t){ .reg = (r) }) #define INVALID_MMIO_REG _MMIO(0) -static inline uint32_t i915_mmio_reg_offset(i915_reg_t reg) +static inline u32 i915_mmio_reg_offset(i915_reg_t reg) { return reg.reg; } @@ -1814,7 +1814,7 @@ enum i915_power_well_id { #define _CNL_PORT_TX_C_LN0_OFFSET 0x162C40 #define _CNL_PORT_TX_D_LN0_OFFSET 0x162E40 #define _CNL_PORT_TX_F_LN0_OFFSET 0x162840 -#define _CNL_PORT_TX_DW_GRP(port, dw) (_PICK((port), \ +#define _CNL_PORT_TX_DW_GRP(dw, port) (_PICK((port), \ _CNL_PORT_TX_AE_GRP_OFFSET, \ _CNL_PORT_TX_B_GRP_OFFSET, \ _CNL_PORT_TX_B_GRP_OFFSET, \ @@ -1822,7 +1822,7 @@ enum i915_power_well_id { _CNL_PORT_TX_AE_GRP_OFFSET, \ _CNL_PORT_TX_F_GRP_OFFSET) + \ 4 * (dw)) -#define _CNL_PORT_TX_DW_LN0(port, dw) (_PICK((port), \ +#define _CNL_PORT_TX_DW_LN0(dw, port) (_PICK((port), \ _CNL_PORT_TX_AE_LN0_OFFSET, \ _CNL_PORT_TX_B_LN0_OFFSET, \ _CNL_PORT_TX_B_LN0_OFFSET, \ @@ -1858,9 +1858,9 @@ enum i915_power_well_id { #define _CNL_PORT_TX_DW4_LN0_AE 0x162450 #define _CNL_PORT_TX_DW4_LN1_AE 0x1624D0 -#define CNL_PORT_TX_DW4_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP((port), 4)) -#define CNL_PORT_TX_DW4_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0((port), 4)) -#define CNL_PORT_TX_DW4_LN(port, ln) _MMIO(_CNL_PORT_TX_DW_LN0((port), 4) + \ +#define CNL_PORT_TX_DW4_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP(4, (port))) +#define CNL_PORT_TX_DW4_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0(4, (port))) +#define CNL_PORT_TX_DW4_LN(port, ln) _MMIO(_CNL_PORT_TX_DW_LN0(4, (port)) + \ ((ln) * (_CNL_PORT_TX_DW4_LN1_AE - \ _CNL_PORT_TX_DW4_LN0_AE))) #define ICL_PORT_TX_DW4_AUX(port) _MMIO(_ICL_PORT_TX_DW_AUX(4, port)) @@ -1888,8 +1888,8 @@ enum i915_power_well_id { #define RTERM_SELECT(x) ((x) << 3) #define RTERM_SELECT_MASK (0x7 << 3) -#define CNL_PORT_TX_DW7_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP((port), 7)) -#define CNL_PORT_TX_DW7_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0((port), 7)) +#define CNL_PORT_TX_DW7_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP(7, (port))) +#define CNL_PORT_TX_DW7_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0(7, (port))) #define ICL_PORT_TX_DW7_AUX(port) _MMIO(_ICL_PORT_TX_DW_AUX(7, port)) #define ICL_PORT_TX_DW7_GRP(port) _MMIO(_ICL_PORT_TX_DW_GRP(7, port)) #define ICL_PORT_TX_DW7_LN0(port) _MMIO(_ICL_PORT_TX_DW_LN(7, 0, port)) @@ -2801,6 +2801,9 @@ enum i915_power_well_id { #define GEN6_RCS_PWR_FSM _MMIO(0x22ac) #define GEN9_RCS_FE_FSM2 _MMIO(0x22a4) +#define GEN10_CACHE_MODE_SS _MMIO(0xe420) +#define FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4) + /* Fuse readout registers for GT */ #define HSW_PAVP_FUSE1 _MMIO(0x911C) #define HSW_F1_EU_DIS_SHIFT 16 @@ -4272,6 +4275,15 @@ enum { #define EDP_PSR2_STATUS_STATE_MASK (0xf << 28) #define EDP_PSR2_STATUS_STATE_SHIFT 28 +#define _PSR2_SU_STATUS_0 0x6F914 +#define _PSR2_SU_STATUS_1 0x6F918 +#define _PSR2_SU_STATUS_2 0x6F91C +#define _PSR2_SU_STATUS(index) _MMIO(_PICK_EVEN((index), _PSR2_SU_STATUS_0, _PSR2_SU_STATUS_1)) +#define PSR2_SU_STATUS(frame) (_PSR2_SU_STATUS((frame) / 3)) +#define PSR2_SU_STATUS_SHIFT(frame) (((frame) % 3) * 10) +#define PSR2_SU_STATUS_MASK(frame) (0x3ff << PSR2_SU_STATUS_SHIFT(frame)) +#define PSR2_SU_STATUS_FRAMES 8 + /* VGA port control */ #define ADPA _MMIO(0x61100) #define PCH_ADPA _MMIO(0xe1100) @@ -4687,7 +4699,6 @@ enum { #define EDP_FORCE_VDD (1 << 3) #define EDP_BLC_ENABLE (1 << 2) #define PANEL_POWER_RESET (1 << 1) -#define PANEL_POWER_OFF (0 << 0) #define PANEL_POWER_ON (1 << 0) #define _PP_ON_DELAYS 0x61208 @@ -4887,6 +4898,7 @@ enum { # define TV_OVERSAMPLE_NONE (2 << 18) /* Selects 8x oversampling */ # define TV_OVERSAMPLE_8X (3 << 18) +# define TV_OVERSAMPLE_MASK (3 << 18) /* Selects progressive mode rather than interlaced */ # define TV_PROGRESSIVE (1 << 17) /* Sets the colorburst to PAL mode. Required for non-M PAL modes. */ @@ -5701,6 +5713,12 @@ enum { #define PIPEMISC_DITHER_TYPE_SP (0 << 2) #define PIPEMISC(pipe) _MMIO_PIPE2(pipe, _PIPE_MISC_A) +/* Skylake+ pipe bottom (background) color */ +#define _SKL_BOTTOM_COLOR_A 0x70034 +#define SKL_BOTTOM_COLOR_GAMMA_ENABLE (1 << 31) +#define SKL_BOTTOM_COLOR_CSC_ENABLE (1 << 30) +#define SKL_BOTTOM_COLOR(pipe) _MMIO_PIPE2(pipe, _SKL_BOTTOM_COLOR_A) + #define VLV_DPFLIPSTAT _MMIO(VLV_DISPLAY_BASE + 0x70028) #define PIPEB_LINE_COMPARE_INT_EN (1 << 29) #define PIPEB_HLINE_INT_EN (1 << 28) @@ -5982,7 +6000,7 @@ enum { #define PLANE_WM_EN (1 << 31) #define PLANE_WM_LINES_SHIFT 14 #define PLANE_WM_LINES_MASK 0x1f -#define PLANE_WM_BLOCKS_MASK 0x3ff +#define PLANE_WM_BLOCKS_MASK 0x7ff /* skl+: 10 bits, icl+ 11 bits */ #define _CUR_WM_0(pipe) _PIPE(pipe, _CUR_WM_A_0, _CUR_WM_B_0) #define CUR_WM(pipe, level) _MMIO(_CUR_WM_0(pipe) + ((4) * (level))) @@ -6766,8 +6784,7 @@ enum { #define _PLANE_BUF_CFG_1_B 0x7127c #define _PLANE_BUF_CFG_2_B 0x7137c -#define SKL_DDB_ENTRY_MASK 0x3FF -#define ICL_DDB_ENTRY_MASK 0x7FF +#define DDB_ENTRY_MASK 0x7FF /* skl+: 10 bits, icl+ 11 bits */ #define DDB_ENTRY_END_SHIFT 16 #define _PLANE_BUF_CFG_1(pipe) \ _PIPE(pipe, _PLANE_BUF_CFG_1_A, _PLANE_BUF_CFG_1_B) @@ -7600,6 +7617,7 @@ enum { #define _PIPEB_CHICKEN 0x71038 #define _PIPEC_CHICKEN 0x72038 #define PER_PIXEL_ALPHA_BYPASS_EN (1 << 7) +#define PM_FILL_MAINTAIN_DBUF_FULLNESS (1 << 0) #define PIPE_CHICKEN(pipe) _MMIO_PIPE(pipe, _PIPEA_CHICKEN,\ _PIPEB_CHICKEN) @@ -9545,7 +9563,7 @@ enum skl_power_gate { #define _MG_PLL3_ENABLE 0x46038 #define _MG_PLL4_ENABLE 0x4603C /* Bits are the same as DPLL0_ENABLE */ -#define MG_PLL_ENABLE(port) _MMIO_PORT((port) - PORT_C, _MG_PLL1_ENABLE, \ +#define MG_PLL_ENABLE(tc_port) _MMIO_PORT((tc_port), _MG_PLL1_ENABLE, \ _MG_PLL2_ENABLE) #define _MG_REFCLKIN_CTL_PORT1 0x16892C @@ -9554,9 +9572,9 @@ enum skl_power_gate { #define _MG_REFCLKIN_CTL_PORT4 0x16B92C #define MG_REFCLKIN_CTL_OD_2_MUX(x) ((x) << 8) #define MG_REFCLKIN_CTL_OD_2_MUX_MASK (0x7 << 8) -#define MG_REFCLKIN_CTL(port) _MMIO_PORT((port) - PORT_C, \ - _MG_REFCLKIN_CTL_PORT1, \ - _MG_REFCLKIN_CTL_PORT2) +#define MG_REFCLKIN_CTL(tc_port) _MMIO_PORT((tc_port), \ + _MG_REFCLKIN_CTL_PORT1, \ + _MG_REFCLKIN_CTL_PORT2) #define _MG_CLKTOP2_CORECLKCTL1_PORT1 0x1688D8 #define _MG_CLKTOP2_CORECLKCTL1_PORT2 0x1698D8 @@ -9566,9 +9584,9 @@ enum skl_power_gate { #define MG_CLKTOP2_CORECLKCTL1_B_DIVRATIO_MASK (0xff << 16) #define MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO(x) ((x) << 8) #define MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK (0xff << 8) -#define MG_CLKTOP2_CORECLKCTL1(port) _MMIO_PORT((port) - PORT_C, \ - _MG_CLKTOP2_CORECLKCTL1_PORT1, \ - _MG_CLKTOP2_CORECLKCTL1_PORT2) +#define MG_CLKTOP2_CORECLKCTL1(tc_port) _MMIO_PORT((tc_port), \ + _MG_CLKTOP2_CORECLKCTL1_PORT1, \ + _MG_CLKTOP2_CORECLKCTL1_PORT2) #define _MG_CLKTOP2_HSCLKCTL_PORT1 0x1688D4 #define _MG_CLKTOP2_HSCLKCTL_PORT2 0x1698D4 @@ -9586,9 +9604,9 @@ enum skl_power_gate { #define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO(x) ((x) << 8) #define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_SHIFT 8 #define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK (0xf << 8) -#define MG_CLKTOP2_HSCLKCTL(port) _MMIO_PORT((port) - PORT_C, \ - _MG_CLKTOP2_HSCLKCTL_PORT1, \ - _MG_CLKTOP2_HSCLKCTL_PORT2) +#define MG_CLKTOP2_HSCLKCTL(tc_port) _MMIO_PORT((tc_port), \ + _MG_CLKTOP2_HSCLKCTL_PORT1, \ + _MG_CLKTOP2_HSCLKCTL_PORT2) #define _MG_PLL_DIV0_PORT1 0x168A00 #define _MG_PLL_DIV0_PORT2 0x169A00 @@ -9600,8 +9618,8 @@ enum skl_power_gate { #define MG_PLL_DIV0_FBDIV_FRAC(x) ((x) << 8) #define MG_PLL_DIV0_FBDIV_INT_MASK (0xff << 0) #define MG_PLL_DIV0_FBDIV_INT(x) ((x) << 0) -#define MG_PLL_DIV0(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV0_PORT1, \ - _MG_PLL_DIV0_PORT2) +#define MG_PLL_DIV0(tc_port) _MMIO_PORT((tc_port), _MG_PLL_DIV0_PORT1, \ + _MG_PLL_DIV0_PORT2) #define _MG_PLL_DIV1_PORT1 0x168A04 #define _MG_PLL_DIV1_PORT2 0x169A04 @@ -9615,8 +9633,8 @@ enum skl_power_gate { #define MG_PLL_DIV1_NDIVRATIO(x) ((x) << 4) #define MG_PLL_DIV1_FBPREDIV_MASK (0xf << 0) #define MG_PLL_DIV1_FBPREDIV(x) ((x) << 0) -#define MG_PLL_DIV1(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV1_PORT1, \ - _MG_PLL_DIV1_PORT2) +#define MG_PLL_DIV1(tc_port) _MMIO_PORT((tc_port), _MG_PLL_DIV1_PORT1, \ + _MG_PLL_DIV1_PORT2) #define _MG_PLL_LF_PORT1 0x168A08 #define _MG_PLL_LF_PORT2 0x169A08 @@ -9628,8 +9646,8 @@ enum skl_power_gate { #define MG_PLL_LF_GAINCTRL(x) ((x) << 16) #define MG_PLL_LF_INT_COEFF(x) ((x) << 8) #define MG_PLL_LF_PROP_COEFF(x) ((x) << 0) -#define MG_PLL_LF(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_LF_PORT1, \ - _MG_PLL_LF_PORT2) +#define MG_PLL_LF(tc_port) _MMIO_PORT((tc_port), _MG_PLL_LF_PORT1, \ + _MG_PLL_LF_PORT2) #define _MG_PLL_FRAC_LOCK_PORT1 0x168A0C #define _MG_PLL_FRAC_LOCK_PORT2 0x169A0C @@ -9641,9 +9659,9 @@ enum skl_power_gate { #define MG_PLL_FRAC_LOCK_DCODITHEREN (1 << 10) #define MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN (1 << 8) #define MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(x) ((x) << 0) -#define MG_PLL_FRAC_LOCK(port) _MMIO_PORT((port) - PORT_C, \ - _MG_PLL_FRAC_LOCK_PORT1, \ - _MG_PLL_FRAC_LOCK_PORT2) +#define MG_PLL_FRAC_LOCK(tc_port) _MMIO_PORT((tc_port), \ + _MG_PLL_FRAC_LOCK_PORT1, \ + _MG_PLL_FRAC_LOCK_PORT2) #define _MG_PLL_SSC_PORT1 0x168A10 #define _MG_PLL_SSC_PORT2 0x169A10 @@ -9655,8 +9673,8 @@ enum skl_power_gate { #define MG_PLL_SSC_STEPNUM(x) ((x) << 10) #define MG_PLL_SSC_FLLEN (1 << 9) #define MG_PLL_SSC_STEPSIZE(x) ((x) << 0) -#define MG_PLL_SSC(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_SSC_PORT1, \ - _MG_PLL_SSC_PORT2) +#define MG_PLL_SSC(tc_port) _MMIO_PORT((tc_port), _MG_PLL_SSC_PORT1, \ + _MG_PLL_SSC_PORT2) #define _MG_PLL_BIAS_PORT1 0x168A14 #define _MG_PLL_BIAS_PORT2 0x169A14 @@ -9675,8 +9693,8 @@ enum skl_power_gate { #define MG_PLL_BIAS_VREF_RDAC_MASK (0x7 << 5) #define MG_PLL_BIAS_IREFTRIM(x) ((x) << 0) #define MG_PLL_BIAS_IREFTRIM_MASK (0x1f << 0) -#define MG_PLL_BIAS(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_BIAS_PORT1, \ - _MG_PLL_BIAS_PORT2) +#define MG_PLL_BIAS(tc_port) _MMIO_PORT((tc_port), _MG_PLL_BIAS_PORT1, \ + _MG_PLL_BIAS_PORT2) #define _MG_PLL_TDC_COLDST_BIAS_PORT1 0x168A18 #define _MG_PLL_TDC_COLDST_BIAS_PORT2 0x169A18 @@ -9687,9 +9705,9 @@ enum skl_power_gate { #define MG_PLL_TDC_COLDST_COLDSTART (1 << 16) #define MG_PLL_TDC_TDCOVCCORR_EN (1 << 2) #define MG_PLL_TDC_TDCSEL(x) ((x) << 0) -#define MG_PLL_TDC_COLDST_BIAS(port) _MMIO_PORT((port) - PORT_C, \ - _MG_PLL_TDC_COLDST_BIAS_PORT1, \ - _MG_PLL_TDC_COLDST_BIAS_PORT2) +#define MG_PLL_TDC_COLDST_BIAS(tc_port) _MMIO_PORT((tc_port), \ + _MG_PLL_TDC_COLDST_BIAS_PORT1, \ + _MG_PLL_TDC_COLDST_BIAS_PORT2) #define _CNL_DPLL0_CFGCR0 0x6C000 #define _CNL_DPLL1_CFGCR0 0x6C080 diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index d1355154886a..c2a5c48c7541 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -29,6 +29,8 @@ #include <linux/sched/signal.h> #include "i915_drv.h" +#include "i915_active.h" +#include "i915_reset.h" static const char *i915_fence_get_driver_name(struct dma_fence *fence) { @@ -59,7 +61,7 @@ static bool i915_fence_signaled(struct dma_fence *fence) static bool i915_fence_enable_signaling(struct dma_fence *fence) { - return intel_engine_enable_signaling(to_request(fence), true); + return i915_request_enable_breadcrumb(to_request(fence)); } static signed long i915_fence_wait(struct dma_fence *fence, @@ -124,12 +126,6 @@ static void unreserve_gt(struct drm_i915_private *i915) i915_gem_park(i915); } -void i915_gem_retire_noop(struct i915_gem_active *active, - struct i915_request *request) -{ - /* Space left intentionally blank */ -} - static void advance_ring(struct i915_request *request) { struct intel_ring *ring = request->ring; @@ -181,10 +177,11 @@ static void free_capture_list(struct i915_request *request) static void __retire_engine_request(struct intel_engine_cs *engine, struct i915_request *rq) { - GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d\n", + GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d:%d\n", __func__, engine->name, rq->fence.context, rq->fence.seqno, rq->global_seqno, + hwsp_seqno(rq), intel_engine_get_seqno(engine)); GEM_BUG_ON(!i915_request_completed(rq)); @@ -197,10 +194,11 @@ static void __retire_engine_request(struct intel_engine_cs *engine, spin_unlock(&engine->timeline.lock); spin_lock(&rq->lock); - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) + i915_request_mark_complete(rq); + if (!i915_request_signaled(rq)) dma_fence_signal_locked(&rq->fence); if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) - intel_engine_cancel_signaling(rq); + i915_request_cancel_breadcrumb(rq); if (rq->waitboost) { GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters)); atomic_dec(&rq->i915->gt_pm.rps.num_waiters); @@ -241,12 +239,13 @@ static void __retire_engine_upto(struct intel_engine_cs *engine, static void i915_request_retire(struct i915_request *request) { - struct i915_gem_active *active, *next; + struct i915_active_request *active, *next; - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n", request->engine->name, request->fence.context, request->fence.seqno, request->global_seqno, + hwsp_seqno(request), intel_engine_get_seqno(request->engine)); lockdep_assert_held(&request->i915->drm.struct_mutex); @@ -274,10 +273,10 @@ static void i915_request_retire(struct i915_request *request) * we may spend an inordinate amount of time simply handling * the retirement of requests and processing their callbacks. * Of which, this loop itself is particularly hot due to the - * cache misses when jumping around the list of i915_gem_active. - * So we try to keep this loop as streamlined as possible and - * also prefetch the next i915_gem_active to try and hide - * the likely cache miss. + * cache misses when jumping around the list of + * i915_active_request. So we try to keep this loop as + * streamlined as possible and also prefetch the next + * i915_active_request to try and hide the likely cache miss. */ prefetchw(next); @@ -306,10 +305,11 @@ void i915_request_retire_upto(struct i915_request *rq) struct intel_ring *ring = rq->ring; struct i915_request *tmp; - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n", rq->engine->name, rq->fence.context, rq->fence.seqno, rq->global_seqno, + hwsp_seqno(rq), intel_engine_get_seqno(rq->engine)); lockdep_assert_held(&rq->i915->drm.struct_mutex); @@ -328,7 +328,7 @@ void i915_request_retire_upto(struct i915_request *rq) static u32 timeline_get_seqno(struct i915_timeline *tl) { - return ++tl->seqno; + return tl->seqno += 1 + tl->has_initial_breadcrumb; } static void move_to_timeline(struct i915_request *request, @@ -342,15 +342,23 @@ static void move_to_timeline(struct i915_request *request, spin_unlock(&request->timeline->lock); } +static u32 next_global_seqno(struct i915_timeline *tl) +{ + if (!++tl->seqno) + ++tl->seqno; + return tl->seqno; +} + void __i915_request_submit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; u32 seqno; - GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d:%d\n", engine->name, request->fence.context, request->fence.seqno, engine->timeline.seqno + 1, + hwsp_seqno(request), intel_engine_get_seqno(engine)); GEM_BUG_ON(!irqs_disabled()); @@ -358,26 +366,27 @@ void __i915_request_submit(struct i915_request *request) GEM_BUG_ON(request->global_seqno); - seqno = timeline_get_seqno(&engine->timeline); + seqno = next_global_seqno(&engine->timeline); GEM_BUG_ON(!seqno); GEM_BUG_ON(intel_engine_signaled(engine, seqno)); /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); + set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); request->global_seqno = seqno; - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) - intel_engine_enable_signaling(request, false); + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && + !i915_request_enable_breadcrumb(request)) + intel_engine_queue_breadcrumbs(engine); spin_unlock(&request->lock); - engine->emit_breadcrumb(request, - request->ring->vaddr + request->postfix); + engine->emit_fini_breadcrumb(request, + request->ring->vaddr + request->postfix); /* Transfer from per-context onto the global per-engine timeline */ move_to_timeline(request, &engine->timeline); trace_i915_request_execute(request); - - wake_up_all(&request->execute); } void i915_request_submit(struct i915_request *request) @@ -397,10 +406,11 @@ void __i915_request_unsubmit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d:%d\n", engine->name, request->fence.context, request->fence.seqno, request->global_seqno, + hwsp_seqno(request), intel_engine_get_seqno(engine)); GEM_BUG_ON(!irqs_disabled()); @@ -419,7 +429,9 @@ void __i915_request_unsubmit(struct i915_request *request) spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); request->global_seqno = 0; if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) - intel_engine_cancel_signaling(request); + i915_request_cancel_breadcrumb(request); + GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); + clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); spin_unlock(&request->lock); /* Transfer back from the global per-engine timeline to per-context */ @@ -509,6 +521,11 @@ out: return kmem_cache_alloc(ce->gem_context->i915->requests, GFP_KERNEL); } +static int add_timeline_barrier(struct i915_request *rq) +{ + return i915_request_await_active_request(rq, &rq->timeline->barrier); +} + /** * i915_request_alloc - allocate a request structure * @@ -565,7 +582,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * We use RCU to look up requests in flight. The lookups may * race with the request being allocated from the slab freelist. * That is the request we are writing to here, may be in the process - * of being read by __i915_gem_active_get_rcu(). As such, + * of being read by __i915_active_request_get_rcu(). As such, * we have to be very careful when overwriting the contents. During * the RCU lookup, we change chase the request->engine pointer, * read the request->global_seqno and increment the reference count. @@ -608,6 +625,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) rq->ring = ce->ring; rq->timeline = ce->ring->timeline; GEM_BUG_ON(rq->timeline == &engine->timeline); + rq->hwsp_seqno = rq->timeline->hwsp_seqno; spin_lock_init(&rq->lock); dma_fence_init(&rq->fence, @@ -618,13 +636,11 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) /* We bump the ref for the fence chain */ i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify); - init_waitqueue_head(&rq->execute); i915_sched_node_init(&rq->sched); /* No zalloc, must clear what we need by hand */ rq->global_seqno = 0; - rq->signaling.wait.seqno = 0; rq->file_priv = NULL; rq->batch = NULL; rq->capture_list = NULL; @@ -642,7 +658,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * around inside i915_request_add() there is sufficient space at * the beginning of the ring as well. */ - rq->reserved_space = 2 * engine->emit_breadcrumb_sz * sizeof(u32); + rq->reserved_space = 2 * engine->emit_fini_breadcrumb_dw * sizeof(u32); /* * Record the position of the start of the request so that @@ -652,6 +668,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) */ rq->head = rq->ring->emit; + ret = add_timeline_barrier(rq); + if (ret) + goto err_unwind; + ret = engine->request_alloc(rq); if (ret) goto err_unwind; @@ -893,7 +913,7 @@ void i915_request_add(struct i915_request *request) * GPU processing the request, we never over-estimate the * position of the ring's HEAD. */ - cs = intel_ring_begin(request, engine->emit_breadcrumb_sz); + cs = intel_ring_begin(request, engine->emit_fini_breadcrumb_dw); GEM_BUG_ON(IS_ERR(cs)); request->postfix = intel_ring_offset(request, cs); @@ -904,8 +924,8 @@ void i915_request_add(struct i915_request *request) * see a more recent value in the hws than we are tracking. */ - prev = i915_gem_active_raw(&timeline->last_request, - &request->i915->drm.struct_mutex); + prev = i915_active_request_raw(&timeline->last_request, + &request->i915->drm.struct_mutex); if (prev && !i915_request_completed(prev)) { i915_sw_fence_await_sw_fence(&request->submit, &prev->submit, &request->submitq); @@ -921,7 +941,7 @@ void i915_request_add(struct i915_request *request) spin_unlock_irq(&timeline->lock); GEM_BUG_ON(timeline->seqno != request->fence.seqno); - i915_gem_active_set(&timeline->last_request, request); + __i915_active_request_set(&timeline->last_request, request); list_add_tail(&request->ring_link, &ring->request_list); if (list_is_first(&request->ring_link, &ring->request_list)) { @@ -952,7 +972,7 @@ void i915_request_add(struct i915_request *request) * Allow interactive/synchronous clients to jump ahead of * the bulk clients. (FQ_CODEL) */ - if (!prev || i915_request_completed(prev)) + if (list_empty(&request->sched.signalers_list)) attr.priority |= I915_PRIORITY_NEWCLIENT; engine->schedule(request, &attr); @@ -1015,13 +1035,10 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu) return this_cpu != cpu; } -static bool __i915_spin_request(const struct i915_request *rq, - u32 seqno, int state, unsigned long timeout_us) +static bool __i915_spin_request(const struct i915_request * const rq, + int state, unsigned long timeout_us) { - struct intel_engine_cs *engine = rq->engine; - unsigned int irq, cpu; - - GEM_BUG_ON(!seqno); + unsigned int cpu; /* * Only wait for the request if we know it is likely to complete. @@ -1029,12 +1046,12 @@ static bool __i915_spin_request(const struct i915_request *rq, * We don't track the timestamps around requests, nor the average * request length, so we do not have a good indicator that this * request will complete within the timeout. What we do know is the - * order in which requests are executed by the engine and so we can - * tell if the request has started. If the request hasn't started yet, - * it is a fair assumption that it will not complete within our - * relatively short timeout. + * order in which requests are executed by the context and so we can + * tell if the request has been started. If the request is not even + * running yet, it is a fair assumption that it will not complete + * within our relatively short timeout. */ - if (!intel_engine_has_started(engine, seqno)) + if (!i915_request_is_running(rq)) return false; /* @@ -1048,20 +1065,10 @@ static bool __i915_spin_request(const struct i915_request *rq, * takes to sleep on a request, on the order of a microsecond. */ - irq = READ_ONCE(engine->breadcrumbs.irq_count); timeout_us += local_clock_us(&cpu); do { - if (intel_engine_has_completed(engine, seqno)) - return seqno == i915_request_global_seqno(rq); - - /* - * Seqno are meant to be ordered *before* the interrupt. If - * we see an interrupt without a corresponding seqno advance, - * assume we won't see one in the near future but require - * the engine->seqno_barrier() to fixup coherency. - */ - if (READ_ONCE(engine->breadcrumbs.irq_count) != irq) - break; + if (i915_request_completed(rq)) + return true; if (signal_pending_state(state, current)) break; @@ -1075,16 +1082,16 @@ static bool __i915_spin_request(const struct i915_request *rq, return false; } -static bool __i915_wait_request_check_and_reset(struct i915_request *request) -{ - struct i915_gpu_error *error = &request->i915->gpu_error; +struct request_wait { + struct dma_fence_cb cb; + struct task_struct *tsk; +}; - if (likely(!i915_reset_handoff(error))) - return false; +static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct request_wait *wait = container_of(cb, typeof(*wait), cb); - __set_current_state(TASK_RUNNING); - i915_reset(request->i915, error->stalled_mask, error->reason); - return true; + wake_up_process(wait->tsk); } /** @@ -1112,17 +1119,9 @@ long i915_request_wait(struct i915_request *rq, { const int state = flags & I915_WAIT_INTERRUPTIBLE ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; - wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue; - DEFINE_WAIT_FUNC(reset, default_wake_function); - DEFINE_WAIT_FUNC(exec, default_wake_function); - struct intel_wait wait; + struct request_wait wait; might_sleep(); -#if IS_ENABLED(CONFIG_LOCKDEP) - GEM_BUG_ON(debug_locks && - !!lockdep_is_held(&rq->i915->drm.struct_mutex) != - !!(flags & I915_WAIT_LOCKED)); -#endif GEM_BUG_ON(timeout < 0); if (i915_request_completed(rq)) @@ -1133,57 +1132,23 @@ long i915_request_wait(struct i915_request *rq, trace_i915_request_wait_begin(rq, flags); - add_wait_queue(&rq->execute, &exec); - if (flags & I915_WAIT_LOCKED) - add_wait_queue(errq, &reset); + /* Optimistic short spin before touching IRQs */ + if (__i915_spin_request(rq, state, 5)) + goto out; - intel_wait_init(&wait); if (flags & I915_WAIT_PRIORITY) i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT); -restart: - do { - set_current_state(state); - if (intel_wait_update_request(&wait, rq)) - break; - - if (flags & I915_WAIT_LOCKED && - __i915_wait_request_check_and_reset(rq)) - continue; - - if (signal_pending_state(state, current)) { - timeout = -ERESTARTSYS; - goto complete; - } - - if (!timeout) { - timeout = -ETIME; - goto complete; - } - - timeout = io_schedule_timeout(timeout); - } while (1); - - GEM_BUG_ON(!intel_wait_has_seqno(&wait)); - GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); - - /* Optimistic short spin before touching IRQs */ - if (__i915_spin_request(rq, wait.seqno, state, 5)) - goto complete; + wait.tsk = current; + if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake)) + goto out; - set_current_state(state); - if (intel_engine_add_wait(rq->engine, &wait)) - /* - * In order to check that we haven't missed the interrupt - * as we enabled it, we need to kick ourselves to do a - * coherent check on the seqno before we sleep. - */ - goto wakeup; + for (;;) { + set_current_state(state); - if (flags & I915_WAIT_LOCKED) - __i915_wait_request_check_and_reset(rq); + if (i915_request_completed(rq)) + break; - for (;;) { if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; break; @@ -1195,50 +1160,13 @@ restart: } timeout = io_schedule_timeout(timeout); - - if (intel_wait_complete(&wait) && - intel_wait_check_request(&wait, rq)) - break; - - set_current_state(state); - -wakeup: - if (i915_request_completed(rq)) - break; - - /* - * If the GPU is hung, and we hold the lock, reset the GPU - * and then check for completion. On a full reset, the engine's - * HW seqno will be advanced passed us and we are complete. - * If we do a partial reset, we have to wait for the GPU to - * resume and update the breadcrumb. - * - * If we don't hold the mutex, we can just wait for the worker - * to come along and update the breadcrumb (either directly - * itself, or indirectly by recovering the GPU). - */ - if (flags & I915_WAIT_LOCKED && - __i915_wait_request_check_and_reset(rq)) - continue; - - /* Only spin if we know the GPU is processing this request */ - if (__i915_spin_request(rq, wait.seqno, state, 2)) - break; - - if (!intel_wait_check_request(&wait, rq)) { - intel_engine_remove_wait(rq->engine, &wait); - goto restart; - } } - - intel_engine_remove_wait(rq->engine, &wait); -complete: __set_current_state(TASK_RUNNING); - if (flags & I915_WAIT_LOCKED) - remove_wait_queue(errq, &reset); - remove_wait_queue(&rq->execute, &exec); - trace_i915_request_wait_end(rq); + dma_fence_remove_callback(&rq->fence, &wait.cb); + +out: + trace_i915_request_wait_end(rq); return timeout; } diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index d014b0605445..40f3e8dcbdd5 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -38,23 +38,34 @@ struct drm_i915_gem_object; struct i915_request; struct i915_timeline; -struct intel_wait { - struct rb_node node; - struct task_struct *tsk; - struct i915_request *request; - u32 seqno; -}; - -struct intel_signal_node { - struct intel_wait wait; - struct list_head link; -}; - struct i915_capture_list { struct i915_capture_list *next; struct i915_vma *vma; }; +enum { + /* + * I915_FENCE_FLAG_ACTIVE - this request is currently submitted to HW. + * + * Set by __i915_request_submit() on handing over to HW, and cleared + * by __i915_request_unsubmit() if we preempt this request. + * + * Finally cleared for consistency on retiring the request, when + * we know the HW is no longer running this request. + * + * See i915_request_is_active() + */ + I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS, + + /* + * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list + * + * Internal bookkeeping used by the breadcrumb code to track when + * a request is on the various signal_list. + */ + I915_FENCE_FLAG_SIGNAL, +}; + /** * Request queue structure. * @@ -97,7 +108,7 @@ struct i915_request { struct intel_context *hw_context; struct intel_ring *ring; struct i915_timeline *timeline; - struct intel_signal_node signaling; + struct list_head signal_link; /* * The rcu epoch of when this request was allocated. Used to judiciously @@ -116,7 +127,6 @@ struct i915_request { */ struct i915_sw_fence submit; wait_queue_entry_t submitq; - wait_queue_head_t execute; /* * A list of everyone we wait upon, and everyone who waits upon us. @@ -130,6 +140,13 @@ struct i915_request { struct i915_sched_node sched; struct i915_dependency dep; + /* + * A convenience pointer to the current breadcrumb value stored in + * the HW status page (or our timeline's local equivalent). The full + * path would be rq->hw_context->ring->timeline->hwsp_seqno. + */ + const u32 *hwsp_seqno; + /** * GEM sequence number associated with this request on the * global execution timeline. It is zero when the request is not @@ -248,7 +265,7 @@ i915_request_put(struct i915_request *rq) * that it has passed the global seqno and the global seqno is unchanged * after the read, it is indeed complete). */ -static u32 +static inline u32 i915_request_global_seqno(const struct i915_request *request) { return READ_ONCE(request->global_seqno); @@ -270,6 +287,10 @@ void i915_request_skip(struct i915_request *request, int error); void __i915_request_unsubmit(struct i915_request *request); void i915_request_unsubmit(struct i915_request *request); +/* Note: part of the intel_breadcrumbs family */ +bool i915_request_enable_breadcrumb(struct i915_request *request); +void i915_request_cancel_breadcrumb(struct i915_request *request); + long i915_request_wait(struct i915_request *rq, unsigned int flags, long timeout) @@ -280,441 +301,106 @@ long i915_request_wait(struct i915_request *rq, #define I915_WAIT_ALL BIT(3) /* used by i915_gem_object_wait() */ #define I915_WAIT_FOR_IDLE_BOOST BIT(4) -static inline bool intel_engine_has_started(struct intel_engine_cs *engine, - u32 seqno); -static inline bool intel_engine_has_completed(struct intel_engine_cs *engine, - u32 seqno); - -/** - * Returns true if seq1 is later than seq2. - */ -static inline bool i915_seqno_passed(u32 seq1, u32 seq2) -{ - return (s32)(seq1 - seq2) >= 0; -} - -/** - * i915_request_started - check if the request has begun being executed - * @rq: the request - * - * Returns true if the request has been submitted to hardware, and the hardware - * has advanced passed the end of the previous request and so should be either - * currently processing the request (though it may be preempted and so - * not necessarily the next request to complete) or have completed the request. - */ -static inline bool i915_request_started(const struct i915_request *rq) -{ - u32 seqno; - - seqno = i915_request_global_seqno(rq); - if (!seqno) /* not yet submitted to HW */ - return false; - - return intel_engine_has_started(rq->engine, seqno); -} - -static inline bool -__i915_request_completed(const struct i915_request *rq, u32 seqno) -{ - GEM_BUG_ON(!seqno); - return intel_engine_has_completed(rq->engine, seqno) && - seqno == i915_request_global_seqno(rq); -} - -static inline bool i915_request_completed(const struct i915_request *rq) -{ - u32 seqno; - - seqno = i915_request_global_seqno(rq); - if (!seqno) - return false; - - return __i915_request_completed(rq, seqno); -} - -void i915_retire_requests(struct drm_i915_private *i915); - -/* - * We treat requests as fences. This is not be to confused with our - * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync. - * We use the fences to synchronize access from the CPU with activity on the - * GPU, for example, we should not rewrite an object's PTE whilst the GPU - * is reading them. We also track fences at a higher level to provide - * implicit synchronisation around GEM objects, e.g. set-domain will wait - * for outstanding GPU rendering before marking the object ready for CPU - * access, or a pageflip will wait until the GPU is complete before showing - * the frame on the scanout. - * - * In order to use a fence, the object must track the fence it needs to - * serialise with. For example, GEM objects want to track both read and - * write access so that we can perform concurrent read operations between - * the CPU and GPU engines, as well as waiting for all rendering to - * complete, or waiting for the last GPU user of a "fence register". The - * object then embeds a #i915_gem_active to track the most recent (in - * retirement order) request relevant for the desired mode of access. - * The #i915_gem_active is updated with i915_gem_active_set() to track the - * most recent fence request, typically this is done as part of - * i915_vma_move_to_active(). - * - * When the #i915_gem_active completes (is retired), it will - * signal its completion to the owner through a callback as well as mark - * itself as idle (i915_gem_active.request == NULL). The owner - * can then perform any action, such as delayed freeing of an active - * resource including itself. - */ -struct i915_gem_active; - -typedef void (*i915_gem_retire_fn)(struct i915_gem_active *, - struct i915_request *); - -struct i915_gem_active { - struct i915_request __rcu *request; - struct list_head link; - i915_gem_retire_fn retire; -}; - -void i915_gem_retire_noop(struct i915_gem_active *, - struct i915_request *request); - -/** - * init_request_active - prepares the activity tracker for use - * @active - the active tracker - * @func - a callback when then the tracker is retired (becomes idle), - * can be NULL - * - * init_request_active() prepares the embedded @active struct for use as - * an activity tracker, that is for tracking the last known active request - * associated with it. When the last request becomes idle, when it is retired - * after completion, the optional callback @func is invoked. - */ -static inline void -init_request_active(struct i915_gem_active *active, - i915_gem_retire_fn retire) +static inline bool i915_request_signaled(const struct i915_request *rq) { - RCU_INIT_POINTER(active->request, NULL); - INIT_LIST_HEAD(&active->link); - active->retire = retire ?: i915_gem_retire_noop; + /* The request may live longer than its HWSP, so check flags first! */ + return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags); } -/** - * i915_gem_active_set - updates the tracker to watch the current request - * @active - the active tracker - * @request - the request to watch - * - * i915_gem_active_set() watches the given @request for completion. Whilst - * that @request is busy, the @active reports busy. When that @request is - * retired, the @active tracker is updated to report idle. - */ -static inline void -i915_gem_active_set(struct i915_gem_active *active, - struct i915_request *request) +static inline bool i915_request_is_active(const struct i915_request *rq) { - list_move(&active->link, &request->active_list); - rcu_assign_pointer(active->request, request); + return test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); } /** - * i915_gem_active_set_retire_fn - updates the retirement callback - * @active - the active tracker - * @fn - the routine called when the request is retired - * @mutex - struct_mutex used to guard retirements - * - * i915_gem_active_set_retire_fn() updates the function pointer that - * is called when the final request associated with the @active tracker - * is retired. + * Returns true if seq1 is later than seq2. */ -static inline void -i915_gem_active_set_retire_fn(struct i915_gem_active *active, - i915_gem_retire_fn fn, - struct mutex *mutex) +static inline bool i915_seqno_passed(u32 seq1, u32 seq2) { - lockdep_assert_held(mutex); - active->retire = fn ?: i915_gem_retire_noop; + return (s32)(seq1 - seq2) >= 0; } -static inline struct i915_request * -__i915_gem_active_peek(const struct i915_gem_active *active) +static inline u32 __hwsp_seqno(const struct i915_request *rq) { - /* - * Inside the error capture (running with the driver in an unknown - * state), we want to bend the rules slightly (a lot). - * - * Work is in progress to make it safer, in the meantime this keeps - * the known issue from spamming the logs. - */ - return rcu_dereference_protected(active->request, 1); + return READ_ONCE(*rq->hwsp_seqno); } /** - * i915_gem_active_raw - return the active request - * @active - the active tracker + * hwsp_seqno - the current breadcrumb value in the HW status page + * @rq: the request, to chase the relevant HW status page * - * i915_gem_active_raw() returns the current request being tracked, or NULL. - * It does not obtain a reference on the request for the caller, so the caller - * must hold struct_mutex. - */ -static inline struct i915_request * -i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex) -{ - return rcu_dereference_protected(active->request, - lockdep_is_held(mutex)); -} - -/** - * i915_gem_active_peek - report the active request being monitored - * @active - the active tracker + * The emphasis in naming here is that hwsp_seqno() is not a property of the + * request, but an indication of the current HW state (associated with this + * request). Its value will change as the GPU executes more requests. * - * i915_gem_active_peek() returns the current request being tracked if - * still active, or NULL. It does not obtain a reference on the request - * for the caller, so the caller must hold struct_mutex. + * Returns the current breadcrumb value in the associated HW status page (or + * the local timeline's equivalent) for this request. The request itself + * has the associated breadcrumb value of rq->fence.seqno, when the HW + * status page has that breadcrumb or later, this request is complete. */ -static inline struct i915_request * -i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex) +static inline u32 hwsp_seqno(const struct i915_request *rq) { - struct i915_request *request; + u32 seqno; - request = i915_gem_active_raw(active, mutex); - if (!request || i915_request_completed(request)) - return NULL; + rcu_read_lock(); /* the HWSP may be freed at runtime */ + seqno = __hwsp_seqno(rq); + rcu_read_unlock(); - return request; + return seqno; } -/** - * i915_gem_active_get - return a reference to the active request - * @active - the active tracker - * - * i915_gem_active_get() returns a reference to the active request, or NULL - * if the active tracker is idle. The caller must hold struct_mutex. - */ -static inline struct i915_request * -i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex) +static inline bool __i915_request_has_started(const struct i915_request *rq) { - return i915_request_get(i915_gem_active_peek(active, mutex)); + return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1); } /** - * __i915_gem_active_get_rcu - return a reference to the active request - * @active - the active tracker - * - * __i915_gem_active_get() returns a reference to the active request, or NULL - * if the active tracker is idle. The caller must hold the RCU read lock, but - * the returned pointer is safe to use outside of RCU. - */ -static inline struct i915_request * -__i915_gem_active_get_rcu(const struct i915_gem_active *active) -{ - /* - * Performing a lockless retrieval of the active request is super - * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing - * slab of request objects will not be freed whilst we hold the - * RCU read lock. It does not guarantee that the request itself - * will not be freed and then *reused*. Viz, - * - * Thread A Thread B - * - * rq = active.request - * retire(rq) -> free(rq); - * (rq is now first on the slab freelist) - * active.request = NULL - * - * rq = new submission on a new object - * ref(rq) - * - * To prevent the request from being reused whilst the caller - * uses it, we take a reference like normal. Whilst acquiring - * the reference we check that it is not in a destroyed state - * (refcnt == 0). That prevents the request being reallocated - * whilst the caller holds on to it. To check that the request - * was not reallocated as we acquired the reference we have to - * check that our request remains the active request across - * the lookup, in the same manner as a seqlock. The visibility - * of the pointer versus the reference counting is controlled - * by using RCU barriers (rcu_dereference and rcu_assign_pointer). - * - * In the middle of all that, we inspect whether the request is - * complete. Retiring is lazy so the request may be completed long - * before the active tracker is updated. Querying whether the - * request is complete is far cheaper (as it involves no locked - * instructions setting cachelines to exclusive) than acquiring - * the reference, so we do it first. The RCU read lock ensures the - * pointer dereference is valid, but does not ensure that the - * seqno nor HWS is the right one! However, if the request was - * reallocated, that means the active tracker's request was complete. - * If the new request is also complete, then both are and we can - * just report the active tracker is idle. If the new request is - * incomplete, then we acquire a reference on it and check that - * it remained the active request. - * - * It is then imperative that we do not zero the request on - * reallocation, so that we can chase the dangling pointers! - * See i915_request_alloc(). - */ - do { - struct i915_request *request; - - request = rcu_dereference(active->request); - if (!request || i915_request_completed(request)) - return NULL; - - /* - * An especially silly compiler could decide to recompute the - * result of i915_request_completed, more specifically - * re-emit the load for request->fence.seqno. A race would catch - * a later seqno value, which could flip the result from true to - * false. Which means part of the instructions below might not - * be executed, while later on instructions are executed. Due to - * barriers within the refcounting the inconsistency can't reach - * past the call to i915_request_get_rcu, but not executing - * that while still executing i915_request_put() creates - * havoc enough. Prevent this with a compiler barrier. - */ - barrier(); - - request = i915_request_get_rcu(request); - - /* - * What stops the following rcu_access_pointer() from occurring - * before the above i915_request_get_rcu()? If we were - * to read the value before pausing to get the reference to - * the request, we may not notice a change in the active - * tracker. - * - * The rcu_access_pointer() is a mere compiler barrier, which - * means both the CPU and compiler are free to perform the - * memory read without constraint. The compiler only has to - * ensure that any operations after the rcu_access_pointer() - * occur afterwards in program order. This means the read may - * be performed earlier by an out-of-order CPU, or adventurous - * compiler. - * - * The atomic operation at the heart of - * i915_request_get_rcu(), see dma_fence_get_rcu(), is - * atomic_inc_not_zero() which is only a full memory barrier - * when successful. That is, if i915_request_get_rcu() - * returns the request (and so with the reference counted - * incremented) then the following read for rcu_access_pointer() - * must occur after the atomic operation and so confirm - * that this request is the one currently being tracked. - * - * The corresponding write barrier is part of - * rcu_assign_pointer(). - */ - if (!request || request == rcu_access_pointer(active->request)) - return rcu_pointer_handoff(request); - - i915_request_put(request); - } while (1); -} - -/** - * i915_gem_active_get_unlocked - return a reference to the active request - * @active - the active tracker - * - * i915_gem_active_get_unlocked() returns a reference to the active request, - * or NULL if the active tracker is idle. The reference is obtained under RCU, - * so no locking is required by the caller. + * i915_request_started - check if the request has begun being executed + * @rq: the request * - * The reference should be freed with i915_request_put(). + * Returns true if the request has been submitted to hardware, and the hardware + * has advanced passed the end of the previous request and so should be either + * currently processing the request (though it may be preempted and so + * not necessarily the next request to complete) or have completed the request. */ -static inline struct i915_request * -i915_gem_active_get_unlocked(const struct i915_gem_active *active) +static inline bool i915_request_started(const struct i915_request *rq) { - struct i915_request *request; + if (i915_request_signaled(rq)) + return true; - rcu_read_lock(); - request = __i915_gem_active_get_rcu(active); - rcu_read_unlock(); - - return request; + /* Remember: started but may have since been preempted! */ + return __i915_request_has_started(rq); } /** - * i915_gem_active_isset - report whether the active tracker is assigned - * @active - the active tracker + * i915_request_is_running - check if the request may actually be executing + * @rq: the request * - * i915_gem_active_isset() returns true if the active tracker is currently - * assigned to a request. Due to the lazy retiring, that request may be idle - * and this may report stale information. + * Returns true if the request is currently submitted to hardware, has passed + * its start point (i.e. the context is setup and not busywaiting). Note that + * it may no longer be running by the time the function returns! */ -static inline bool -i915_gem_active_isset(const struct i915_gem_active *active) +static inline bool i915_request_is_running(const struct i915_request *rq) { - return rcu_access_pointer(active->request); + if (!i915_request_is_active(rq)) + return false; + + return __i915_request_has_started(rq); } -/** - * i915_gem_active_wait - waits until the request is completed - * @active - the active request on which to wait - * @flags - how to wait - * @timeout - how long to wait at most - * @rps - userspace client to charge for a waitboost - * - * i915_gem_active_wait() waits until the request is completed before - * returning, without requiring any locks to be held. Note that it does not - * retire any requests before returning. - * - * This function relies on RCU in order to acquire the reference to the active - * request without holding any locks. See __i915_gem_active_get_rcu() for the - * glory details on how that is managed. Once the reference is acquired, we - * can then wait upon the request, and afterwards release our reference, - * free of any locking. - * - * This function wraps i915_request_wait(), see it for the full details on - * the arguments. - * - * Returns 0 if successful, or a negative error code. - */ -static inline int -i915_gem_active_wait(const struct i915_gem_active *active, unsigned int flags) +static inline bool i915_request_completed(const struct i915_request *rq) { - struct i915_request *request; - long ret = 0; - - request = i915_gem_active_get_unlocked(active); - if (request) { - ret = i915_request_wait(request, flags, MAX_SCHEDULE_TIMEOUT); - i915_request_put(request); - } + if (i915_request_signaled(rq)) + return true; - return ret < 0 ? ret : 0; + return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno); } -/** - * i915_gem_active_retire - waits until the request is retired - * @active - the active request on which to wait - * - * i915_gem_active_retire() waits until the request is completed, - * and then ensures that at least the retirement handler for this - * @active tracker is called before returning. If the @active - * tracker is idle, the function returns immediately. - */ -static inline int __must_check -i915_gem_active_retire(struct i915_gem_active *active, - struct mutex *mutex) +static inline void i915_request_mark_complete(struct i915_request *rq) { - struct i915_request *request; - long ret; - - request = i915_gem_active_raw(active, mutex); - if (!request) - return 0; - - ret = i915_request_wait(request, - I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (ret < 0) - return ret; - - list_del_init(&active->link); - RCU_INIT_POINTER(active->request, NULL); - - active->retire(active, request); - - return 0; + rq->hwsp_seqno = (u32 *)&rq->fence.seqno; /* decouple from HWSP */ } -#define for_each_active(mask, idx) \ - for (; mask ? idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx)) +void i915_retire_requests(struct drm_i915_private *i915); #endif /* I915_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c new file mode 100644 index 000000000000..0e0ddf2e6815 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -0,0 +1,1349 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2008-2018 Intel Corporation + */ + +#include <linux/sched/mm.h> +#include <linux/stop_machine.h> + +#include "i915_drv.h" +#include "i915_gpu_error.h" +#include "i915_reset.h" + +#include "intel_guc.h" + +#define RESET_MAX_RETRIES 3 + +/* XXX How to handle concurrent GGTT updates using tiling registers? */ +#define RESET_UNDER_STOP_MACHINE 0 + +static void engine_skip_context(struct i915_request *rq) +{ + struct intel_engine_cs *engine = rq->engine; + struct i915_gem_context *hung_ctx = rq->gem_context; + struct i915_timeline *timeline = rq->timeline; + + lockdep_assert_held(&engine->timeline.lock); + GEM_BUG_ON(timeline == &engine->timeline); + + spin_lock(&timeline->lock); + + if (i915_request_is_active(rq)) { + list_for_each_entry_continue(rq, + &engine->timeline.requests, link) + if (rq->gem_context == hung_ctx) + i915_request_skip(rq, -EIO); + } + + list_for_each_entry(rq, &timeline->requests, link) + i915_request_skip(rq, -EIO); + + spin_unlock(&timeline->lock); +} + +static void client_mark_guilty(struct drm_i915_file_private *file_priv, + const struct i915_gem_context *ctx) +{ + unsigned int score; + unsigned long prev_hang; + + if (i915_gem_context_is_banned(ctx)) + score = I915_CLIENT_SCORE_CONTEXT_BAN; + else + score = 0; + + prev_hang = xchg(&file_priv->hang_timestamp, jiffies); + if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES)) + score += I915_CLIENT_SCORE_HANG_FAST; + + if (score) { + atomic_add(score, &file_priv->ban_score); + + DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n", + ctx->name, score, + atomic_read(&file_priv->ban_score)); + } +} + +static bool context_mark_guilty(struct i915_gem_context *ctx) +{ + unsigned int score; + bool banned, bannable; + + atomic_inc(&ctx->guilty_count); + + bannable = i915_gem_context_is_bannable(ctx); + score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score); + banned = score >= CONTEXT_SCORE_BAN_THRESHOLD; + + /* Cool contexts don't accumulate client ban score */ + if (!bannable) + return false; + + if (banned) { + DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n", + ctx->name, atomic_read(&ctx->guilty_count), + score); + i915_gem_context_set_banned(ctx); + } + + if (!IS_ERR_OR_NULL(ctx->file_priv)) + client_mark_guilty(ctx->file_priv, ctx); + + return banned; +} + +static void context_mark_innocent(struct i915_gem_context *ctx) +{ + atomic_inc(&ctx->active_count); +} + +void i915_reset_request(struct i915_request *rq, bool guilty) +{ + lockdep_assert_held(&rq->engine->timeline.lock); + GEM_BUG_ON(i915_request_completed(rq)); + + if (guilty) { + i915_request_skip(rq, -EIO); + if (context_mark_guilty(rq->gem_context)) + engine_skip_context(rq); + } else { + dma_fence_set_error(&rq->fence, -EAGAIN); + context_mark_innocent(rq->gem_context); + } +} + +static void gen3_stop_engine(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + const u32 base = engine->mmio_base; + + if (intel_engine_stop_cs(engine)) + DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n", engine->name); + + I915_WRITE_FW(RING_HEAD(base), I915_READ_FW(RING_TAIL(base))); + POSTING_READ_FW(RING_HEAD(base)); /* paranoia */ + + I915_WRITE_FW(RING_HEAD(base), 0); + I915_WRITE_FW(RING_TAIL(base), 0); + POSTING_READ_FW(RING_TAIL(base)); + + /* The ring must be empty before it is disabled */ + I915_WRITE_FW(RING_CTL(base), 0); + + /* Check acts as a post */ + if (I915_READ_FW(RING_HEAD(base)) != 0) + DRM_DEBUG_DRIVER("%s: ring head not parked\n", + engine->name); +} + +static void i915_stop_engines(struct drm_i915_private *i915, + unsigned int engine_mask) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + if (INTEL_GEN(i915) < 3) + return; + + for_each_engine_masked(engine, i915, engine_mask, id) + gen3_stop_engine(engine); +} + +static bool i915_in_reset(struct pci_dev *pdev) +{ + u8 gdrst; + + pci_read_config_byte(pdev, I915_GDRST, &gdrst); + return gdrst & GRDOM_RESET_STATUS; +} + +static int i915_do_reset(struct drm_i915_private *i915, + unsigned int engine_mask, + unsigned int retry) +{ + struct pci_dev *pdev = i915->drm.pdev; + int err; + + /* Assert reset for at least 20 usec, and wait for acknowledgement. */ + pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); + udelay(50); + err = wait_for_atomic(i915_in_reset(pdev), 50); + + /* Clear the reset request. */ + pci_write_config_byte(pdev, I915_GDRST, 0); + udelay(50); + if (!err) + err = wait_for_atomic(!i915_in_reset(pdev), 50); + + return err; +} + +static bool g4x_reset_complete(struct pci_dev *pdev) +{ + u8 gdrst; + + pci_read_config_byte(pdev, I915_GDRST, &gdrst); + return (gdrst & GRDOM_RESET_ENABLE) == 0; +} + +static int g33_do_reset(struct drm_i915_private *i915, + unsigned int engine_mask, + unsigned int retry) +{ + struct pci_dev *pdev = i915->drm.pdev; + + pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); + return wait_for_atomic(g4x_reset_complete(pdev), 50); +} + +static int g4x_do_reset(struct drm_i915_private *dev_priv, + unsigned int engine_mask, + unsigned int retry) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + int ret; + + /* WaVcpClkGateDisableForMediaReset:ctg,elk */ + I915_WRITE_FW(VDECCLK_GATE_D, + I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); + POSTING_READ_FW(VDECCLK_GATE_D); + + pci_write_config_byte(pdev, I915_GDRST, + GRDOM_MEDIA | GRDOM_RESET_ENABLE); + ret = wait_for_atomic(g4x_reset_complete(pdev), 50); + if (ret) { + DRM_DEBUG_DRIVER("Wait for media reset failed\n"); + goto out; + } + + pci_write_config_byte(pdev, I915_GDRST, + GRDOM_RENDER | GRDOM_RESET_ENABLE); + ret = wait_for_atomic(g4x_reset_complete(pdev), 50); + if (ret) { + DRM_DEBUG_DRIVER("Wait for render reset failed\n"); + goto out; + } + +out: + pci_write_config_byte(pdev, I915_GDRST, 0); + + I915_WRITE_FW(VDECCLK_GATE_D, + I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); + POSTING_READ_FW(VDECCLK_GATE_D); + + return ret; +} + +static int ironlake_do_reset(struct drm_i915_private *dev_priv, + unsigned int engine_mask, + unsigned int retry) +{ + int ret; + + I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); + ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR, + ILK_GRDOM_RESET_ENABLE, 0, + 5000, 0, + NULL); + if (ret) { + DRM_DEBUG_DRIVER("Wait for render reset failed\n"); + goto out; + } + + I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); + ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR, + ILK_GRDOM_RESET_ENABLE, 0, + 5000, 0, + NULL); + if (ret) { + DRM_DEBUG_DRIVER("Wait for media reset failed\n"); + goto out; + } + +out: + I915_WRITE_FW(ILK_GDSR, 0); + POSTING_READ_FW(ILK_GDSR); + return ret; +} + +/* Reset the hardware domains (GENX_GRDOM_*) specified by mask */ +static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv, + u32 hw_domain_mask) +{ + int err; + + /* + * GEN6_GDRST is not in the gt power well, no need to check + * for fifo space for the write or forcewake the chip for + * the read + */ + I915_WRITE_FW(GEN6_GDRST, hw_domain_mask); + + /* Wait for the device to ack the reset requests */ + err = __intel_wait_for_register_fw(dev_priv, + GEN6_GDRST, hw_domain_mask, 0, + 500, 0, + NULL); + if (err) + DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n", + hw_domain_mask); + + return err; +} + +static int gen6_reset_engines(struct drm_i915_private *i915, + unsigned int engine_mask, + unsigned int retry) +{ + struct intel_engine_cs *engine; + const u32 hw_engine_mask[I915_NUM_ENGINES] = { + [RCS] = GEN6_GRDOM_RENDER, + [BCS] = GEN6_GRDOM_BLT, + [VCS] = GEN6_GRDOM_MEDIA, + [VCS2] = GEN8_GRDOM_MEDIA2, + [VECS] = GEN6_GRDOM_VECS, + }; + u32 hw_mask; + + if (engine_mask == ALL_ENGINES) { + hw_mask = GEN6_GRDOM_FULL; + } else { + unsigned int tmp; + + hw_mask = 0; + for_each_engine_masked(engine, i915, engine_mask, tmp) + hw_mask |= hw_engine_mask[engine->id]; + } + + return gen6_hw_domain_reset(i915, hw_mask); +} + +static u32 gen11_lock_sfc(struct drm_i915_private *dev_priv, + struct intel_engine_cs *engine) +{ + u8 vdbox_sfc_access = RUNTIME_INFO(dev_priv)->vdbox_sfc_access; + i915_reg_t sfc_forced_lock, sfc_forced_lock_ack; + u32 sfc_forced_lock_bit, sfc_forced_lock_ack_bit; + i915_reg_t sfc_usage; + u32 sfc_usage_bit; + u32 sfc_reset_bit; + + switch (engine->class) { + case VIDEO_DECODE_CLASS: + if ((BIT(engine->instance) & vdbox_sfc_access) == 0) + return 0; + + sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine); + sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT; + + sfc_forced_lock_ack = GEN11_VCS_SFC_LOCK_STATUS(engine); + sfc_forced_lock_ack_bit = GEN11_VCS_SFC_LOCK_ACK_BIT; + + sfc_usage = GEN11_VCS_SFC_LOCK_STATUS(engine); + sfc_usage_bit = GEN11_VCS_SFC_USAGE_BIT; + sfc_reset_bit = GEN11_VCS_SFC_RESET_BIT(engine->instance); + break; + + case VIDEO_ENHANCEMENT_CLASS: + sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine); + sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT; + + sfc_forced_lock_ack = GEN11_VECS_SFC_LOCK_ACK(engine); + sfc_forced_lock_ack_bit = GEN11_VECS_SFC_LOCK_ACK_BIT; + + sfc_usage = GEN11_VECS_SFC_USAGE(engine); + sfc_usage_bit = GEN11_VECS_SFC_USAGE_BIT; + sfc_reset_bit = GEN11_VECS_SFC_RESET_BIT(engine->instance); + break; + + default: + return 0; + } + + /* + * Tell the engine that a software reset is going to happen. The engine + * will then try to force lock the SFC (if currently locked, it will + * remain so until we tell the engine it is safe to unlock; if currently + * unlocked, it will ignore this and all new lock requests). If SFC + * ends up being locked to the engine we want to reset, we have to reset + * it as well (we will unlock it once the reset sequence is completed). + */ + I915_WRITE_FW(sfc_forced_lock, + I915_READ_FW(sfc_forced_lock) | sfc_forced_lock_bit); + + if (__intel_wait_for_register_fw(dev_priv, + sfc_forced_lock_ack, + sfc_forced_lock_ack_bit, + sfc_forced_lock_ack_bit, + 1000, 0, NULL)) { + DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n"); + return 0; + } + + if (I915_READ_FW(sfc_usage) & sfc_usage_bit) + return sfc_reset_bit; + + return 0; +} + +static void gen11_unlock_sfc(struct drm_i915_private *dev_priv, + struct intel_engine_cs *engine) +{ + u8 vdbox_sfc_access = RUNTIME_INFO(dev_priv)->vdbox_sfc_access; + i915_reg_t sfc_forced_lock; + u32 sfc_forced_lock_bit; + + switch (engine->class) { + case VIDEO_DECODE_CLASS: + if ((BIT(engine->instance) & vdbox_sfc_access) == 0) + return; + + sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine); + sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT; + break; + + case VIDEO_ENHANCEMENT_CLASS: + sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine); + sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT; + break; + + default: + return; + } + + I915_WRITE_FW(sfc_forced_lock, + I915_READ_FW(sfc_forced_lock) & ~sfc_forced_lock_bit); +} + +static int gen11_reset_engines(struct drm_i915_private *i915, + unsigned int engine_mask, + unsigned int retry) +{ + const u32 hw_engine_mask[I915_NUM_ENGINES] = { + [RCS] = GEN11_GRDOM_RENDER, + [BCS] = GEN11_GRDOM_BLT, + [VCS] = GEN11_GRDOM_MEDIA, + [VCS2] = GEN11_GRDOM_MEDIA2, + [VCS3] = GEN11_GRDOM_MEDIA3, + [VCS4] = GEN11_GRDOM_MEDIA4, + [VECS] = GEN11_GRDOM_VECS, + [VECS2] = GEN11_GRDOM_VECS2, + }; + struct intel_engine_cs *engine; + unsigned int tmp; + u32 hw_mask; + int ret; + + BUILD_BUG_ON(VECS2 + 1 != I915_NUM_ENGINES); + + if (engine_mask == ALL_ENGINES) { + hw_mask = GEN11_GRDOM_FULL; + } else { + hw_mask = 0; + for_each_engine_masked(engine, i915, engine_mask, tmp) { + hw_mask |= hw_engine_mask[engine->id]; + hw_mask |= gen11_lock_sfc(i915, engine); + } + } + + ret = gen6_hw_domain_reset(i915, hw_mask); + + if (engine_mask != ALL_ENGINES) + for_each_engine_masked(engine, i915, engine_mask, tmp) + gen11_unlock_sfc(i915, engine); + + return ret; +} + +static int gen8_engine_reset_prepare(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base), + _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET)); + + ret = __intel_wait_for_register_fw(dev_priv, + RING_RESET_CTL(engine->mmio_base), + RESET_CTL_READY_TO_RESET, + RESET_CTL_READY_TO_RESET, + 700, 0, + NULL); + if (ret) + DRM_ERROR("%s: reset request timeout\n", engine->name); + + return ret; +} + +static void gen8_engine_reset_cancel(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base), + _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET)); +} + +static int gen8_reset_engines(struct drm_i915_private *i915, + unsigned int engine_mask, + unsigned int retry) +{ + struct intel_engine_cs *engine; + const bool reset_non_ready = retry >= 1; + unsigned int tmp; + int ret; + + for_each_engine_masked(engine, i915, engine_mask, tmp) { + ret = gen8_engine_reset_prepare(engine); + if (ret && !reset_non_ready) + goto skip_reset; + + /* + * If this is not the first failed attempt to prepare, + * we decide to proceed anyway. + * + * By doing so we risk context corruption and with + * some gens (kbl), possible system hang if reset + * happens during active bb execution. + * + * We rather take context corruption instead of + * failed reset with a wedged driver/gpu. And + * active bb execution case should be covered by + * i915_stop_engines we have before the reset. + */ + } + + if (INTEL_GEN(i915) >= 11) + ret = gen11_reset_engines(i915, engine_mask, retry); + else + ret = gen6_reset_engines(i915, engine_mask, retry); + +skip_reset: + for_each_engine_masked(engine, i915, engine_mask, tmp) + gen8_engine_reset_cancel(engine); + + return ret; +} + +typedef int (*reset_func)(struct drm_i915_private *, + unsigned int engine_mask, + unsigned int retry); + +static reset_func intel_get_gpu_reset(struct drm_i915_private *i915) +{ + if (!i915_modparams.reset) + return NULL; + + if (INTEL_GEN(i915) >= 8) + return gen8_reset_engines; + else if (INTEL_GEN(i915) >= 6) + return gen6_reset_engines; + else if (INTEL_GEN(i915) >= 5) + return ironlake_do_reset; + else if (IS_G4X(i915)) + return g4x_do_reset; + else if (IS_G33(i915) || IS_PINEVIEW(i915)) + return g33_do_reset; + else if (INTEL_GEN(i915) >= 3) + return i915_do_reset; + else + return NULL; +} + +int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask) +{ + const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1; + reset_func reset; + int ret = -ETIMEDOUT; + int retry; + + reset = intel_get_gpu_reset(i915); + if (!reset) + return -ENODEV; + + /* + * If the power well sleeps during the reset, the reset + * request may be dropped and never completes (causing -EIO). + */ + intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); + for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) { + /* + * We stop engines, otherwise we might get failed reset and a + * dead gpu (on elk). Also as modern gpu as kbl can suffer + * from system hang if batchbuffer is progressing when + * the reset is issued, regardless of READY_TO_RESET ack. + * Thus assume it is best to stop engines on all gens + * where we have a gpu reset. + * + * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) + * + * WaMediaResetMainRingCleanup:ctg,elk (presumably) + * + * FIXME: Wa for more modern gens needs to be validated + */ + i915_stop_engines(i915, engine_mask); + + GEM_TRACE("engine_mask=%x\n", engine_mask); + preempt_disable(); + ret = reset(i915, engine_mask, retry); + preempt_enable(); + } + intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); + + return ret; +} + +bool intel_has_gpu_reset(struct drm_i915_private *i915) +{ + if (USES_GUC(i915)) + return false; + + return intel_get_gpu_reset(i915); +} + +bool intel_has_reset_engine(struct drm_i915_private *i915) +{ + return INTEL_INFO(i915)->has_reset_engine && i915_modparams.reset >= 2; +} + +int intel_reset_guc(struct drm_i915_private *i915) +{ + u32 guc_domain = + INTEL_GEN(i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC; + int ret; + + GEM_BUG_ON(!HAS_GUC(i915)); + + intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); + ret = gen6_hw_domain_reset(i915, guc_domain); + intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); + + return ret; +} + +/* + * Ensure irq handler finishes, and not run again. + * Also return the active request so that we only search for it once. + */ +static void reset_prepare_engine(struct intel_engine_cs *engine) +{ + /* + * During the reset sequence, we must prevent the engine from + * entering RC6. As the context state is undefined until we restart + * the engine, if it does enter RC6 during the reset, the state + * written to the powercontext is undefined and so we may lose + * GPU state upon resume, i.e. fail to restart after a reset. + */ + intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); + engine->reset.prepare(engine); +} + +static void reset_prepare(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) + reset_prepare_engine(engine); + + intel_uc_sanitize(i915); +} + +static int gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err; + + /* + * Everything depends on having the GTT running, so we need to start + * there. + */ + err = i915_ggtt_enable_hw(i915); + if (err) + return err; + + for_each_engine(engine, i915, id) + intel_engine_reset(engine, stalled_mask & ENGINE_MASK(id)); + + i915_gem_restore_fences(i915); + + return err; +} + +static void reset_finish_engine(struct intel_engine_cs *engine) +{ + engine->reset.finish(engine); + intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); +} + +struct i915_gpu_restart { + struct work_struct work; + struct drm_i915_private *i915; +}; + +static void restart_work(struct work_struct *work) +{ + struct i915_gpu_restart *arg = container_of(work, typeof(*arg), work); + struct drm_i915_private *i915 = arg->i915; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + + wakeref = intel_runtime_pm_get(i915); + mutex_lock(&i915->drm.struct_mutex); + WRITE_ONCE(i915->gpu_error.restart, NULL); + + for_each_engine(engine, i915, id) { + struct i915_request *rq; + + /* + * Ostensibily, we always want a context loaded for powersaving, + * so if the engine is idle after the reset, send a request + * to load our scratch kernel_context. + */ + if (!intel_engine_is_idle(engine)) + continue; + + rq = i915_request_alloc(engine, i915->kernel_context); + if (!IS_ERR(rq)) + i915_request_add(rq); + } + + mutex_unlock(&i915->drm.struct_mutex); + intel_runtime_pm_put(i915, wakeref); + + kfree(arg); +} + +static void reset_finish(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) + reset_finish_engine(engine); +} + +static void reset_restart(struct drm_i915_private *i915) +{ + struct i915_gpu_restart *arg; + + /* + * Following the reset, ensure that we always reload context for + * powersaving, and to correct engine->last_retired_context. Since + * this requires us to submit a request, queue a worker to do that + * task for us to evade any locking here. + */ + if (READ_ONCE(i915->gpu_error.restart)) + return; + + arg = kmalloc(sizeof(*arg), GFP_KERNEL); + if (arg) { + arg->i915 = i915; + INIT_WORK(&arg->work, restart_work); + + WRITE_ONCE(i915->gpu_error.restart, arg); + queue_work(i915->wq, &arg->work); + } +} + +static void nop_submit_request(struct i915_request *request) +{ + struct intel_engine_cs *engine = request->engine; + unsigned long flags; + + GEM_TRACE("%s fence %llx:%lld -> -EIO\n", + engine->name, request->fence.context, request->fence.seqno); + dma_fence_set_error(&request->fence, -EIO); + + spin_lock_irqsave(&engine->timeline.lock, flags); + __i915_request_submit(request); + i915_request_mark_complete(request); + intel_engine_write_global_seqno(engine, request->global_seqno); + spin_unlock_irqrestore(&engine->timeline.lock, flags); + + intel_engine_queue_breadcrumbs(engine); +} + +void i915_gem_set_wedged(struct drm_i915_private *i915) +{ + struct i915_gpu_error *error = &i915->gpu_error; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + mutex_lock(&error->wedge_mutex); + if (test_bit(I915_WEDGED, &error->flags)) { + mutex_unlock(&error->wedge_mutex); + return; + } + + if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(i915)) { + struct drm_printer p = drm_debug_printer(__func__); + + for_each_engine(engine, i915, id) + intel_engine_dump(engine, &p, "%s\n", engine->name); + } + + GEM_TRACE("start\n"); + + /* + * First, stop submission to hw, but do not yet complete requests by + * rolling the global seqno forward (since this would complete requests + * for which we haven't set the fence error to EIO yet). + */ + for_each_engine(engine, i915, id) + reset_prepare_engine(engine); + + /* Even if the GPU reset fails, it should still stop the engines */ + if (INTEL_GEN(i915) >= 5) + intel_gpu_reset(i915, ALL_ENGINES); + + for_each_engine(engine, i915, id) { + engine->submit_request = nop_submit_request; + engine->schedule = NULL; + } + i915->caps.scheduler = 0; + + /* + * Make sure no request can slip through without getting completed by + * either this call here to intel_engine_write_global_seqno, or the one + * in nop_submit_request. + */ + synchronize_rcu(); + + /* Mark all executing requests as skipped */ + for_each_engine(engine, i915, id) + engine->cancel_requests(engine); + + for_each_engine(engine, i915, id) { + reset_finish_engine(engine); + intel_engine_signal_breadcrumbs(engine); + } + + smp_mb__before_atomic(); + set_bit(I915_WEDGED, &error->flags); + + GEM_TRACE("end\n"); + mutex_unlock(&error->wedge_mutex); + + wake_up_all(&error->reset_queue); +} + +bool i915_gem_unset_wedged(struct drm_i915_private *i915) +{ + struct i915_gpu_error *error = &i915->gpu_error; + struct i915_timeline *tl; + bool ret = false; + + if (!test_bit(I915_WEDGED, &error->flags)) + return true; + + if (!i915->gt.scratch) /* Never full initialised, recovery impossible */ + return false; + + mutex_lock(&error->wedge_mutex); + + GEM_TRACE("start\n"); + + /* + * Before unwedging, make sure that all pending operations + * are flushed and errored out - we may have requests waiting upon + * third party fences. We marked all inflight requests as EIO, and + * every execbuf since returned EIO, for consistency we want all + * the currently pending requests to also be marked as EIO, which + * is done inside our nop_submit_request - and so we must wait. + * + * No more can be submitted until we reset the wedged bit. + */ + mutex_lock(&i915->gt.timelines.mutex); + list_for_each_entry(tl, &i915->gt.timelines.active_list, link) { + struct i915_request *rq; + long timeout; + + rq = i915_active_request_get_unlocked(&tl->last_request); + if (!rq) + continue; + + /* + * We can't use our normal waiter as we want to + * avoid recursively trying to handle the current + * reset. The basic dma_fence_default_wait() installs + * a callback for dma_fence_signal(), which is + * triggered by our nop handler (indirectly, the + * callback enables the signaler thread which is + * woken by the nop_submit_request() advancing the seqno + * and when the seqno passes the fence, the signaler + * then signals the fence waking us up). + */ + timeout = dma_fence_default_wait(&rq->fence, true, + MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + if (timeout < 0) { + mutex_unlock(&i915->gt.timelines.mutex); + goto unlock; + } + } + mutex_unlock(&i915->gt.timelines.mutex); + + intel_engines_sanitize(i915, false); + + /* + * Undo nop_submit_request. We prevent all new i915 requests from + * being queued (by disallowing execbuf whilst wedged) so having + * waited for all active requests above, we know the system is idle + * and do not have to worry about a thread being inside + * engine->submit_request() as we swap over. So unlike installing + * the nop_submit_request on reset, we can do this from normal + * context and do not require stop_machine(). + */ + intel_engines_reset_default_submission(i915); + + GEM_TRACE("end\n"); + + smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ + clear_bit(I915_WEDGED, &i915->gpu_error.flags); + ret = true; +unlock: + mutex_unlock(&i915->gpu_error.wedge_mutex); + + return ret; +} + +struct __i915_reset { + struct drm_i915_private *i915; + unsigned int stalled_mask; +}; + +static int __i915_reset__BKL(void *data) +{ + struct __i915_reset *arg = data; + int err; + + err = intel_gpu_reset(arg->i915, ALL_ENGINES); + if (err) + return err; + + return gt_reset(arg->i915, arg->stalled_mask); +} + +#if RESET_UNDER_STOP_MACHINE +/* + * XXX An alternative to using stop_machine would be to park only the + * processes that have a GGTT mmap. By remote parking the threads (SIGSTOP) + * we should be able to prevent their memmory accesses via the lost fence + * registers over the course of the reset without the potential recursive + * of mutexes between the pagefault handler and reset. + * + * See igt/gem_mmap_gtt/hang + */ +#define __do_reset(fn, arg) stop_machine(fn, arg, NULL) +#else +#define __do_reset(fn, arg) fn(arg) +#endif + +static int do_reset(struct drm_i915_private *i915, unsigned int stalled_mask) +{ + struct __i915_reset arg = { i915, stalled_mask }; + int err, i; + + err = __do_reset(__i915_reset__BKL, &arg); + for (i = 0; err && i < RESET_MAX_RETRIES; i++) { + msleep(100); + err = __do_reset(__i915_reset__BKL, &arg); + } + + return err; +} + +/** + * i915_reset - reset chip after a hang + * @i915: #drm_i915_private to reset + * @stalled_mask: mask of the stalled engines with the guilty requests + * @reason: user error message for why we are resetting + * + * Reset the chip. Useful if a hang is detected. Marks the device as wedged + * on failure. + * + * Caller must hold the struct_mutex. + * + * Procedure is fairly simple: + * - reset the chip using the reset reg + * - re-init context state + * - re-init hardware status page + * - re-init ring buffer + * - re-init interrupt state + * - re-init display + */ +void i915_reset(struct drm_i915_private *i915, + unsigned int stalled_mask, + const char *reason) +{ + struct i915_gpu_error *error = &i915->gpu_error; + int ret; + + GEM_TRACE("flags=%lx\n", error->flags); + + might_sleep(); + assert_rpm_wakelock_held(i915); + GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); + + /* Clear any previous failed attempts at recovery. Time to try again. */ + if (!i915_gem_unset_wedged(i915)) + return; + + if (reason) + dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason); + error->reset_count++; + + reset_prepare(i915); + + if (!intel_has_gpu_reset(i915)) { + if (i915_modparams.reset) + dev_err(i915->drm.dev, "GPU reset not supported\n"); + else + DRM_DEBUG_DRIVER("GPU reset disabled\n"); + goto error; + } + + if (do_reset(i915, stalled_mask)) { + dev_err(i915->drm.dev, "Failed to reset chip\n"); + goto taint; + } + + intel_overlay_reset(i915); + + /* + * Next we need to restore the context, but we don't use those + * yet either... + * + * Ring buffer needs to be re-initialized in the KMS case, or if X + * was running at the time of the reset (i.e. we weren't VT + * switched away). + */ + ret = i915_gem_init_hw(i915); + if (ret) { + DRM_ERROR("Failed to initialise HW following reset (%d)\n", + ret); + goto error; + } + + i915_queue_hangcheck(i915); + +finish: + reset_finish(i915); + if (!i915_terminally_wedged(error)) + reset_restart(i915); + return; + +taint: + /* + * History tells us that if we cannot reset the GPU now, we + * never will. This then impacts everything that is run + * subsequently. On failing the reset, we mark the driver + * as wedged, preventing further execution on the GPU. + * We also want to go one step further and add a taint to the + * kernel so that any subsequent faults can be traced back to + * this failure. This is important for CI, where if the + * GPU/driver fails we would like to reboot and restart testing + * rather than continue on into oblivion. For everyone else, + * the system should still plod along, but they have been warned! + */ + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); +error: + i915_gem_set_wedged(i915); + goto finish; +} + +static inline int intel_gt_reset_engine(struct drm_i915_private *i915, + struct intel_engine_cs *engine) +{ + return intel_gpu_reset(i915, intel_engine_flag(engine)); +} + +/** + * i915_reset_engine - reset GPU engine to recover from a hang + * @engine: engine to reset + * @msg: reason for GPU reset; or NULL for no dev_notice() + * + * Reset a specific GPU engine. Useful if a hang is detected. + * Returns zero on successful reset or otherwise an error code. + * + * Procedure is: + * - identifies the request that caused the hang and it is dropped + * - reset engine (which will force the engine to idle) + * - re-init/configure engine + */ +int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) +{ + struct i915_gpu_error *error = &engine->i915->gpu_error; + int ret; + + GEM_TRACE("%s flags=%lx\n", engine->name, error->flags); + GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); + + reset_prepare_engine(engine); + + if (msg) + dev_notice(engine->i915->drm.dev, + "Resetting %s for %s\n", engine->name, msg); + error->reset_engine_count[engine->id]++; + + if (!engine->i915->guc.execbuf_client) + ret = intel_gt_reset_engine(engine->i915, engine); + else + ret = intel_guc_reset_engine(&engine->i915->guc, engine); + if (ret) { + /* If we fail here, we expect to fallback to a global reset */ + DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n", + engine->i915->guc.execbuf_client ? "GuC " : "", + engine->name, ret); + goto out; + } + + /* + * The request that caused the hang is stuck on elsp, we know the + * active request and can drop it, adjust head to skip the offending + * request to resume executing remaining requests in the queue. + */ + intel_engine_reset(engine, true); + + /* + * The engine and its registers (and workarounds in case of render) + * have been reset to their default values. Follow the init_ring + * process to program RING_MODE, HWSP and re-enable submission. + */ + ret = engine->init_hw(engine); + if (ret) + goto out; + +out: + intel_engine_cancel_stop_cs(engine); + reset_finish_engine(engine); + return ret; +} + +static void i915_reset_device(struct drm_i915_private *i915, + u32 engine_mask, + const char *reason) +{ + struct i915_gpu_error *error = &i915->gpu_error; + struct kobject *kobj = &i915->drm.primary->kdev->kobj; + char *error_event[] = { I915_ERROR_UEVENT "=1", NULL }; + char *reset_event[] = { I915_RESET_UEVENT "=1", NULL }; + char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL }; + struct i915_wedge_me w; + + kobject_uevent_env(kobj, KOBJ_CHANGE, error_event); + + DRM_DEBUG_DRIVER("resetting chip\n"); + kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event); + + /* Use a watchdog to ensure that our reset completes */ + i915_wedge_on_timeout(&w, i915, 5 * HZ) { + intel_prepare_reset(i915); + + i915_reset(i915, engine_mask, reason); + + intel_finish_reset(i915); + } + + if (!test_bit(I915_WEDGED, &error->flags)) + kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event); +} + +void i915_clear_error_registers(struct drm_i915_private *dev_priv) +{ + u32 eir; + + if (!IS_GEN(dev_priv, 2)) + I915_WRITE(PGTBL_ER, I915_READ(PGTBL_ER)); + + if (INTEL_GEN(dev_priv) < 4) + I915_WRITE(IPEIR, I915_READ(IPEIR)); + else + I915_WRITE(IPEIR_I965, I915_READ(IPEIR_I965)); + + I915_WRITE(EIR, I915_READ(EIR)); + eir = I915_READ(EIR); + if (eir) { + /* + * some errors might have become stuck, + * mask them. + */ + DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir); + I915_WRITE(EMR, I915_READ(EMR) | eir); + I915_WRITE(IIR, I915_MASTER_ERROR_INTERRUPT); + } + + if (INTEL_GEN(dev_priv) >= 8) { + I915_WRITE(GEN8_RING_FAULT_REG, + I915_READ(GEN8_RING_FAULT_REG) & ~RING_FAULT_VALID); + POSTING_READ(GEN8_RING_FAULT_REG); + } else if (INTEL_GEN(dev_priv) >= 6) { + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, dev_priv, id) { + I915_WRITE(RING_FAULT_REG(engine), + I915_READ(RING_FAULT_REG(engine)) & + ~RING_FAULT_VALID); + } + POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS])); + } +} + +/** + * i915_handle_error - handle a gpu error + * @i915: i915 device private + * @engine_mask: mask representing engines that are hung + * @flags: control flags + * @fmt: Error message format string + * + * Do some basic checking of register state at error time and + * dump it to the syslog. Also call i915_capture_error_state() to make + * sure we get a record and make it available in debugfs. Fire a uevent + * so userspace knows something bad happened (should trigger collection + * of a ring dump etc.). + */ +void i915_handle_error(struct drm_i915_private *i915, + u32 engine_mask, + unsigned long flags, + const char *fmt, ...) +{ + struct intel_engine_cs *engine; + intel_wakeref_t wakeref; + unsigned int tmp; + char error_msg[80]; + char *msg = NULL; + + if (fmt) { + va_list args; + + va_start(args, fmt); + vscnprintf(error_msg, sizeof(error_msg), fmt, args); + va_end(args); + + msg = error_msg; + } + + /* + * In most cases it's guaranteed that we get here with an RPM + * reference held, for example because there is a pending GPU + * request that won't finish until the reset is done. This + * isn't the case at least when we get here by doing a + * simulated reset via debugfs, so get an RPM reference. + */ + wakeref = intel_runtime_pm_get(i915); + + engine_mask &= INTEL_INFO(i915)->ring_mask; + + if (flags & I915_ERROR_CAPTURE) { + i915_capture_error_state(i915, engine_mask, msg); + i915_clear_error_registers(i915); + } + + /* + * Try engine reset when available. We fall back to full reset if + * single reset fails. + */ + if (intel_has_reset_engine(i915) && + !i915_terminally_wedged(&i915->gpu_error)) { + for_each_engine_masked(engine, i915, engine_mask, tmp) { + BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE); + if (test_and_set_bit(I915_RESET_ENGINE + engine->id, + &i915->gpu_error.flags)) + continue; + + if (i915_reset_engine(engine, msg) == 0) + engine_mask &= ~intel_engine_flag(engine); + + clear_bit(I915_RESET_ENGINE + engine->id, + &i915->gpu_error.flags); + wake_up_bit(&i915->gpu_error.flags, + I915_RESET_ENGINE + engine->id); + } + } + + if (!engine_mask) + goto out; + + /* Full reset needs the mutex, stop any other user trying to do so. */ + if (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags)) { + wait_event(i915->gpu_error.reset_queue, + !test_bit(I915_RESET_BACKOFF, + &i915->gpu_error.flags)); + goto out; + } + + /* Prevent any other reset-engine attempt. */ + for_each_engine(engine, i915, tmp) { + while (test_and_set_bit(I915_RESET_ENGINE + engine->id, + &i915->gpu_error.flags)) + wait_on_bit(&i915->gpu_error.flags, + I915_RESET_ENGINE + engine->id, + TASK_UNINTERRUPTIBLE); + } + + i915_reset_device(i915, engine_mask, msg); + + for_each_engine(engine, i915, tmp) { + clear_bit(I915_RESET_ENGINE + engine->id, + &i915->gpu_error.flags); + } + + clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags); + wake_up_all(&i915->gpu_error.reset_queue); + +out: + intel_runtime_pm_put(i915, wakeref); +} + +bool i915_reset_flush(struct drm_i915_private *i915) +{ + int err; + + cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); + + flush_workqueue(i915->wq); + GEM_BUG_ON(READ_ONCE(i915->gpu_error.restart)); + + mutex_lock(&i915->drm.struct_mutex); + err = i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED | + I915_WAIT_FOR_IDLE_BOOST, + MAX_SCHEDULE_TIMEOUT); + mutex_unlock(&i915->drm.struct_mutex); + + return !err; +} + +static void i915_wedge_me(struct work_struct *work) +{ + struct i915_wedge_me *w = container_of(work, typeof(*w), work.work); + + dev_err(w->i915->drm.dev, + "%s timed out, cancelling all in-flight rendering.\n", + w->name); + i915_gem_set_wedged(w->i915); +} + +void __i915_init_wedge(struct i915_wedge_me *w, + struct drm_i915_private *i915, + long timeout, + const char *name) +{ + w->i915 = i915; + w->name = name; + + INIT_DELAYED_WORK_ONSTACK(&w->work, i915_wedge_me); + schedule_delayed_work(&w->work, timeout); +} + +void __i915_fini_wedge(struct i915_wedge_me *w) +{ + cancel_delayed_work_sync(&w->work); + destroy_delayed_work_on_stack(&w->work); + w->i915 = NULL; +} diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/i915_reset.h new file mode 100644 index 000000000000..f2d347f319df --- /dev/null +++ b/drivers/gpu/drm/i915/i915_reset.h @@ -0,0 +1,59 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2008-2018 Intel Corporation + */ + +#ifndef I915_RESET_H +#define I915_RESET_H + +#include <linux/compiler.h> +#include <linux/types.h> + +struct drm_i915_private; +struct intel_engine_cs; +struct intel_guc; + +__printf(4, 5) +void i915_handle_error(struct drm_i915_private *i915, + u32 engine_mask, + unsigned long flags, + const char *fmt, ...); +#define I915_ERROR_CAPTURE BIT(0) + +void i915_clear_error_registers(struct drm_i915_private *i915); + +void i915_reset(struct drm_i915_private *i915, + unsigned int stalled_mask, + const char *reason); +int i915_reset_engine(struct intel_engine_cs *engine, + const char *reason); + +void i915_reset_request(struct i915_request *rq, bool guilty); +bool i915_reset_flush(struct drm_i915_private *i915); + +bool intel_has_gpu_reset(struct drm_i915_private *i915); +bool intel_has_reset_engine(struct drm_i915_private *i915); + +int intel_gpu_reset(struct drm_i915_private *i915, u32 engine_mask); + +int intel_reset_guc(struct drm_i915_private *i915); + +struct i915_wedge_me { + struct delayed_work work; + struct drm_i915_private *i915; + const char *name; +}; + +void __i915_init_wedge(struct i915_wedge_me *w, + struct drm_i915_private *i915, + long timeout, + const char *name); +void __i915_fini_wedge(struct i915_wedge_me *w); + +#define i915_wedge_on_timeout(W, DEV, TIMEOUT) \ + for (__i915_init_wedge((W), (DEV), (TIMEOUT), __func__); \ + (W)->i915; \ + __i915_fini_wedge((W))) + +#endif /* I915_RESET_H */ diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 340faea6c08a..d01683167c77 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -127,8 +127,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) return rb_entry(rb, struct i915_priolist, node); } -static void assert_priolists(struct intel_engine_execlists * const execlists, - long queue_priority) +static void assert_priolists(struct intel_engine_execlists * const execlists) { struct rb_node *rb; long last_prio, i; @@ -139,7 +138,7 @@ static void assert_priolists(struct intel_engine_execlists * const execlists, GEM_BUG_ON(rb_first_cached(&execlists->queue) != rb_first(&execlists->queue.rb_root)); - last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1; + last_prio = (INT_MAX >> I915_USER_PRIORITY_SHIFT) + 1; for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { const struct i915_priolist *p = to_priolist(rb); @@ -166,7 +165,7 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio) int idx, i; lockdep_assert_held(&engine->timeline.lock); - assert_priolists(execlists, INT_MAX); + assert_priolists(execlists); /* buckets sorted from highest [in slot 0] to lowest priority */ idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1; @@ -239,6 +238,18 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked) return engine; } +static bool inflight(const struct i915_request *rq, + const struct intel_engine_cs *engine) +{ + const struct i915_request *active; + + if (!i915_request_is_active(rq)) + return false; + + active = port_request(engine->execlists.port); + return active->hw_context == rq->hw_context; +} + static void __i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr) { @@ -328,6 +339,7 @@ static void __i915_schedule(struct i915_request *rq, INIT_LIST_HEAD(&dep->dfs_link); engine = sched_lock_engine(node, engine); + lockdep_assert_held(&engine->timeline.lock); /* Recheck after acquiring the engine->timeline.lock */ if (prio <= node->attr.priority || node_signaled(node)) @@ -353,20 +365,19 @@ static void __i915_schedule(struct i915_request *rq, continue; } - if (prio <= engine->execlists.queue_priority) + if (prio <= engine->execlists.queue_priority_hint) continue; + engine->execlists.queue_priority_hint = prio; + /* * If we are already the currently executing context, don't * bother evaluating if we should preempt ourselves. */ - if (node_to_request(node)->global_seqno && - i915_seqno_passed(port_request(engine->execlists.port)->global_seqno, - node_to_request(node)->global_seqno)) + if (inflight(node_to_request(node), engine)) continue; /* Defer (tasklet) submission until after all of our updates. */ - engine->execlists.queue_priority = prio; tasklet_hi_schedule(&engine->execlists.tasklet); } diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h index a73472dd12fd..207e21b478f2 100644 --- a/drivers/gpu/drm/i915/i915_selftest.h +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -31,6 +31,7 @@ struct i915_selftest { unsigned long timeout_jiffies; unsigned int timeout_ms; unsigned int random_seed; + char *filter; int mock; int live; }; diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index f18afa2bac8d..d2f2a9c2fabd 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -86,7 +86,7 @@ int i915_save_state(struct drm_i915_private *dev_priv) } else if (IS_GEN(dev_priv, 2)) { for (i = 0; i < 7; i++) dev_priv->regfile.saveSWF1[i] = I915_READ(SWF1(i)); - } else if (HAS_GMCH_DISPLAY(dev_priv)) { + } else if (HAS_GMCH(dev_priv)) { for (i = 0; i < 16; i++) { dev_priv->regfile.saveSWF0[i] = I915_READ(SWF0(i)); dev_priv->regfile.saveSWF1[i] = I915_READ(SWF1(i)); @@ -131,7 +131,7 @@ int i915_restore_state(struct drm_i915_private *dev_priv) } else if (IS_GEN(dev_priv, 2)) { for (i = 0; i < 7; i++) I915_WRITE(SWF1(i), dev_priv->regfile.saveSWF1[i]); - } else if (HAS_GMCH_DISPLAY(dev_priv)) { + } else if (HAS_GMCH(dev_priv)) { for (i = 0; i < 16; i++) { I915_WRITE(SWF0(i), dev_priv->regfile.saveSWF0[i]); I915_WRITE(SWF1(i), dev_priv->regfile.saveSWF1[i]); diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index c0cfe7ae2ba5..41313005af42 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -42,11 +42,11 @@ static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev) static u32 calc_residency(struct drm_i915_private *dev_priv, i915_reg_t reg) { - u64 res; + intel_wakeref_t wakeref; + u64 res = 0; - intel_runtime_pm_get(dev_priv); - res = intel_rc6_residency_us(dev_priv, reg); - intel_runtime_pm_put(dev_priv); + with_intel_runtime_pm(dev_priv, wakeref) + res = intel_rc6_residency_us(dev_priv, reg); return DIV_ROUND_CLOSEST_ULL(res, 1000); } @@ -258,9 +258,10 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + intel_wakeref_t wakeref; int ret; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); mutex_lock(&dev_priv->pcu_lock); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { @@ -274,7 +275,7 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, } mutex_unlock(&dev_priv->pcu_lock); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return snprintf(buf, PAGE_SIZE, "%d\n", ret); } @@ -354,6 +355,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); struct intel_rps *rps = &dev_priv->gt_pm.rps; + intel_wakeref_t wakeref; u32 val; ssize_t ret; @@ -361,7 +363,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, if (ret) return ret; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); mutex_lock(&dev_priv->pcu_lock); @@ -371,7 +373,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, val > rps->max_freq || val < rps->min_freq_softlimit) { mutex_unlock(&dev_priv->pcu_lock); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return -EINVAL; } @@ -392,7 +394,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, mutex_unlock(&dev_priv->pcu_lock); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return ret ?: count; } @@ -412,6 +414,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); struct intel_rps *rps = &dev_priv->gt_pm.rps; + intel_wakeref_t wakeref; u32 val; ssize_t ret; @@ -419,7 +422,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, if (ret) return ret; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); mutex_lock(&dev_priv->pcu_lock); @@ -429,7 +432,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, val > rps->max_freq || val > rps->max_freq_softlimit) { mutex_unlock(&dev_priv->pcu_lock); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return -EINVAL; } @@ -446,7 +449,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, mutex_unlock(&dev_priv->pcu_lock); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return ret ?: count; } diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index 4667cc08c416..b2202d2e58a2 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -9,34 +9,199 @@ #include "i915_timeline.h" #include "i915_syncmap.h" -void i915_timeline_init(struct drm_i915_private *i915, - struct i915_timeline *timeline, - const char *name) +struct i915_timeline_hwsp { + struct i915_vma *vma; + struct list_head free_link; + u64 free_bitmap; +}; + +static inline struct i915_timeline_hwsp * +i915_timeline_hwsp(const struct i915_timeline *tl) +{ + return tl->hwsp_ggtt->private; +} + +static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); + + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + if (IS_ERR(vma)) + i915_gem_object_put(obj); + + return vma; +} + +static struct i915_vma * +hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline) { - lockdep_assert_held(&i915->drm.struct_mutex); + struct drm_i915_private *i915 = timeline->i915; + struct i915_gt_timelines *gt = &i915->gt.timelines; + struct i915_timeline_hwsp *hwsp; + + BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE); + + spin_lock(>->hwsp_lock); + + /* hwsp_free_list only contains HWSP that have available cachelines */ + hwsp = list_first_entry_or_null(>->hwsp_free_list, + typeof(*hwsp), free_link); + if (!hwsp) { + struct i915_vma *vma; + + spin_unlock(>->hwsp_lock); + + hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL); + if (!hwsp) + return ERR_PTR(-ENOMEM); + + vma = __hwsp_alloc(i915); + if (IS_ERR(vma)) { + kfree(hwsp); + return vma; + } + + vma->private = hwsp; + hwsp->vma = vma; + hwsp->free_bitmap = ~0ull; + + spin_lock(>->hwsp_lock); + list_add(&hwsp->free_link, >->hwsp_free_list); + } + + GEM_BUG_ON(!hwsp->free_bitmap); + *cacheline = __ffs64(hwsp->free_bitmap); + hwsp->free_bitmap &= ~BIT_ULL(*cacheline); + if (!hwsp->free_bitmap) + list_del(&hwsp->free_link); + + spin_unlock(>->hwsp_lock); + + GEM_BUG_ON(hwsp->vma->private != hwsp); + return hwsp->vma; +} + +static void hwsp_free(struct i915_timeline *timeline) +{ + struct i915_gt_timelines *gt = &timeline->i915->gt.timelines; + struct i915_timeline_hwsp *hwsp; + + hwsp = i915_timeline_hwsp(timeline); + if (!hwsp) /* leave global HWSP alone! */ + return; + + spin_lock(>->hwsp_lock); + + /* As a cacheline becomes available, publish the HWSP on the freelist */ + if (!hwsp->free_bitmap) + list_add_tail(&hwsp->free_link, >->hwsp_free_list); + + hwsp->free_bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES); + + /* And if no one is left using it, give the page back to the system */ + if (hwsp->free_bitmap == ~0ull) { + i915_vma_put(hwsp->vma); + list_del(&hwsp->free_link); + kfree(hwsp); + } + + spin_unlock(>->hwsp_lock); +} + +int i915_timeline_init(struct drm_i915_private *i915, + struct i915_timeline *timeline, + const char *name, + struct i915_vma *hwsp) +{ + void *vaddr; /* * Ideally we want a set of engines on a single leaf as we expect * to mostly be tracking synchronisation between engines. It is not * a huge issue if this is not the case, but we may want to mitigate * any page crossing penalties if they become an issue. + * + * Called during early_init before we know how many engines there are. */ BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); + timeline->i915 = i915; timeline->name = name; + timeline->pin_count = 0; + timeline->has_initial_breadcrumb = !hwsp; - list_add(&timeline->link, &i915->gt.timelines); + timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR; + if (!hwsp) { + unsigned int cacheline; + + hwsp = hwsp_alloc(timeline, &cacheline); + if (IS_ERR(hwsp)) + return PTR_ERR(hwsp); + + timeline->hwsp_offset = cacheline * CACHELINE_BYTES; + } + timeline->hwsp_ggtt = i915_vma_get(hwsp); + + vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + hwsp_free(timeline); + i915_vma_put(hwsp); + return PTR_ERR(vaddr); + } - /* Called during early_init before we know how many engines there are */ + timeline->hwsp_seqno = + memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES); timeline->fence_context = dma_fence_context_alloc(1); spin_lock_init(&timeline->lock); - init_request_active(&timeline->last_request, NULL); + INIT_ACTIVE_REQUEST(&timeline->barrier); + INIT_ACTIVE_REQUEST(&timeline->last_request); INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); + + return 0; +} + +void i915_timelines_init(struct drm_i915_private *i915) +{ + struct i915_gt_timelines *gt = &i915->gt.timelines; + + mutex_init(>->mutex); + INIT_LIST_HEAD(>->active_list); + + spin_lock_init(>->hwsp_lock); + INIT_LIST_HEAD(>->hwsp_free_list); + + /* via i915_gem_wait_for_idle() */ + i915_gem_shrinker_taints_mutex(i915, >->mutex); +} + +static void timeline_add_to_active(struct i915_timeline *tl) +{ + struct i915_gt_timelines *gt = &tl->i915->gt.timelines; + + mutex_lock(>->mutex); + list_add(&tl->link, >->active_list); + mutex_unlock(>->mutex); +} + +static void timeline_remove_from_active(struct i915_timeline *tl) +{ + struct i915_gt_timelines *gt = &tl->i915->gt.timelines; + + mutex_lock(>->mutex); + list_del(&tl->link); + mutex_unlock(>->mutex); } /** @@ -51,11 +216,11 @@ void i915_timeline_init(struct drm_i915_private *i915, */ void i915_timelines_park(struct drm_i915_private *i915) { + struct i915_gt_timelines *gt = &i915->gt.timelines; struct i915_timeline *timeline; - lockdep_assert_held(&i915->drm.struct_mutex); - - list_for_each_entry(timeline, &i915->gt.timelines, link) { + mutex_lock(>->mutex); + list_for_each_entry(timeline, >->active_list, link) { /* * All known fences are completed so we can scrap * the current sync point tracking and start afresh, @@ -64,32 +229,88 @@ void i915_timelines_park(struct drm_i915_private *i915) */ i915_syncmap_free(&timeline->sync); } + mutex_unlock(>->mutex); } void i915_timeline_fini(struct i915_timeline *timeline) { + GEM_BUG_ON(timeline->pin_count); GEM_BUG_ON(!list_empty(&timeline->requests)); + GEM_BUG_ON(i915_active_request_isset(&timeline->barrier)); i915_syncmap_free(&timeline->sync); + hwsp_free(timeline); - list_del(&timeline->link); + i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj); + i915_vma_put(timeline->hwsp_ggtt); } struct i915_timeline * -i915_timeline_create(struct drm_i915_private *i915, const char *name) +i915_timeline_create(struct drm_i915_private *i915, + const char *name, + struct i915_vma *global_hwsp) { struct i915_timeline *timeline; + int err; timeline = kzalloc(sizeof(*timeline), GFP_KERNEL); if (!timeline) return ERR_PTR(-ENOMEM); - i915_timeline_init(i915, timeline, name); + err = i915_timeline_init(i915, timeline, name, global_hwsp); + if (err) { + kfree(timeline); + return ERR_PTR(err); + } + kref_init(&timeline->kref); return timeline; } +int i915_timeline_pin(struct i915_timeline *tl) +{ + int err; + + if (tl->pin_count++) + return 0; + GEM_BUG_ON(!tl->pin_count); + + err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (err) + goto unpin; + + tl->hwsp_offset = + i915_ggtt_offset(tl->hwsp_ggtt) + + offset_in_page(tl->hwsp_offset); + + timeline_add_to_active(tl); + + return 0; + +unpin: + tl->pin_count = 0; + return err; +} + +void i915_timeline_unpin(struct i915_timeline *tl) +{ + GEM_BUG_ON(!tl->pin_count); + if (--tl->pin_count) + return; + + timeline_remove_from_active(tl); + + /* + * Since this timeline is idle, all bariers upon which we were waiting + * must also be complete and so we can discard the last used barriers + * without loss of information. + */ + i915_syncmap_free(&tl->sync); + + __i915_vma_unpin(tl->hwsp_ggtt); +} + void __i915_timeline_free(struct kref *kref) { struct i915_timeline *timeline = @@ -99,6 +320,16 @@ void __i915_timeline_free(struct kref *kref) kfree(timeline); } +void i915_timelines_fini(struct drm_i915_private *i915) +{ + struct i915_gt_timelines *gt = &i915->gt.timelines; + + GEM_BUG_ON(!list_empty(>->active_list)); + GEM_BUG_ON(!list_empty(>->hwsp_free_list)); + + mutex_destroy(>->mutex); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_timeline.c" #include "selftests/i915_timeline.c" diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index 38c1e15e927a..7bec7d2e45bf 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -28,10 +28,14 @@ #include <linux/list.h> #include <linux/kref.h> +#include "i915_active.h" #include "i915_request.h" #include "i915_syncmap.h" #include "i915_utils.h" +struct i915_vma; +struct i915_timeline_hwsp; + struct i915_timeline { u64 fence_context; u32 seqno; @@ -40,6 +44,13 @@ struct i915_timeline { #define TIMELINE_CLIENT 0 /* default subclass */ #define TIMELINE_ENGINE 1 + unsigned int pin_count; + const u32 *hwsp_seqno; + struct i915_vma *hwsp_ggtt; + u32 hwsp_offset; + + bool has_initial_breadcrumb; + /** * List of breadcrumbs associated with GPU requests currently * outstanding. @@ -48,10 +59,10 @@ struct i915_timeline { /* Contains an RCU guarded pointer to the last request. No reference is * held to the request, users must carefully acquire a reference to - * the request using i915_gem_active_get_request_rcu(), or hold the + * the request using i915_active_request_get_request_rcu(), or hold the * struct_mutex. */ - struct i915_gem_active last_request; + struct i915_active_request last_request; /** * We track the most recent seqno that we wait on in every context so @@ -64,15 +75,27 @@ struct i915_timeline { */ struct i915_syncmap *sync; + /** + * Barrier provides the ability to serialize ordering between different + * timelines. + * + * Users can call i915_timeline_set_barrier which will make all + * subsequent submissions to this timeline be executed only after the + * barrier has been completed. + */ + struct i915_active_request barrier; + struct list_head link; const char *name; + struct drm_i915_private *i915; struct kref kref; }; -void i915_timeline_init(struct drm_i915_private *i915, - struct i915_timeline *tl, - const char *name); +int i915_timeline_init(struct drm_i915_private *i915, + struct i915_timeline *tl, + const char *name, + struct i915_vma *hwsp); void i915_timeline_fini(struct i915_timeline *tl); static inline void @@ -95,7 +118,9 @@ i915_timeline_set_subclass(struct i915_timeline *timeline, } struct i915_timeline * -i915_timeline_create(struct drm_i915_private *i915, const char *name); +i915_timeline_create(struct drm_i915_private *i915, + const char *name, + struct i915_vma *global_hwsp); static inline struct i915_timeline * i915_timeline_get(struct i915_timeline *timeline) @@ -134,6 +159,26 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl, return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno); } +int i915_timeline_pin(struct i915_timeline *tl); +void i915_timeline_unpin(struct i915_timeline *tl); + +void i915_timelines_init(struct drm_i915_private *i915); void i915_timelines_park(struct drm_i915_private *i915); +void i915_timelines_fini(struct drm_i915_private *i915); + +/** + * i915_timeline_set_barrier - orders submission between different timelines + * @timeline: timeline to set the barrier on + * @rq: request after which new submissions can proceed + * + * Sets the passed in request as the serialization point for all subsequent + * submissions on @timeline. Subsequent requests will not be submitted to GPU + * until the barrier has been completed. + */ +static inline int +i915_timeline_set_barrier(struct i915_timeline *tl, struct i915_request *rq) +{ + return i915_active_request_set(&tl->barrier, rq); +} #endif diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 43da14f08dc0..eab313c3163c 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -752,31 +752,6 @@ trace_i915_request_out(struct i915_request *rq) #endif #endif -TRACE_EVENT(intel_engine_notify, - TP_PROTO(struct intel_engine_cs *engine, bool waiters), - TP_ARGS(engine, waiters), - - TP_STRUCT__entry( - __field(u32, dev) - __field(u16, class) - __field(u16, instance) - __field(u32, seqno) - __field(bool, waiters) - ), - - TP_fast_assign( - __entry->dev = engine->i915->drm.primary->index; - __entry->class = engine->uabi_class; - __entry->instance = engine->instance; - __entry->seqno = intel_engine_get_seqno(engine); - __entry->waiters = waiters; - ), - - TP_printk("dev=%u, engine=%u:%u, seqno=%u, waiters=%u", - __entry->dev, __entry->class, __entry->instance, - __entry->seqno, __entry->waiters) -); - DEFINE_EVENT(i915_request, i915_request_retire, TP_PROTO(struct i915_request *rq), TP_ARGS(rq) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 5b4d78cdb4ca..b713bed20c38 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -63,24 +63,22 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason) #endif -struct i915_vma_active { - struct i915_gem_active base; - struct i915_vma *vma; - struct rb_node node; - u64 timeline; -}; - -static void -__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq) +static void obj_bump_mru(struct drm_i915_gem_object *obj) { - struct drm_i915_gem_object *obj = vma->obj; + struct drm_i915_private *i915 = to_i915(obj->base.dev); - GEM_BUG_ON(!i915_vma_is_active(vma)); - if (--vma->active_count) - return; + spin_lock(&i915->mm.obj_lock); + if (obj->bind_count) + list_move_tail(&obj->mm.link, &i915->mm.bound_list); + spin_unlock(&i915->mm.obj_lock); - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + obj->mm.dirty = true; /* be paranoid */ +} + +static void __i915_vma_retire(struct i915_active *ref) +{ + struct i915_vma *vma = container_of(ref, typeof(*vma), active); + struct drm_i915_gem_object *obj = vma->obj; GEM_BUG_ON(!i915_gem_object_is_active(obj)); if (--obj->active_count) @@ -93,16 +91,12 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq) reservation_object_unlock(obj->resv); } - /* Bump our place on the bound list to keep it roughly in LRU order + /* + * Bump our place on the bound list to keep it roughly in LRU order * so that we don't steal from recently used but inactive objects * (unless we are forced to ofc!) */ - spin_lock(&rq->i915->mm.obj_lock); - if (obj->bind_count) - list_move_tail(&obj->mm.link, &rq->i915->mm.bound_list); - spin_unlock(&rq->i915->mm.obj_lock); - - obj->mm.dirty = true; /* be paranoid */ + obj_bump_mru(obj); if (i915_gem_object_has_active_reference(obj)) { i915_gem_object_clear_active_reference(obj); @@ -110,21 +104,6 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq) } } -static void -i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq) -{ - struct i915_vma_active *active = - container_of(base, typeof(*active), base); - - __i915_vma_retire(active->vma, rq); -} - -static void -i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq) -{ - __i915_vma_retire(container_of(base, struct i915_vma, last_active), rq); -} - static struct i915_vma * vma_create(struct drm_i915_gem_object *obj, struct i915_address_space *vm, @@ -140,10 +119,9 @@ vma_create(struct drm_i915_gem_object *obj, if (vma == NULL) return ERR_PTR(-ENOMEM); - vma->active = RB_ROOT; + i915_active_init(vm->i915, &vma->active, __i915_vma_retire); + INIT_ACTIVE_REQUEST(&vma->last_fence); - init_request_active(&vma->last_active, i915_vma_last_retire); - init_request_active(&vma->last_fence, NULL); vma->vm = vm; vma->ops = &vm->vma_ops; vma->obj = obj; @@ -190,33 +168,56 @@ vma_create(struct drm_i915_gem_object *obj, i915_gem_object_get_stride(obj)); GEM_BUG_ON(!is_power_of_2(vma->fence_alignment)); - /* - * We put the GGTT vma at the start of the vma-list, followed - * by the ppGGTT vma. This allows us to break early when - * iterating over only the GGTT vma for an object, see - * for_each_ggtt_vma() - */ vma->flags |= I915_VMA_GGTT; - list_add(&vma->obj_link, &obj->vma_list); - } else { - list_add_tail(&vma->obj_link, &obj->vma_list); } + spin_lock(&obj->vma.lock); + rb = NULL; - p = &obj->vma_tree.rb_node; + p = &obj->vma.tree.rb_node; while (*p) { struct i915_vma *pos; + long cmp; rb = *p; pos = rb_entry(rb, struct i915_vma, obj_node); - if (i915_vma_compare(pos, vm, view) < 0) + + /* + * If the view already exists in the tree, another thread + * already created a matching vma, so return the older instance + * and dispose of ours. + */ + cmp = i915_vma_compare(pos, vm, view); + if (cmp == 0) { + spin_unlock(&obj->vma.lock); + kmem_cache_free(vm->i915->vmas, vma); + return pos; + } + + if (cmp < 0) p = &rb->rb_right; else p = &rb->rb_left; } rb_link_node(&vma->obj_node, rb, p); - rb_insert_color(&vma->obj_node, &obj->vma_tree); + rb_insert_color(&vma->obj_node, &obj->vma.tree); + + if (i915_vma_is_ggtt(vma)) + /* + * We put the GGTT vma at the start of the vma-list, followed + * by the ppGGTT vma. This allows us to break early when + * iterating over only the GGTT vma for an object, see + * for_each_ggtt_vma() + */ + list_add(&vma->obj_link, &obj->vma.list); + else + list_add_tail(&vma->obj_link, &obj->vma.list); + + spin_unlock(&obj->vma.lock); + + mutex_lock(&vm->mutex); list_add(&vma->vm_link, &vm->unbound_list); + mutex_unlock(&vm->mutex); return vma; @@ -232,7 +233,7 @@ vma_lookup(struct drm_i915_gem_object *obj, { struct rb_node *rb; - rb = obj->vma_tree.rb_node; + rb = obj->vma.tree.rb_node; while (rb) { struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node); long cmp; @@ -272,16 +273,18 @@ i915_vma_instance(struct drm_i915_gem_object *obj, { struct i915_vma *vma; - lockdep_assert_held(&obj->base.dev->struct_mutex); GEM_BUG_ON(view && !i915_is_ggtt(vm)); GEM_BUG_ON(vm->closed); + spin_lock(&obj->vma.lock); vma = vma_lookup(obj, vm, view); - if (!vma) + spin_unlock(&obj->vma.lock); + + /* vma_create() will resolve the race if another creates the vma */ + if (unlikely(!vma)) vma = vma_create(obj, vm, view); GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view)); - GEM_BUG_ON(!IS_ERR(vma) && vma_lookup(obj, vm, view) != vma); return vma; } @@ -659,7 +662,9 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level)); - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + mutex_lock(&vma->vm->mutex); + list_move_tail(&vma->vm_link, &vma->vm->bound_list); + mutex_unlock(&vma->vm->mutex); if (vma->obj) { struct drm_i915_gem_object *obj = vma->obj; @@ -692,8 +697,10 @@ i915_vma_remove(struct i915_vma *vma) vma->ops->clear_pages(vma); + mutex_lock(&vma->vm->mutex); drm_mm_remove_node(&vma->node); list_move_tail(&vma->vm_link, &vma->vm->unbound_list); + mutex_unlock(&vma->vm->mutex); /* * Since the unbound list is global, only move to that list if @@ -797,23 +804,27 @@ void i915_vma_reopen(struct i915_vma *vma) static void __i915_vma_destroy(struct i915_vma *vma) { struct drm_i915_private *i915 = vma->vm->i915; - struct i915_vma_active *iter, *n; GEM_BUG_ON(vma->node.allocated); GEM_BUG_ON(vma->fence); - GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence)); + GEM_BUG_ON(i915_active_request_isset(&vma->last_fence)); - list_del(&vma->obj_link); + mutex_lock(&vma->vm->mutex); list_del(&vma->vm_link); - if (vma->obj) - rb_erase(&vma->obj_node, &vma->obj->vma_tree); + mutex_unlock(&vma->vm->mutex); + + if (vma->obj) { + struct drm_i915_gem_object *obj = vma->obj; - rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) { - GEM_BUG_ON(i915_gem_active_isset(&iter->base)); - kfree(iter); + spin_lock(&obj->vma.lock); + list_del(&vma->obj_link); + rb_erase(&vma->obj_node, &vma->obj->vma.tree); + spin_unlock(&obj->vma.lock); } + i915_active_fini(&vma->active); + kmem_cache_free(i915->vmas, vma); } @@ -897,104 +908,15 @@ static void export_fence(struct i915_vma *vma, reservation_object_unlock(resv); } -static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx) -{ - struct i915_vma_active *active; - struct rb_node **p, *parent; - struct i915_request *old; - - /* - * We track the most recently used timeline to skip a rbtree search - * for the common case, under typical loads we never need the rbtree - * at all. We can reuse the last_active slot if it is empty, that is - * after the previous activity has been retired, or if the active - * matches the current timeline. - * - * Note that we allow the timeline to be active simultaneously in - * the rbtree and the last_active cache. We do this to avoid having - * to search and replace the rbtree element for a new timeline, with - * the cost being that we must be aware that the vma may be retired - * twice for the same timeline (as the older rbtree element will be - * retired before the new request added to last_active). - */ - old = i915_gem_active_raw(&vma->last_active, - &vma->vm->i915->drm.struct_mutex); - if (!old || old->fence.context == idx) - goto out; - - /* Move the currently active fence into the rbtree */ - idx = old->fence.context; - - parent = NULL; - p = &vma->active.rb_node; - while (*p) { - parent = *p; - - active = rb_entry(parent, struct i915_vma_active, node); - if (active->timeline == idx) - goto replace; - - if (active->timeline < idx) - p = &parent->rb_right; - else - p = &parent->rb_left; - } - - active = kmalloc(sizeof(*active), GFP_KERNEL); - - /* kmalloc may retire the vma->last_active request (thanks shrinker)! */ - if (unlikely(!i915_gem_active_raw(&vma->last_active, - &vma->vm->i915->drm.struct_mutex))) { - kfree(active); - goto out; - } - - if (unlikely(!active)) - return ERR_PTR(-ENOMEM); - - init_request_active(&active->base, i915_vma_retire); - active->vma = vma; - active->timeline = idx; - - rb_link_node(&active->node, parent, p); - rb_insert_color(&active->node, &vma->active); - -replace: - /* - * Overwrite the previous active slot in the rbtree with last_active, - * leaving last_active zeroed. If the previous slot is still active, - * we must be careful as we now only expect to receive one retire - * callback not two, and so much undo the active counting for the - * overwritten slot. - */ - if (i915_gem_active_isset(&active->base)) { - /* Retire ourselves from the old rq->active_list */ - __list_del_entry(&active->base.link); - vma->active_count--; - GEM_BUG_ON(!vma->active_count); - } - GEM_BUG_ON(list_empty(&vma->last_active.link)); - list_replace_init(&vma->last_active.link, &active->base.link); - active->base.request = fetch_and_zero(&vma->last_active.request); - -out: - return &vma->last_active; -} - int i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq, unsigned int flags) { struct drm_i915_gem_object *obj = vma->obj; - struct i915_gem_active *active; lockdep_assert_held(&rq->i915->drm.struct_mutex); GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - active = active_instance(vma, rq->fence.context); - if (IS_ERR(active)) - return PTR_ERR(active); - /* * Add a reference if we're newly entering the active list. * The order in which we add operations to the retirement queue is @@ -1003,11 +925,15 @@ int i915_vma_move_to_active(struct i915_vma *vma, * add the active reference first and queue for it to be dropped * *last*. */ - if (!i915_gem_active_isset(active) && !vma->active_count++) { - list_move_tail(&vma->vm_link, &vma->vm->active_list); + if (!vma->active.count) obj->active_count++; + + if (unlikely(i915_active_ref(&vma->active, rq->fence.context, rq))) { + if (!vma->active.count) + obj->active_count--; + return -ENOMEM; } - i915_gem_active_set(active, rq); + GEM_BUG_ON(!i915_vma_is_active(vma)); GEM_BUG_ON(!obj->active_count); @@ -1016,14 +942,14 @@ int i915_vma_move_to_active(struct i915_vma *vma, obj->write_domain = I915_GEM_DOMAIN_RENDER; if (intel_fb_obj_invalidate(obj, ORIGIN_CS)) - i915_gem_active_set(&obj->frontbuffer_write, rq); + __i915_active_request_set(&obj->frontbuffer_write, rq); obj->read_domains = 0; } obj->read_domains |= I915_GEM_GPU_DOMAINS; if (flags & EXEC_OBJECT_NEEDS_FENCE) - i915_gem_active_set(&vma->last_fence, rq); + __i915_active_request_set(&vma->last_fence, rq); export_fence(vma, rq, flags); return 0; @@ -1041,8 +967,6 @@ int i915_vma_unbind(struct i915_vma *vma) */ might_sleep(); if (i915_vma_is_active(vma)) { - struct i915_vma_active *active, *n; - /* * When a closed VMA is retired, it is unbound - eek. * In order to prevent it from being recursively closed, @@ -1058,21 +982,12 @@ int i915_vma_unbind(struct i915_vma *vma) */ __i915_vma_pin(vma); - ret = i915_gem_active_retire(&vma->last_active, - &vma->vm->i915->drm.struct_mutex); + ret = i915_active_wait(&vma->active); if (ret) goto unpin; - rbtree_postorder_for_each_entry_safe(active, n, - &vma->active, node) { - ret = i915_gem_active_retire(&active->base, - &vma->vm->i915->drm.struct_mutex); - if (ret) - goto unpin; - } - - ret = i915_gem_active_retire(&vma->last_fence, - &vma->vm->i915->drm.struct_mutex); + ret = i915_active_request_retire(&vma->last_fence, + &vma->vm->i915->drm.struct_mutex); unpin: __i915_vma_unpin(vma); if (ret) diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 4f7c1c7599f4..7c742027f866 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -34,6 +34,7 @@ #include "i915_gem_fence_reg.h" #include "i915_gem_object.h" +#include "i915_active.h" #include "i915_request.h" enum i915_cache_level; @@ -71,34 +72,45 @@ struct i915_vma { unsigned int open_count; unsigned long flags; /** - * How many users have pinned this object in GTT space. The following - * users can each hold at most one reference: pwrite/pread, execbuffer - * (objects are not allowed multiple times for the same batchbuffer), - * and the framebuffer code. When switching/pageflipping, the - * framebuffer code has at most two buffers pinned per crtc. + * How many users have pinned this object in GTT space. * - * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 - * bits with absolutely no headroom. So use 4 bits. + * This is a tightly bound, fairly small number of users, so we + * stuff inside the flags field so that we can both check for overflow + * and detect a no-op i915_vma_pin() in a single check, while also + * pinning the vma. + * + * The worst case display setup would have the same vma pinned for + * use on each plane on each crtc, while also building the next atomic + * state and holding a pin for the length of the cleanup queue. In the + * future, the flip queue may be increased from 1. + * Estimated worst case: 3 [qlen] * 4 [max crtcs] * 7 [max planes] = 84 + * + * For GEM, the number of concurrent users for pwrite/pread is + * unbounded. For execbuffer, it is currently one but will in future + * be extended to allow multiple clients to pin vma concurrently. + * + * We also use suballocated pages, with each suballocation claiming + * its own pin on the shared vma. At present, this is limited to + * exclusive cachelines of a single page, so a maximum of 64 possible + * users. */ -#define I915_VMA_PIN_MASK 0xf -#define I915_VMA_PIN_OVERFLOW BIT(5) +#define I915_VMA_PIN_MASK 0xff +#define I915_VMA_PIN_OVERFLOW BIT(8) /** Flags and address space this VMA is bound to */ -#define I915_VMA_GLOBAL_BIND BIT(6) -#define I915_VMA_LOCAL_BIND BIT(7) +#define I915_VMA_GLOBAL_BIND BIT(9) +#define I915_VMA_LOCAL_BIND BIT(10) #define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW) -#define I915_VMA_GGTT BIT(8) -#define I915_VMA_CAN_FENCE BIT(9) -#define I915_VMA_CLOSED BIT(10) -#define I915_VMA_USERFAULT_BIT 11 +#define I915_VMA_GGTT BIT(11) +#define I915_VMA_CAN_FENCE BIT(12) +#define I915_VMA_CLOSED BIT(13) +#define I915_VMA_USERFAULT_BIT 14 #define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT) -#define I915_VMA_GGTT_WRITE BIT(12) +#define I915_VMA_GGTT_WRITE BIT(15) - unsigned int active_count; - struct rb_root active; - struct i915_gem_active last_active; - struct i915_gem_active last_fence; + struct i915_active active; + struct i915_active_request last_fence; /** * Support different GGTT views into the same object. @@ -141,9 +153,9 @@ i915_vma_instance(struct drm_i915_gem_object *obj, void i915_vma_unpin_and_release(struct i915_vma **p_vma, unsigned int flags); #define I915_VMA_RELEASE_MAP BIT(0) -static inline bool i915_vma_is_active(struct i915_vma *vma) +static inline bool i915_vma_is_active(const struct i915_vma *vma) { - return vma->active_count; + return !i915_active_is_idle(&vma->active); } int __must_check i915_vma_move_to_active(struct i915_vma *vma, @@ -425,7 +437,7 @@ void i915_vma_parked(struct drm_i915_private *i915); * or the list is empty ofc. */ #define for_each_ggtt_vma(V, OBJ) \ - list_for_each_entry(V, &(OBJ)->vma_list, obj_link) \ + list_for_each_entry(V, &(OBJ)->vma.list, obj_link) \ for_each_until(!i915_vma_is_ggtt(V)) #endif diff --git a/drivers/gpu/drm/i915/icl_dsi.c b/drivers/gpu/drm/i915/icl_dsi.c index d9dcee4ec51f..73a7bee24a66 100644 --- a/drivers/gpu/drm/i915/icl_dsi.c +++ b/drivers/gpu/drm/i915/icl_dsi.c @@ -337,9 +337,11 @@ static void gen11_dsi_enable_io_power(struct intel_encoder *encoder) } for_each_dsi_port(port, intel_dsi->ports) { - intel_display_power_get(dev_priv, port == PORT_A ? - POWER_DOMAIN_PORT_DDI_A_IO : - POWER_DOMAIN_PORT_DDI_B_IO); + intel_dsi->io_wakeref[port] = + intel_display_power_get(dev_priv, + port == PORT_A ? + POWER_DOMAIN_PORT_DDI_A_IO : + POWER_DOMAIN_PORT_DDI_B_IO); } } @@ -1125,10 +1127,18 @@ static void gen11_dsi_disable_io_power(struct intel_encoder *encoder) enum port port; u32 tmp; - intel_display_power_put(dev_priv, POWER_DOMAIN_PORT_DDI_A_IO); - - if (intel_dsi->dual_link) - intel_display_power_put(dev_priv, POWER_DOMAIN_PORT_DDI_B_IO); + for_each_dsi_port(port, intel_dsi->ports) { + intel_wakeref_t wakeref; + + wakeref = fetch_and_zero(&intel_dsi->io_wakeref[port]); + if (wakeref) { + intel_display_power_put(dev_priv, + port == PORT_A ? + POWER_DOMAIN_PORT_DDI_A_IO : + POWER_DOMAIN_PORT_DDI_B_IO, + wakeref); + } + } /* set mode to DDI */ for_each_dsi_port(port, intel_dsi->ports) { @@ -1229,13 +1239,15 @@ static bool gen11_dsi_get_hw_state(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - u32 tmp; - enum port port; enum transcoder dsi_trans; + intel_wakeref_t wakeref; + enum port port; bool ret = false; + u32 tmp; - if (!intel_display_power_get_if_enabled(dev_priv, - encoder->power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain); + if (!wakeref) return false; for_each_dsi_port(port, intel_dsi->ports) { @@ -1260,7 +1272,7 @@ static bool gen11_dsi_get_hw_state(struct intel_encoder *encoder, ret = tmp & PIPECONF_ENABLE; } out: - intel_display_power_put(dev_priv, encoder->power_domain); + intel_display_power_put(dev_priv, encoder->power_domain, wakeref); return ret; } @@ -1378,6 +1390,7 @@ void icl_dsi_init(struct drm_i915_private *dev_priv) encoder->disable = gen11_dsi_disable; encoder->port = port; encoder->get_config = gen11_dsi_get_config; + encoder->update_pipe = intel_panel_update_backlight; encoder->compute_config = gen11_dsi_compute_config; encoder->get_hw_state = gen11_dsi_get_hw_state; encoder->type = INTEL_OUTPUT_DSI; diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c index f415ed239184..7cf9290ea34a 100644 --- a/drivers/gpu/drm/i915/intel_atomic.c +++ b/drivers/gpu/drm/i915/intel_atomic.c @@ -48,7 +48,7 @@ int intel_digital_connector_atomic_get_property(struct drm_connector *connector, const struct drm_connector_state *state, struct drm_property *property, - uint64_t *val) + u64 *val) { struct drm_device *dev = connector->dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -80,7 +80,7 @@ int intel_digital_connector_atomic_get_property(struct drm_connector *connector, int intel_digital_connector_atomic_set_property(struct drm_connector *connector, struct drm_connector_state *state, struct drm_property *property, - uint64_t val) + u64 val) { struct drm_device *dev = connector->dev; struct drm_i915_private *dev_priv = to_i915(dev); diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index 79139d496c78..db0965904439 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -112,41 +112,39 @@ intel_plane_destroy_state(struct drm_plane *plane, } int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state, - struct intel_crtc_state *crtc_state, + struct intel_crtc_state *new_crtc_state, const struct intel_plane_state *old_plane_state, - struct intel_plane_state *intel_state) + struct intel_plane_state *new_plane_state) { - struct drm_plane *plane = intel_state->base.plane; - struct drm_plane_state *state = &intel_state->base; - struct intel_plane *intel_plane = to_intel_plane(plane); + struct intel_plane *plane = to_intel_plane(new_plane_state->base.plane); int ret; - crtc_state->active_planes &= ~BIT(intel_plane->id); - crtc_state->nv12_planes &= ~BIT(intel_plane->id); - intel_state->base.visible = false; + new_crtc_state->active_planes &= ~BIT(plane->id); + new_crtc_state->nv12_planes &= ~BIT(plane->id); + new_plane_state->base.visible = false; - /* If this is a cursor plane, no further checks are needed. */ - if (!intel_state->base.crtc && !old_plane_state->base.crtc) + if (!new_plane_state->base.crtc && !old_plane_state->base.crtc) return 0; - ret = intel_plane->check_plane(crtc_state, intel_state); + ret = plane->check_plane(new_crtc_state, new_plane_state); if (ret) return ret; /* FIXME pre-g4x don't work like this */ - if (state->visible) - crtc_state->active_planes |= BIT(intel_plane->id); + if (new_plane_state->base.visible) + new_crtc_state->active_planes |= BIT(plane->id); - if (state->visible && state->fb->format->format == DRM_FORMAT_NV12) - crtc_state->nv12_planes |= BIT(intel_plane->id); + if (new_plane_state->base.visible && + new_plane_state->base.fb->format->format == DRM_FORMAT_NV12) + new_crtc_state->nv12_planes |= BIT(plane->id); - if (state->visible || old_plane_state->base.visible) - crtc_state->update_planes |= BIT(intel_plane->id); + if (new_plane_state->base.visible || old_plane_state->base.visible) + new_crtc_state->update_planes |= BIT(plane->id); return intel_plane_atomic_calc_changes(old_crtc_state, - &crtc_state->base, + &new_crtc_state->base, old_plane_state, - state); + &new_plane_state->base); } static int intel_plane_atomic_check(struct drm_plane *plane, @@ -313,7 +311,7 @@ int intel_plane_atomic_get_property(struct drm_plane *plane, const struct drm_plane_state *state, struct drm_property *property, - uint64_t *val) + u64 *val) { DRM_DEBUG_KMS("Unknown property [PROP:%d:%s]\n", property->base.id, property->name); @@ -336,7 +334,7 @@ int intel_plane_atomic_set_property(struct drm_plane *plane, struct drm_plane_state *state, struct drm_property *property, - uint64_t val) + u64 val) { DRM_DEBUG_KMS("Unknown property [PROP:%d:%s]\n", property->base.id, property->name); diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 202a58cf2d9f..de26cd0a5497 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -748,7 +748,8 @@ static void i915_audio_component_get_power(struct device *kdev) static void i915_audio_component_put_power(struct device *kdev) { - intel_display_power_put(kdev_to_i915(kdev), POWER_DOMAIN_AUDIO); + intel_display_power_put_unchecked(kdev_to_i915(kdev), + POWER_DOMAIN_AUDIO); } static void i915_audio_component_codec_wake_override(struct device *kdev, diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 140c218128cb..b508d8a735e0 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1663,6 +1663,13 @@ init_vbt_missing_defaults(struct drm_i915_private *dev_priv) struct ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port]; + /* + * VBT has the TypeC mode (native,TBT/USB) and we don't want + * to detect it. + */ + if (intel_port_is_tc(dev_priv, port)) + continue; + info->supports_dvi = (port != PORT_A && port != PORT_E); info->supports_hdmi = info->supports_dvi; info->supports_dp = (port != PORT_E); @@ -1946,6 +1953,15 @@ bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port por }; int i; + if (HAS_DDI(dev_priv)) { + const struct ddi_vbt_port_info *port_info = + &dev_priv->vbt.ddi_port_info[port]; + + return port_info->supports_dp || + port_info->supports_dvi || + port_info->supports_hdmi; + } + /* FIXME maybe deal with port A as well? */ if (WARN_ON(port == PORT_A) || port >= ARRAY_SIZE(port_mapping)) return false; diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 4ed7105d7ff5..cacaa1d04d17 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -29,174 +29,146 @@ #define task_asleep(tsk) ((tsk)->state & TASK_NORMAL && !(tsk)->on_rq) -static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b) +static void irq_enable(struct intel_engine_cs *engine) { - struct intel_wait *wait; - unsigned int result = 0; - - lockdep_assert_held(&b->irq_lock); - - wait = b->irq_wait; - if (wait) { - /* - * N.B. Since task_asleep() and ttwu are not atomic, the - * waiter may actually go to sleep after the check, causing - * us to suppress a valid wakeup. We prefer to reduce the - * number of false positive missed_breadcrumb() warnings - * at the expense of a few false negatives, as it it easy - * to trigger a false positive under heavy load. Enough - * signal should remain from genuine missed_breadcrumb() - * for us to detect in CI. - */ - bool was_asleep = task_asleep(wait->tsk); - - result = ENGINE_WAKEUP_WAITER; - if (wake_up_process(wait->tsk) && was_asleep) - result |= ENGINE_WAKEUP_ASLEEP; - } + if (!engine->irq_enable) + return; - return result; + /* Caller disables interrupts */ + spin_lock(&engine->i915->irq_lock); + engine->irq_enable(engine); + spin_unlock(&engine->i915->irq_lock); } -unsigned int intel_engine_wakeup(struct intel_engine_cs *engine) +static void irq_disable(struct intel_engine_cs *engine) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - unsigned long flags; - unsigned int result; - - spin_lock_irqsave(&b->irq_lock, flags); - result = __intel_breadcrumbs_wakeup(b); - spin_unlock_irqrestore(&b->irq_lock, flags); - - return result; -} + if (!engine->irq_disable) + return; -static unsigned long wait_timeout(void) -{ - return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES); + /* Caller disables interrupts */ + spin_lock(&engine->i915->irq_lock); + engine->irq_disable(engine); + spin_unlock(&engine->i915->irq_lock); } -static noinline void missed_breadcrumb(struct intel_engine_cs *engine) +static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) { - if (GEM_SHOW_DEBUG()) { - struct drm_printer p = drm_debug_printer(__func__); + lockdep_assert_held(&b->irq_lock); - intel_engine_dump(engine, &p, - "%s missed breadcrumb at %pS\n", - engine->name, __builtin_return_address(0)); - } + GEM_BUG_ON(!b->irq_enabled); + if (!--b->irq_enabled) + irq_disable(container_of(b, + struct intel_engine_cs, + breadcrumbs)); - set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); + b->irq_armed = false; } -static void intel_breadcrumbs_hangcheck(struct timer_list *t) +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) { - struct intel_engine_cs *engine = - from_timer(engine, t, breadcrumbs.hangcheck); struct intel_breadcrumbs *b = &engine->breadcrumbs; - unsigned int irq_count; if (!b->irq_armed) return; - irq_count = READ_ONCE(b->irq_count); - if (b->hangcheck_interrupts != irq_count) { - b->hangcheck_interrupts = irq_count; - mod_timer(&b->hangcheck, wait_timeout()); - return; - } + spin_lock_irq(&b->irq_lock); + if (b->irq_armed) + __intel_breadcrumbs_disarm_irq(b); + spin_unlock_irq(&b->irq_lock); +} - /* We keep the hangcheck timer alive until we disarm the irq, even - * if there are no waiters at present. - * - * If the waiter was currently running, assume it hasn't had a chance - * to process the pending interrupt (e.g, low priority task on a loaded - * system) and wait until it sleeps before declaring a missed interrupt. - * - * If the waiter was asleep (and not even pending a wakeup), then we - * must have missed an interrupt as the GPU has stopped advancing - * but we still have a waiter. Assuming all batches complete within - * DRM_I915_HANGCHECK_JIFFIES [1.5s]! - */ - if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) { - missed_breadcrumb(engine); - mod_timer(&b->fake_irq, jiffies + 1); - } else { - mod_timer(&b->hangcheck, wait_timeout()); - } +static inline bool __request_completed(const struct i915_request *rq) +{ + return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno); } -static void intel_breadcrumbs_fake_irq(struct timer_list *t) +bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) { - struct intel_engine_cs *engine = - from_timer(engine, t, breadcrumbs.fake_irq); struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct intel_context *ce, *cn; + struct list_head *pos, *next; + LIST_HEAD(signal); - /* - * The timer persists in case we cannot enable interrupts, - * or if we have previously seen seqno/interrupt incoherency - * ("missed interrupt" syndrome, better known as a "missed breadcrumb"). - * Here the worker will wake up every jiffie in order to kick the - * oldest waiter to do the coherent seqno check. - */ + spin_lock(&b->irq_lock); - spin_lock_irq(&b->irq_lock); - if (b->irq_armed && !__intel_breadcrumbs_wakeup(b)) - __intel_engine_disarm_breadcrumbs(engine); - spin_unlock_irq(&b->irq_lock); - if (!b->irq_armed) - return; + if (b->irq_armed && list_empty(&b->signalers)) + __intel_breadcrumbs_disarm_irq(b); - /* If the user has disabled the fake-irq, restore the hangchecking */ - if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) { - mod_timer(&b->hangcheck, wait_timeout()); - return; - } + list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) { + GEM_BUG_ON(list_empty(&ce->signals)); - mod_timer(&b->fake_irq, jiffies + 1); -} + list_for_each_safe(pos, next, &ce->signals) { + struct i915_request *rq = + list_entry(pos, typeof(*rq), signal_link); -static void irq_enable(struct intel_engine_cs *engine) -{ - /* - * FIXME: Ideally we want this on the API boundary, but for the - * sake of testing with mock breadcrumbs (no HW so unable to - * enable irqs) we place it deep within the bowels, at the point - * of no return. - */ - GEM_BUG_ON(!intel_irqs_enabled(engine->i915)); + if (!__request_completed(rq)) + break; - /* Caller disables interrupts */ - if (engine->irq_enable) { - spin_lock(&engine->i915->irq_lock); - engine->irq_enable(engine); - spin_unlock(&engine->i915->irq_lock); + GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, + &rq->fence.flags)); + clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + + /* + * We may race with direct invocation of + * dma_fence_signal(), e.g. i915_request_retire(), + * in which case we can skip processing it ourselves. + */ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &rq->fence.flags)) + continue; + + /* + * Queue for execution after dropping the signaling + * spinlock as the callback chain may end up adding + * more signalers to the same context or engine. + */ + i915_request_get(rq); + list_add_tail(&rq->signal_link, &signal); + } + + /* + * We process the list deletion in bulk, only using a list_add + * (not list_move) above but keeping the status of + * rq->signal_link known with the I915_FENCE_FLAG_SIGNAL bit. + */ + if (!list_is_first(pos, &ce->signals)) { + /* Advance the list to the first incomplete request */ + __list_del_many(&ce->signals, pos); + if (&ce->signals == pos) /* now empty */ + list_del_init(&ce->signal_link); + } } -} -static void irq_disable(struct intel_engine_cs *engine) -{ - /* Caller disables interrupts */ - if (engine->irq_disable) { - spin_lock(&engine->i915->irq_lock); - engine->irq_disable(engine); - spin_unlock(&engine->i915->irq_lock); + spin_unlock(&b->irq_lock); + + list_for_each_safe(pos, next, &signal) { + struct i915_request *rq = + list_entry(pos, typeof(*rq), signal_link); + + dma_fence_signal(&rq->fence); + i915_request_put(rq); } + + return !list_empty(&signal); } -void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) +bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; + bool result; - lockdep_assert_held(&b->irq_lock); - GEM_BUG_ON(b->irq_wait); - GEM_BUG_ON(!b->irq_armed); + local_irq_disable(); + result = intel_engine_breadcrumbs_irq(engine); + local_irq_enable(); - GEM_BUG_ON(!b->irq_enabled); - if (!--b->irq_enabled) - irq_disable(engine); + return result; +} - b->irq_armed = false; +static void signal_irq_work(struct irq_work *work) +{ + struct intel_engine_cs *engine = + container_of(work, typeof(*engine), breadcrumbs.irq_work); + + intel_engine_breadcrumbs_irq(engine); } void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine) @@ -221,646 +193,155 @@ void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine) spin_unlock_irq(&b->irq_lock); } -void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct intel_wait *wait, *n; - - if (!b->irq_armed) - return; - - /* - * We only disarm the irq when we are idle (all requests completed), - * so if the bottom-half remains asleep, it missed the request - * completion. - */ - if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) - missed_breadcrumb(engine); - - spin_lock_irq(&b->rb_lock); - - spin_lock(&b->irq_lock); - b->irq_wait = NULL; - if (b->irq_armed) - __intel_engine_disarm_breadcrumbs(engine); - spin_unlock(&b->irq_lock); - - rbtree_postorder_for_each_entry_safe(wait, n, &b->waiters, node) { - GEM_BUG_ON(!intel_engine_signaled(engine, wait->seqno)); - RB_CLEAR_NODE(&wait->node); - wake_up_process(wait->tsk); - } - b->waiters = RB_ROOT; - - spin_unlock_irq(&b->rb_lock); -} - -static bool use_fake_irq(const struct intel_breadcrumbs *b) -{ - const struct intel_engine_cs *engine = - container_of(b, struct intel_engine_cs, breadcrumbs); - - if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) - return false; - - /* - * Only start with the heavy weight fake irq timer if we have not - * seen any interrupts since enabling it the first time. If the - * interrupts are still arriving, it means we made a mistake in our - * engine->seqno_barrier(), a timing error that should be transient - * and unlikely to reoccur. - */ - return READ_ONCE(b->irq_count) == b->hangcheck_interrupts; -} - -static void enable_fake_irq(struct intel_breadcrumbs *b) -{ - /* Ensure we never sleep indefinitely */ - if (!b->irq_enabled || use_fake_irq(b)) - mod_timer(&b->fake_irq, jiffies + 1); - else - mod_timer(&b->hangcheck, wait_timeout()); -} - -static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) +static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) { struct intel_engine_cs *engine = container_of(b, struct intel_engine_cs, breadcrumbs); - struct drm_i915_private *i915 = engine->i915; - bool enabled; lockdep_assert_held(&b->irq_lock); if (b->irq_armed) - return false; + return; - /* The breadcrumb irq will be disarmed on the interrupt after the + /* + * The breadcrumb irq will be disarmed on the interrupt after the * waiters are signaled. This gives us a single interrupt window in * which we can add a new waiter and avoid the cost of re-enabling * the irq. */ b->irq_armed = true; - if (I915_SELFTEST_ONLY(b->mock)) { - /* For our mock objects we want to avoid interaction - * with the real hardware (which is not set up). So - * we simply pretend we have enabled the powerwell - * and the irq, and leave it up to the mock - * implementation to call intel_engine_wakeup() - * itself when it wants to simulate a user interrupt, - */ - return true; - } - - /* Since we are waiting on a request, the GPU should be busy + /* + * Since we are waiting on a request, the GPU should be busy * and should have its own rpm reference. This is tracked * by i915->gt.awake, we can forgo holding our own wakref * for the interrupt as before i915->gt.awake is released (when * the driver is idle) we disarm the breadcrumbs. */ - /* No interrupts? Kick the waiter every jiffie! */ - enabled = false; - if (!b->irq_enabled++ && - !test_bit(engine->id, &i915->gpu_error.test_irq_rings)) { + if (!b->irq_enabled++) irq_enable(engine); - enabled = true; - } - - enable_fake_irq(b); - return enabled; -} - -static inline struct intel_wait *to_wait(struct rb_node *node) -{ - return rb_entry(node, struct intel_wait, node); -} - -static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b, - struct intel_wait *wait) -{ - lockdep_assert_held(&b->rb_lock); - GEM_BUG_ON(b->irq_wait == wait); - - /* - * This request is completed, so remove it from the tree, mark it as - * complete, and *then* wake up the associated task. N.B. when the - * task wakes up, it will find the empty rb_node, discern that it - * has already been removed from the tree and skip the serialisation - * of the b->rb_lock and b->irq_lock. This means that the destruction - * of the intel_wait is not serialised with the interrupt handler - * by the waiter - it must instead be serialised by the caller. - */ - rb_erase(&wait->node, &b->waiters); - RB_CLEAR_NODE(&wait->node); - - if (wait->tsk->state != TASK_RUNNING) - wake_up_process(wait->tsk); /* implicit smp_wmb() */ -} - -static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine, - struct rb_node *next) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - spin_lock(&b->irq_lock); - GEM_BUG_ON(!b->irq_armed); - GEM_BUG_ON(!b->irq_wait); - b->irq_wait = to_wait(next); - spin_unlock(&b->irq_lock); - - /* We always wake up the next waiter that takes over as the bottom-half - * as we may delegate not only the irq-seqno barrier to the next waiter - * but also the task of waking up concurrent waiters. - */ - if (next) - wake_up_process(to_wait(next)->tsk); } -static bool __intel_engine_add_wait(struct intel_engine_cs *engine, - struct intel_wait *wait) +void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct rb_node **p, *parent, *completed; - bool first, armed; - u32 seqno; - - GEM_BUG_ON(!wait->seqno); - - /* Insert the request into the retirement ordered list - * of waiters by walking the rbtree. If we are the oldest - * seqno in the tree (the first to be retired), then - * set ourselves as the bottom-half. - * - * As we descend the tree, prune completed branches since we hold the - * spinlock we know that the first_waiter must be delayed and can - * reduce some of the sequential wake up latency if we take action - * ourselves and wake up the completed tasks in parallel. Also, by - * removing stale elements in the tree, we may be able to reduce the - * ping-pong between the old bottom-half and ourselves as first-waiter. - */ - armed = false; - first = true; - parent = NULL; - completed = NULL; - seqno = intel_engine_get_seqno(engine); - - /* If the request completed before we managed to grab the spinlock, - * return now before adding ourselves to the rbtree. We let the - * current bottom-half handle any pending wakeups and instead - * try and get out of the way quickly. - */ - if (i915_seqno_passed(seqno, wait->seqno)) { - RB_CLEAR_NODE(&wait->node); - return first; - } - - p = &b->waiters.rb_node; - while (*p) { - parent = *p; - if (wait->seqno == to_wait(parent)->seqno) { - /* We have multiple waiters on the same seqno, select - * the highest priority task (that with the smallest - * task->prio) to serve as the bottom-half for this - * group. - */ - if (wait->tsk->prio > to_wait(parent)->tsk->prio) { - p = &parent->rb_right; - first = false; - } else { - p = &parent->rb_left; - } - } else if (i915_seqno_passed(wait->seqno, - to_wait(parent)->seqno)) { - p = &parent->rb_right; - if (i915_seqno_passed(seqno, to_wait(parent)->seqno)) - completed = parent; - else - first = false; - } else { - p = &parent->rb_left; - } - } - rb_link_node(&wait->node, parent, p); - rb_insert_color(&wait->node, &b->waiters); - - if (first) { - spin_lock(&b->irq_lock); - b->irq_wait = wait; - /* After assigning ourselves as the new bottom-half, we must - * perform a cursory check to prevent a missed interrupt. - * Either we miss the interrupt whilst programming the hardware, - * or if there was a previous waiter (for a later seqno) they - * may be woken instead of us (due to the inherent race - * in the unlocked read of b->irq_seqno_bh in the irq handler) - * and so we miss the wake up. - */ - armed = __intel_breadcrumbs_enable_irq(b); - spin_unlock(&b->irq_lock); - } - - if (completed) { - /* Advance the bottom-half (b->irq_wait) before we wake up - * the waiters who may scribble over their intel_wait - * just as the interrupt handler is dereferencing it via - * b->irq_wait. - */ - if (!first) { - struct rb_node *next = rb_next(completed); - GEM_BUG_ON(next == &wait->node); - __intel_breadcrumbs_next(engine, next); - } - do { - struct intel_wait *crumb = to_wait(completed); - completed = rb_prev(completed); - __intel_breadcrumbs_finish(b, crumb); - } while (completed); - } - - GEM_BUG_ON(!b->irq_wait); - GEM_BUG_ON(!b->irq_armed); - GEM_BUG_ON(rb_first(&b->waiters) != &b->irq_wait->node); + spin_lock_init(&b->irq_lock); + INIT_LIST_HEAD(&b->signalers); - return armed; + init_irq_work(&b->irq_work, signal_irq_work); } -bool intel_engine_add_wait(struct intel_engine_cs *engine, - struct intel_wait *wait) +void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - bool armed; - - spin_lock_irq(&b->rb_lock); - armed = __intel_engine_add_wait(engine, wait); - spin_unlock_irq(&b->rb_lock); - if (armed) - return armed; - - /* Make the caller recheck if its request has already started. */ - return intel_engine_has_started(engine, wait->seqno); -} + unsigned long flags; -static inline bool chain_wakeup(struct rb_node *rb, int priority) -{ - return rb && to_wait(rb)->tsk->prio <= priority; -} + spin_lock_irqsave(&b->irq_lock, flags); -static inline int wakeup_priority(struct intel_breadcrumbs *b, - struct task_struct *tsk) -{ - if (tsk == b->signaler) - return INT_MIN; + if (b->irq_enabled) + irq_enable(engine); else - return tsk->prio; -} - -static void __intel_engine_remove_wait(struct intel_engine_cs *engine, - struct intel_wait *wait) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - lockdep_assert_held(&b->rb_lock); - - if (RB_EMPTY_NODE(&wait->node)) - goto out; - - if (b->irq_wait == wait) { - const int priority = wakeup_priority(b, wait->tsk); - struct rb_node *next; - - /* We are the current bottom-half. Find the next candidate, - * the first waiter in the queue on the remaining oldest - * request. As multiple seqnos may complete in the time it - * takes us to wake up and find the next waiter, we have to - * wake up that waiter for it to perform its own coherent - * completion check. - */ - next = rb_next(&wait->node); - if (chain_wakeup(next, priority)) { - /* If the next waiter is already complete, - * wake it up and continue onto the next waiter. So - * if have a small herd, they will wake up in parallel - * rather than sequentially, which should reduce - * the overall latency in waking all the completed - * clients. - * - * However, waking up a chain adds extra latency to - * the first_waiter. This is undesirable if that - * waiter is a high priority task. - */ - u32 seqno = intel_engine_get_seqno(engine); - - while (i915_seqno_passed(seqno, to_wait(next)->seqno)) { - struct rb_node *n = rb_next(next); - - __intel_breadcrumbs_finish(b, to_wait(next)); - next = n; - if (!chain_wakeup(next, priority)) - break; - } - } - - __intel_breadcrumbs_next(engine, next); - } else { - GEM_BUG_ON(rb_first(&b->waiters) == &wait->node); - } - - GEM_BUG_ON(RB_EMPTY_NODE(&wait->node)); - rb_erase(&wait->node, &b->waiters); - RB_CLEAR_NODE(&wait->node); + irq_disable(engine); -out: - GEM_BUG_ON(b->irq_wait == wait); - GEM_BUG_ON(rb_first(&b->waiters) != - (b->irq_wait ? &b->irq_wait->node : NULL)); + spin_unlock_irqrestore(&b->irq_lock, flags); } -void intel_engine_remove_wait(struct intel_engine_cs *engine, - struct intel_wait *wait) +void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - /* Quick check to see if this waiter was already decoupled from - * the tree by the bottom-half to avoid contention on the spinlock - * by the herd. - */ - if (RB_EMPTY_NODE(&wait->node)) { - GEM_BUG_ON(READ_ONCE(b->irq_wait) == wait); - return; - } - - spin_lock_irq(&b->rb_lock); - __intel_engine_remove_wait(engine, wait); - spin_unlock_irq(&b->rb_lock); } -static void signaler_set_rtpriority(void) +bool i915_request_enable_breadcrumb(struct i915_request *rq) { - struct sched_param param = { .sched_priority = 1 }; - - sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); -} + struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; -static int intel_breadcrumbs_signaler(void *arg) -{ - struct intel_engine_cs *engine = arg; - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct i915_request *rq, *n; + GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); - /* Install ourselves with high priority to reduce signalling latency */ - signaler_set_rtpriority(); + if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) + return true; - do { - bool do_schedule = true; - LIST_HEAD(list); - u32 seqno; + spin_lock(&b->irq_lock); + if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags) && + !__request_completed(rq)) { + struct intel_context *ce = rq->hw_context; + struct list_head *pos; - set_current_state(TASK_INTERRUPTIBLE); - if (list_empty(&b->signals)) - goto sleep; + __intel_breadcrumbs_arm_irq(b); /* - * We are either woken up by the interrupt bottom-half, - * or by a client adding a new signaller. In both cases, - * the GPU seqno may have advanced beyond our oldest signal. - * If it has, propagate the signal, remove the waiter and - * check again with the next oldest signal. Otherwise we - * need to wait for a new interrupt from the GPU or for - * a new client. + * We keep the seqno in retirement order, so we can break + * inside intel_engine_breadcrumbs_irq as soon as we've passed + * the last completed request (or seen a request that hasn't + * event started). We could iterate the timeline->requests list, + * but keeping a separate signalers_list has the advantage of + * hopefully being much smaller than the full list and so + * provides faster iteration and detection when there are no + * more interrupts required for this context. + * + * We typically expect to add new signalers in order, so we + * start looking for our insertion point from the tail of + * the list. */ - seqno = intel_engine_get_seqno(engine); + list_for_each_prev(pos, &ce->signals) { + struct i915_request *it = + list_entry(pos, typeof(*it), signal_link); - spin_lock_irq(&b->rb_lock); - list_for_each_entry_safe(rq, n, &b->signals, signaling.link) { - u32 this = rq->signaling.wait.seqno; - - GEM_BUG_ON(!rq->signaling.wait.seqno); - - if (!i915_seqno_passed(seqno, this)) + if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno)) break; - - if (likely(this == i915_request_global_seqno(rq))) { - __intel_engine_remove_wait(engine, - &rq->signaling.wait); - - rq->signaling.wait.seqno = 0; - __list_del_entry(&rq->signaling.link); - - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &rq->fence.flags)) { - list_add_tail(&rq->signaling.link, - &list); - i915_request_get(rq); - } - } } - spin_unlock_irq(&b->rb_lock); - - if (!list_empty(&list)) { - local_bh_disable(); - list_for_each_entry_safe(rq, n, &list, signaling.link) { - dma_fence_signal(&rq->fence); - GEM_BUG_ON(!i915_request_completed(rq)); - i915_request_put(rq); - } - local_bh_enable(); /* kick start the tasklets */ - - /* - * If the engine is saturated we may be continually - * processing completed requests. This angers the - * NMI watchdog if we never let anything else - * have access to the CPU. Let's pretend to be nice - * and relinquish the CPU if we burn through the - * entire RT timeslice! - */ - do_schedule = need_resched(); - } - - if (unlikely(do_schedule)) { -sleep: - if (kthread_should_park()) - kthread_parkme(); - - if (unlikely(kthread_should_stop())) - break; + list_add(&rq->signal_link, pos); + if (pos == &ce->signals) /* catch transitions from empty list */ + list_move_tail(&ce->signal_link, &b->signalers); - schedule(); - } - } while (1); - __set_current_state(TASK_RUNNING); - - return 0; -} - -static void insert_signal(struct intel_breadcrumbs *b, - struct i915_request *request, - const u32 seqno) -{ - struct i915_request *iter; - - lockdep_assert_held(&b->rb_lock); - - /* - * A reasonable assumption is that we are called to add signals - * in sequence, as the requests are submitted for execution and - * assigned a global_seqno. This will be the case for the majority - * of internally generated signals (inter-engine signaling). - * - * Out of order waiters triggering random signaling enabling will - * be more problematic, but hopefully rare enough and the list - * small enough that the O(N) insertion sort is not an issue. - */ - - list_for_each_entry_reverse(iter, &b->signals, signaling.link) - if (i915_seqno_passed(seqno, iter->signaling.wait.seqno)) - break; - - list_add(&request->signaling.link, &iter->signaling.link); -} - -bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup) -{ - struct intel_engine_cs *engine = request->engine; - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct intel_wait *wait = &request->signaling.wait; - u32 seqno; - - /* - * Note that we may be called from an interrupt handler on another - * device (e.g. nouveau signaling a fence completion causing us - * to submit a request, and so enable signaling). As such, - * we need to make sure that all other users of b->rb_lock protect - * against interrupts, i.e. use spin_lock_irqsave. - */ - - /* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */ - GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&request->lock); - - seqno = i915_request_global_seqno(request); - if (!seqno) /* will be enabled later upon execution */ - return true; - - GEM_BUG_ON(wait->seqno); - wait->tsk = b->signaler; - wait->request = request; - wait->seqno = seqno; - - /* - * Add ourselves into the list of waiters, but registering our - * bottom-half as the signaller thread. As per usual, only the oldest - * waiter (not just signaller) is tasked as the bottom-half waking - * up all completed waiters after the user interrupt. - * - * If we are the oldest waiter, enable the irq (after which we - * must double check that the seqno did not complete). - */ - spin_lock(&b->rb_lock); - insert_signal(b, request, seqno); - wakeup &= __intel_engine_add_wait(engine, wait); - spin_unlock(&b->rb_lock); - - if (wakeup) { - wake_up_process(b->signaler); - return !intel_wait_complete(wait); + set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); } + spin_unlock(&b->irq_lock); - return true; + return !__request_completed(rq); } -void intel_engine_cancel_signaling(struct i915_request *request) +void i915_request_cancel_breadcrumb(struct i915_request *rq) { - struct intel_engine_cs *engine = request->engine; - struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; - GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&request->lock); - - if (!READ_ONCE(request->signaling.wait.seqno)) + if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) return; - spin_lock(&b->rb_lock); - __intel_engine_remove_wait(engine, &request->signaling.wait); - if (fetch_and_zero(&request->signaling.wait.seqno)) - __list_del_entry(&request->signaling.link); - spin_unlock(&b->rb_lock); -} - -int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct task_struct *tsk; - - spin_lock_init(&b->rb_lock); - spin_lock_init(&b->irq_lock); - - timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0); - timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0); - - INIT_LIST_HEAD(&b->signals); - - /* Spawn a thread to provide a common bottom-half for all signals. - * As this is an asynchronous interface we cannot steal the current - * task for handling the bottom-half to the user interrupt, therefore - * we create a thread to do the coherent seqno dance after the - * interrupt and then signal the waitqueue (via the dma-buf/fence). - */ - tsk = kthread_run(intel_breadcrumbs_signaler, engine, - "i915/signal:%d", engine->id); - if (IS_ERR(tsk)) - return PTR_ERR(tsk); - - b->signaler = tsk; - - return 0; -} - -static void cancel_fake_irq(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - del_timer_sync(&b->fake_irq); /* may queue b->hangcheck */ - del_timer_sync(&b->hangcheck); - clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); -} - -void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - unsigned long flags; - - spin_lock_irqsave(&b->irq_lock, flags); - - /* - * Leave the fake_irq timer enabled (if it is running), but clear the - * bit so that it turns itself off on its next wake up and goes back - * to the long hangcheck interval if still required. - */ - clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); + spin_lock(&b->irq_lock); + if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) { + struct intel_context *ce = rq->hw_context; - if (b->irq_enabled) - irq_enable(engine); - else - irq_disable(engine); + list_del(&rq->signal_link); + if (list_empty(&ce->signals)) + list_del_init(&ce->signal_link); - spin_unlock_irqrestore(&b->irq_lock, flags); + clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + } + spin_unlock(&b->irq_lock); } -void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) +void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, + struct drm_printer *p) { struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct intel_context *ce; + struct i915_request *rq; - /* The engines should be idle and all requests accounted for! */ - WARN_ON(READ_ONCE(b->irq_wait)); - WARN_ON(!RB_EMPTY_ROOT(&b->waiters)); - WARN_ON(!list_empty(&b->signals)); + if (list_empty(&b->signalers)) + return; - if (!IS_ERR_OR_NULL(b->signaler)) - kthread_stop(b->signaler); + drm_printf(p, "Signals:\n"); - cancel_fake_irq(engine); + spin_lock_irq(&b->irq_lock); + list_for_each_entry(ce, &b->signalers, signal_link) { + list_for_each_entry(rq, &ce->signals, signal_link) { + drm_printf(p, "\t[%llx:%llx%s] @ %dms\n", + rq->fence.context, rq->fence.seqno, + i915_request_completed(rq) ? "!" : + i915_request_started(rq) ? "*" : + "", + jiffies_to_msecs(jiffies - rq->emitted_jiffies)); + } + } + spin_unlock_irq(&b->irq_lock); } - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/intel_breadcrumbs.c" -#endif diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 2021e484a287..15ba950dee00 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -218,7 +218,7 @@ static unsigned int intel_hpll_vco(struct drm_i915_private *dev_priv) }; const unsigned int *vco_table; unsigned int vco; - uint8_t tmp = 0; + u8 tmp = 0; /* FIXME other chipsets? */ if (IS_GM45(dev_priv)) @@ -249,13 +249,13 @@ static void g33_get_cdclk(struct drm_i915_private *dev_priv, struct intel_cdclk_state *cdclk_state) { struct pci_dev *pdev = dev_priv->drm.pdev; - static const uint8_t div_3200[] = { 12, 10, 8, 7, 5, 16 }; - static const uint8_t div_4000[] = { 14, 12, 10, 8, 6, 20 }; - static const uint8_t div_4800[] = { 20, 14, 12, 10, 8, 24 }; - static const uint8_t div_5333[] = { 20, 16, 12, 12, 8, 28 }; - const uint8_t *div_table; + static const u8 div_3200[] = { 12, 10, 8, 7, 5, 16 }; + static const u8 div_4000[] = { 14, 12, 10, 8, 6, 20 }; + static const u8 div_4800[] = { 20, 14, 12, 10, 8, 24 }; + static const u8 div_5333[] = { 20, 16, 12, 12, 8, 28 }; + const u8 *div_table; unsigned int cdclk_sel; - uint16_t tmp = 0; + u16 tmp = 0; cdclk_state->vco = intel_hpll_vco(dev_priv); @@ -330,12 +330,12 @@ static void i965gm_get_cdclk(struct drm_i915_private *dev_priv, struct intel_cdclk_state *cdclk_state) { struct pci_dev *pdev = dev_priv->drm.pdev; - static const uint8_t div_3200[] = { 16, 10, 8 }; - static const uint8_t div_4000[] = { 20, 12, 10 }; - static const uint8_t div_5333[] = { 24, 16, 14 }; - const uint8_t *div_table; + static const u8 div_3200[] = { 16, 10, 8 }; + static const u8 div_4000[] = { 20, 12, 10 }; + static const u8 div_5333[] = { 24, 16, 14 }; + const u8 *div_table; unsigned int cdclk_sel; - uint16_t tmp = 0; + u16 tmp = 0; cdclk_state->vco = intel_hpll_vco(dev_priv); @@ -375,7 +375,7 @@ static void gm45_get_cdclk(struct drm_i915_private *dev_priv, { struct pci_dev *pdev = dev_priv->drm.pdev; unsigned int cdclk_sel; - uint16_t tmp = 0; + u16 tmp = 0; cdclk_state->vco = intel_hpll_vco(dev_priv); @@ -403,8 +403,8 @@ static void gm45_get_cdclk(struct drm_i915_private *dev_priv, static void hsw_get_cdclk(struct drm_i915_private *dev_priv, struct intel_cdclk_state *cdclk_state) { - uint32_t lcpll = I915_READ(LCPLL_CTL); - uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; + u32 lcpll = I915_READ(LCPLL_CTL); + u32 freq = lcpll & LCPLL_CLK_FREQ_MASK; if (lcpll & LCPLL_CD_SOURCE_FCLK) cdclk_state->cdclk = 800000; @@ -520,6 +520,7 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, { int cdclk = cdclk_state->cdclk; u32 val, cmd = cdclk_state->voltage_level; + intel_wakeref_t wakeref; switch (cdclk) { case 400000: @@ -539,7 +540,7 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, * a system suspend. So grab the PIPE-A domain, which covers * the HW blocks needed for the following programming. */ - intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); + wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); @@ -593,7 +594,7 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, vlv_program_pfi_credits(dev_priv); - intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A); + intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A, wakeref); } static void chv_set_cdclk(struct drm_i915_private *dev_priv, @@ -601,6 +602,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, { int cdclk = cdclk_state->cdclk; u32 val, cmd = cdclk_state->voltage_level; + intel_wakeref_t wakeref; switch (cdclk) { case 333333: @@ -619,7 +621,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, * a system suspend. So grab the PIPE-A domain, which covers * the HW blocks needed for the following programming. */ - intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); + wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); @@ -637,7 +639,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, vlv_program_pfi_credits(dev_priv); - intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A); + intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A, wakeref); } static int bdw_calc_cdclk(int min_cdclk) @@ -670,8 +672,8 @@ static u8 bdw_calc_voltage_level(int cdclk) static void bdw_get_cdclk(struct drm_i915_private *dev_priv, struct intel_cdclk_state *cdclk_state) { - uint32_t lcpll = I915_READ(LCPLL_CTL); - uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; + u32 lcpll = I915_READ(LCPLL_CTL); + u32 freq = lcpll & LCPLL_CLK_FREQ_MASK; if (lcpll & LCPLL_CD_SOURCE_FCLK) cdclk_state->cdclk = 800000; @@ -698,7 +700,7 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, const struct intel_cdclk_state *cdclk_state) { int cdclk = cdclk_state->cdclk; - uint32_t val; + u32 val; int ret; if (WARN((I915_READ(LCPLL_CTL) & @@ -1081,7 +1083,7 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv) { - uint32_t cdctl, expected; + u32 cdctl, expected; /* * check if the pre-os initialized the display @@ -2688,7 +2690,7 @@ static int vlv_hrawclk(struct drm_i915_private *dev_priv) static int g4x_hrawclk(struct drm_i915_private *dev_priv) { - uint32_t clkcfg; + u32 clkcfg; /* hrawclock is 1/4 the FSB frequency */ clkcfg = I915_READ(CLKCFG); diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index 37fd9ddf762e..71a1f12c6b2a 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -74,12 +74,12 @@ #define ILK_CSC_COEFF_1_0 \ ((7 << 12) | ILK_CSC_COEFF_FP(CTM_COEFF_1_0, 8)) -static bool lut_is_legacy(struct drm_property_blob *lut) +static bool lut_is_legacy(const struct drm_property_blob *lut) { return drm_color_lut_size(lut) == LEGACY_LUT_LENGTH; } -static bool crtc_state_is_legacy_gamma(struct intel_crtc_state *crtc_state) +static bool crtc_state_is_legacy_gamma(const struct intel_crtc_state *crtc_state) { return !crtc_state->base.degamma_lut && !crtc_state->base.ctm && @@ -115,8 +115,8 @@ static u64 *ctm_mult_by_limited(u64 *result, const u64 *input) static void ilk_load_ycbcr_conversion_matrix(struct intel_crtc *crtc) { - int pipe = crtc->pipe; struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; I915_WRITE(PIPE_CSC_PREOFF_HI(pipe), 0); I915_WRITE(PIPE_CSC_PREOFF_ME(pipe), 0); @@ -137,13 +137,14 @@ static void ilk_load_ycbcr_conversion_matrix(struct intel_crtc *crtc) I915_WRITE(PIPE_CSC_MODE(pipe), 0); } -static void ilk_load_csc_matrix(struct intel_crtc_state *crtc_state) +static void ilk_load_csc_matrix(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - int i, pipe = crtc->pipe; - uint16_t coeffs[9] = { 0, }; bool limited_color_range = false; + enum pipe pipe = crtc->pipe; + u16 coeffs[9] = {}; + int i; /* * FIXME if there's a gamma LUT after the CSC, we should @@ -171,7 +172,7 @@ static void ilk_load_csc_matrix(struct intel_crtc_state *crtc_state) * hardware. */ for (i = 0; i < ARRAY_SIZE(coeffs); i++) { - uint64_t abs_coeff = ((1ULL << 63) - 1) & input[i]; + u64 abs_coeff = ((1ULL << 63) - 1) & input[i]; /* * Clamp input value to min/max supported by @@ -233,7 +234,7 @@ static void ilk_load_csc_matrix(struct intel_crtc_state *crtc_state) I915_WRITE(PIPE_CSC_PREOFF_LO(pipe), 0); if (INTEL_GEN(dev_priv) > 6) { - uint16_t postoff = 0; + u16 postoff = 0; if (limited_color_range) postoff = (16 * (1 << 12) / 255) & 0x1fff; @@ -244,7 +245,7 @@ static void ilk_load_csc_matrix(struct intel_crtc_state *crtc_state) I915_WRITE(PIPE_CSC_MODE(pipe), 0); } else { - uint32_t mode = CSC_MODE_YUV_TO_RGB; + u32 mode = CSC_MODE_YUV_TO_RGB; if (limited_color_range) mode |= CSC_BLACK_SCREEN_OFFSET; @@ -256,20 +257,20 @@ static void ilk_load_csc_matrix(struct intel_crtc_state *crtc_state) /* * Set up the pipe CSC unit on CherryView. */ -static void cherryview_load_csc_matrix(struct intel_crtc_state *crtc_state) +static void cherryview_load_csc_matrix(const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc_state->base.crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; - uint32_t mode; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + u32 mode; if (crtc_state->base.ctm) { - struct drm_color_ctm *ctm = crtc_state->base.ctm->data; - uint16_t coeffs[9] = { 0, }; + const struct drm_color_ctm *ctm = crtc_state->base.ctm->data; + u16 coeffs[9] = {}; int i; for (i = 0; i < ARRAY_SIZE(coeffs); i++) { - uint64_t abs_coeff = + u64 abs_coeff = ((1ULL << 63) - 1) & ctm->matrix[i]; /* Round coefficient. */ @@ -303,25 +304,16 @@ static void cherryview_load_csc_matrix(struct intel_crtc_state *crtc_state) I915_WRITE(CGM_PIPE_MODE(pipe), mode); } -void intel_color_set_csc(struct intel_crtc_state *crtc_state) -{ - struct drm_device *dev = crtc_state->base.crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - - if (dev_priv->display.load_csc_matrix) - dev_priv->display.load_csc_matrix(crtc_state); -} - /* Loads the legacy palette/gamma unit for the CRTC. */ -static void i9xx_load_luts_internal(struct intel_crtc_state *crtc_state, - struct drm_property_blob *blob) +static void i9xx_load_luts_internal(const struct intel_crtc_state *crtc_state, + const struct drm_property_blob *blob) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; int i; - if (HAS_GMCH_DISPLAY(dev_priv)) { + if (HAS_GMCH(dev_priv)) { if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI)) assert_dsi_pll_enabled(dev_priv); else @@ -329,23 +321,24 @@ static void i9xx_load_luts_internal(struct intel_crtc_state *crtc_state, } if (blob) { - struct drm_color_lut *lut = blob->data; + const struct drm_color_lut *lut = blob->data; + for (i = 0; i < 256; i++) { - uint32_t word = + u32 word = (drm_color_lut_extract(lut[i].red, 8) << 16) | (drm_color_lut_extract(lut[i].green, 8) << 8) | drm_color_lut_extract(lut[i].blue, 8); - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) I915_WRITE(PALETTE(pipe, i), word); else I915_WRITE(LGC_PALETTE(pipe, i), word); } } else { for (i = 0; i < 256; i++) { - uint32_t word = (i << 16) | (i << 8) | i; + u32 word = (i << 16) | (i << 8) | i; - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) I915_WRITE(PALETTE(pipe, i), word); else I915_WRITE(LGC_PALETTE(pipe, i), word); @@ -353,51 +346,37 @@ static void i9xx_load_luts_internal(struct intel_crtc_state *crtc_state, } } -static void i9xx_load_luts(struct intel_crtc_state *crtc_state) +static void i9xx_load_luts(const struct intel_crtc_state *crtc_state) { i9xx_load_luts_internal(crtc_state, crtc_state->base.gamma_lut); } -/* Loads the legacy palette/gamma unit for the CRTC on Haswell. */ -static void haswell_load_luts(struct intel_crtc_state *crtc_state) +static void hsw_color_commit(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - bool reenable_ips = false; - - /* - * Workaround : Do not read or write the pipe palette/gamma data while - * GAMMA_MODE is configured for split gamma and IPS_CTL has IPS enabled. - */ - if (IS_HASWELL(dev_priv) && crtc_state->ips_enabled && - (crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT)) { - hsw_disable_ips(crtc_state); - reenable_ips = true; - } - - crtc_state->gamma_mode = GAMMA_MODE_MODE_8BIT; - I915_WRITE(GAMMA_MODE(crtc->pipe), GAMMA_MODE_MODE_8BIT); - i9xx_load_luts(crtc_state); + I915_WRITE(GAMMA_MODE(crtc->pipe), crtc_state->gamma_mode); - if (reenable_ips) - hsw_enable_ips(crtc_state); + ilk_load_csc_matrix(crtc_state); } -static void bdw_load_degamma_lut(struct intel_crtc_state *crtc_state) +static void bdw_load_degamma_lut(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; - uint32_t i, lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct drm_property_blob *degamma_lut = crtc_state->base.degamma_lut; + u32 i, lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; + enum pipe pipe = crtc->pipe; I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_SPLIT_MODE | PAL_PREC_AUTO_INCREMENT); - if (crtc_state->base.degamma_lut) { - struct drm_color_lut *lut = crtc_state->base.degamma_lut->data; + if (degamma_lut) { + const struct drm_color_lut *lut = degamma_lut->data; for (i = 0; i < lut_size; i++) { - uint32_t word = + u32 word = drm_color_lut_extract(lut[i].red, 10) << 20 | drm_color_lut_extract(lut[i].green, 10) << 10 | drm_color_lut_extract(lut[i].blue, 10); @@ -406,7 +385,7 @@ static void bdw_load_degamma_lut(struct intel_crtc_state *crtc_state) } } else { for (i = 0; i < lut_size; i++) { - uint32_t v = (i * ((1 << 10) - 1)) / (lut_size - 1); + u32 v = (i * ((1 << 10) - 1)) / (lut_size - 1); I915_WRITE(PREC_PAL_DATA(pipe), (v << 20) | (v << 10) | v); @@ -414,11 +393,13 @@ static void bdw_load_degamma_lut(struct intel_crtc_state *crtc_state) } } -static void bdw_load_gamma_lut(struct intel_crtc_state *crtc_state, u32 offset) +static void bdw_load_gamma_lut(const struct intel_crtc_state *crtc_state, u32 offset) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; - uint32_t i, lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; + u32 i, lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + enum pipe pipe = crtc->pipe; WARN_ON(offset & ~PAL_PREC_INDEX_VALUE_MASK); @@ -427,11 +408,11 @@ static void bdw_load_gamma_lut(struct intel_crtc_state *crtc_state, u32 offset) PAL_PREC_AUTO_INCREMENT | offset); - if (crtc_state->base.gamma_lut) { - struct drm_color_lut *lut = crtc_state->base.gamma_lut->data; + if (gamma_lut) { + const struct drm_color_lut *lut = gamma_lut->data; for (i = 0; i < lut_size; i++) { - uint32_t word = + u32 word = (drm_color_lut_extract(lut[i].red, 10) << 20) | (drm_color_lut_extract(lut[i].green, 10) << 10) | drm_color_lut_extract(lut[i].blue, 10); @@ -449,7 +430,7 @@ static void bdw_load_gamma_lut(struct intel_crtc_state *crtc_state, u32 offset) drm_color_lut_extract(lut[i].blue, 16)); } else { for (i = 0; i < lut_size; i++) { - uint32_t v = (i * ((1 << 10) - 1)) / (lut_size - 1); + u32 v = (i * ((1 << 10) - 1)) / (lut_size - 1); I915_WRITE(PREC_PAL_DATA(pipe), (v << 20) | (v << 10) | v); @@ -462,37 +443,34 @@ static void bdw_load_gamma_lut(struct intel_crtc_state *crtc_state, u32 offset) } /* Loads the palette/gamma unit for the CRTC on Broadwell+. */ -static void broadwell_load_luts(struct intel_crtc_state *crtc_state) +static void broadwell_load_luts(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; if (crtc_state_is_legacy_gamma(crtc_state)) { - haswell_load_luts(crtc_state); - return; - } - - bdw_load_degamma_lut(crtc_state); - bdw_load_gamma_lut(crtc_state, - INTEL_INFO(dev_priv)->color.degamma_lut_size); - - crtc_state->gamma_mode = GAMMA_MODE_MODE_SPLIT; - I915_WRITE(GAMMA_MODE(pipe), GAMMA_MODE_MODE_SPLIT); - POSTING_READ(GAMMA_MODE(pipe)); + i9xx_load_luts(crtc_state); + } else { + bdw_load_degamma_lut(crtc_state); + bdw_load_gamma_lut(crtc_state, + INTEL_INFO(dev_priv)->color.degamma_lut_size); - /* - * Reset the index, otherwise it prevents the legacy palette to be - * written properly. - */ - I915_WRITE(PREC_PAL_INDEX(pipe), 0); + /* + * Reset the index, otherwise it prevents the legacy palette to be + * written properly. + */ + I915_WRITE(PREC_PAL_INDEX(pipe), 0); + } } -static void glk_load_degamma_lut(struct intel_crtc_state *crtc_state) +static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; - const uint32_t lut_size = 33; - uint32_t i; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + const u32 lut_size = 33; + u32 i; /* * When setting the auto-increment bit, the hardware seems to @@ -507,7 +485,7 @@ static void glk_load_degamma_lut(struct intel_crtc_state *crtc_state) * different values per channel, so this just loads a linear table. */ for (i = 0; i < lut_size; i++) { - uint32_t v = (i * (1 << 16)) / (lut_size - 1); + u32 v = (i * (1 << 16)) / (lut_size - 1); I915_WRITE(PRE_CSC_GAMC_DATA(pipe), v); } @@ -517,48 +495,49 @@ static void glk_load_degamma_lut(struct intel_crtc_state *crtc_state) I915_WRITE(PRE_CSC_GAMC_DATA(pipe), (1 << 16)); } -static void glk_load_luts(struct intel_crtc_state *crtc_state) +static void glk_load_luts(const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc_state->base.crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; glk_load_degamma_lut(crtc_state); if (crtc_state_is_legacy_gamma(crtc_state)) { - haswell_load_luts(crtc_state); - return; - } - - bdw_load_gamma_lut(crtc_state, 0); + i9xx_load_luts(crtc_state); + } else { + bdw_load_gamma_lut(crtc_state, 0); - crtc_state->gamma_mode = GAMMA_MODE_MODE_10BIT; - I915_WRITE(GAMMA_MODE(pipe), GAMMA_MODE_MODE_10BIT); - POSTING_READ(GAMMA_MODE(pipe)); + /* + * Reset the index, otherwise it prevents the legacy palette to be + * written properly. + */ + I915_WRITE(PREC_PAL_INDEX(pipe), 0); + } } -/* Loads the palette/gamma unit for the CRTC on CherryView. */ -static void cherryview_load_luts(struct intel_crtc_state *crtc_state) +static void cherryview_load_luts(const struct intel_crtc_state *crtc_state) { - struct drm_crtc *crtc = crtc_state->base.crtc; - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - enum pipe pipe = to_intel_crtc(crtc)->pipe; - struct drm_color_lut *lut; - uint32_t i, lut_size; - uint32_t word0, word1; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; + const struct drm_property_blob *degamma_lut = crtc_state->base.degamma_lut; + enum pipe pipe = crtc->pipe; + + cherryview_load_csc_matrix(crtc_state); if (crtc_state_is_legacy_gamma(crtc_state)) { - /* Turn off degamma/gamma on CGM block. */ - I915_WRITE(CGM_PIPE_MODE(pipe), - (crtc_state->base.ctm ? CGM_PIPE_MODE_CSC : 0)); - i9xx_load_luts_internal(crtc_state, crtc_state->base.gamma_lut); + i9xx_load_luts_internal(crtc_state, gamma_lut); return; } - if (crtc_state->base.degamma_lut) { - lut = crtc_state->base.degamma_lut->data; - lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; + if (degamma_lut) { + const struct drm_color_lut *lut = degamma_lut->data; + int i, lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; + for (i = 0; i < lut_size; i++) { + u32 word0, word1; + /* Write LUT in U0.14 format. */ word0 = (drm_color_lut_extract(lut[i].green, 14) << 16) | @@ -570,10 +549,13 @@ static void cherryview_load_luts(struct intel_crtc_state *crtc_state) } } - if (crtc_state->base.gamma_lut) { - lut = crtc_state->base.gamma_lut->data; - lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + if (gamma_lut) { + const struct drm_color_lut *lut = gamma_lut->data; + int i, lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + for (i = 0; i < lut_size; i++) { + u32 word0, word1; + /* Write LUT in U0.10 format. */ word0 = (drm_color_lut_extract(lut[i].green, 10) << 16) | @@ -585,11 +567,6 @@ static void cherryview_load_luts(struct intel_crtc_state *crtc_state) } } - I915_WRITE(CGM_PIPE_MODE(pipe), - (crtc_state->base.ctm ? CGM_PIPE_MODE_CSC : 0) | - (crtc_state->base.degamma_lut ? CGM_PIPE_MODE_DEGAMMA : 0) | - (crtc_state->base.gamma_lut ? CGM_PIPE_MODE_GAMMA : 0)); - /* * Also program a linear LUT in the legacy block (behind the * CGM block). @@ -597,40 +574,73 @@ static void cherryview_load_luts(struct intel_crtc_state *crtc_state) i9xx_load_luts_internal(crtc_state, NULL); } -void intel_color_load_luts(struct intel_crtc_state *crtc_state) +void intel_color_load_luts(const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc_state->base.crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); dev_priv->display.load_luts(crtc_state); } +void intel_color_commit(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + + if (dev_priv->display.color_commit) + dev_priv->display.color_commit(crtc_state); +} + +static int check_lut_size(const struct drm_property_blob *lut, int expected) +{ + int len; + + if (!lut) + return 0; + + len = drm_color_lut_size(lut); + if (len != expected) { + DRM_DEBUG_KMS("Invalid LUT size; got %d, expected %d\n", + len, expected); + return -EINVAL; + } + + return 0; +} + int intel_color_check(struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - size_t gamma_length, degamma_length; + const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; + const struct drm_property_blob *degamma_lut = crtc_state->base.degamma_lut; + int gamma_length, degamma_length; + u32 gamma_tests, degamma_tests; degamma_length = INTEL_INFO(dev_priv)->color.degamma_lut_size; gamma_length = INTEL_INFO(dev_priv)->color.gamma_lut_size; + degamma_tests = INTEL_INFO(dev_priv)->color.degamma_lut_tests; + gamma_tests = INTEL_INFO(dev_priv)->color.gamma_lut_tests; - /* - * We allow both degamma & gamma luts at the right size or - * NULL. - */ - if ((!crtc_state->base.degamma_lut || - drm_color_lut_size(crtc_state->base.degamma_lut) == degamma_length) && - (!crtc_state->base.gamma_lut || - drm_color_lut_size(crtc_state->base.gamma_lut) == gamma_length)) + /* Always allow legacy gamma LUT with no further checking. */ + if (crtc_state_is_legacy_gamma(crtc_state)) { + crtc_state->gamma_mode = GAMMA_MODE_MODE_8BIT; return 0; + } - /* - * We also allow no degamma lut/ctm and a gamma lut at the legacy - * size (256 entries). - */ - if (crtc_state_is_legacy_gamma(crtc_state)) - return 0; + if (check_lut_size(degamma_lut, degamma_length) || + check_lut_size(gamma_lut, gamma_length)) + return -EINVAL; + + if (drm_color_lut_check(degamma_lut, degamma_tests) || + drm_color_lut_check(gamma_lut, gamma_tests)) + return -EINVAL; - return -EINVAL; + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + crtc_state->gamma_mode = GAMMA_MODE_MODE_10BIT; + else if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) + crtc_state->gamma_mode = GAMMA_MODE_MODE_SPLIT; + else + crtc_state->gamma_mode = GAMMA_MODE_MODE_8BIT; + + return 0; } void intel_color_init(struct intel_crtc *crtc) @@ -640,18 +650,17 @@ void intel_color_init(struct intel_crtc *crtc) drm_mode_crtc_set_gamma_size(&crtc->base, 256); if (IS_CHERRYVIEW(dev_priv)) { - dev_priv->display.load_csc_matrix = cherryview_load_csc_matrix; dev_priv->display.load_luts = cherryview_load_luts; } else if (IS_HASWELL(dev_priv)) { - dev_priv->display.load_csc_matrix = ilk_load_csc_matrix; - dev_priv->display.load_luts = haswell_load_luts; + dev_priv->display.load_luts = i9xx_load_luts; + dev_priv->display.color_commit = hsw_color_commit; } else if (IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) { - dev_priv->display.load_csc_matrix = ilk_load_csc_matrix; dev_priv->display.load_luts = broadwell_load_luts; + dev_priv->display.color_commit = hsw_color_commit; } else if (IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) { - dev_priv->display.load_csc_matrix = ilk_load_csc_matrix; dev_priv->display.load_luts = glk_load_luts; + dev_priv->display.color_commit = hsw_color_commit; } else { dev_priv->display.load_luts = i9xx_load_luts; } diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index d9fc1601671d..3716b2ee362f 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -83,15 +83,17 @@ static bool intel_crt_get_hw_state(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crt *crt = intel_encoder_to_crt(encoder); + intel_wakeref_t wakeref; bool ret; - if (!intel_display_power_get_if_enabled(dev_priv, - encoder->power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain); + if (!wakeref) return false; ret = intel_crt_port_enabled(dev_priv, crt->adpa_reg, pipe); - intel_display_power_put(dev_priv, encoder->power_domain); + intel_display_power_put(dev_priv, encoder->power_domain, wakeref); return ret; } @@ -629,19 +631,19 @@ static bool intel_crt_detect_ddc(struct drm_connector *connector) } static enum drm_connector_status -intel_crt_load_detect(struct intel_crt *crt, uint32_t pipe) +intel_crt_load_detect(struct intel_crt *crt, u32 pipe) { struct drm_device *dev = crt->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - uint32_t save_bclrpat; - uint32_t save_vtotal; - uint32_t vtotal, vactive; - uint32_t vsample; - uint32_t vblank, vblank_start, vblank_end; - uint32_t dsl; + u32 save_bclrpat; + u32 save_vtotal; + u32 vtotal, vactive; + u32 vsample; + u32 vblank, vblank_start, vblank_end; + u32 dsl; i915_reg_t bclrpat_reg, vtotal_reg, vblank_reg, vsync_reg, pipeconf_reg, pipe_dsl_reg; - uint8_t st00; + u8 st00; enum drm_connector_status status; DRM_DEBUG_KMS("starting load-detect on CRT\n"); @@ -667,7 +669,7 @@ intel_crt_load_detect(struct intel_crt *crt, uint32_t pipe) I915_WRITE(bclrpat_reg, 0x500050); if (!IS_GEN(dev_priv, 2)) { - uint32_t pipeconf = I915_READ(pipeconf_reg); + u32 pipeconf = I915_READ(pipeconf_reg); I915_WRITE(pipeconf_reg, pipeconf | PIPECONF_FORCE_BORDER); POSTING_READ(pipeconf_reg); /* Wait for next Vblank to substitue @@ -688,8 +690,8 @@ intel_crt_load_detect(struct intel_crt *crt, uint32_t pipe) * Yes, this will flicker */ if (vblank_start <= vactive && vblank_end >= vtotal) { - uint32_t vsync = I915_READ(vsync_reg); - uint32_t vsync_start = (vsync & 0xffff) + 1; + u32 vsync = I915_READ(vsync_reg); + u32 vsync_start = (vsync & 0xffff) + 1; vblank_start = vsync_start; I915_WRITE(vblank_reg, @@ -777,6 +779,7 @@ intel_crt_detect(struct drm_connector *connector, struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_crt *crt = intel_attached_crt(connector); struct intel_encoder *intel_encoder = &crt->base; + intel_wakeref_t wakeref; int status, ret; struct intel_load_detect_pipe tmp; @@ -785,7 +788,8 @@ intel_crt_detect(struct drm_connector *connector, force); if (i915_modparams.load_detect_test) { - intel_display_power_get(dev_priv, intel_encoder->power_domain); + wakeref = intel_display_power_get(dev_priv, + intel_encoder->power_domain); goto load_detect; } @@ -793,7 +797,8 @@ intel_crt_detect(struct drm_connector *connector, if (dmi_check_system(intel_spurious_crt_detect)) return connector_status_disconnected; - intel_display_power_get(dev_priv, intel_encoder->power_domain); + wakeref = intel_display_power_get(dev_priv, + intel_encoder->power_domain); if (I915_HAS_HOTPLUG(dev_priv)) { /* We can not rely on the HPD pin always being correctly wired @@ -848,7 +853,7 @@ load_detect: } out: - intel_display_power_put(dev_priv, intel_encoder->power_domain); + intel_display_power_put(dev_priv, intel_encoder->power_domain, wakeref); return status; } @@ -858,10 +863,12 @@ static int intel_crt_get_modes(struct drm_connector *connector) struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crt *crt = intel_attached_crt(connector); struct intel_encoder *intel_encoder = &crt->base; - int ret; + intel_wakeref_t wakeref; struct i2c_adapter *i2c; + int ret; - intel_display_power_get(dev_priv, intel_encoder->power_domain); + wakeref = intel_display_power_get(dev_priv, + intel_encoder->power_domain); i2c = intel_gmbus_get_adapter(dev_priv, dev_priv->vbt.crt_ddc_pin); ret = intel_crt_ddc_get_modes(connector, i2c); @@ -873,7 +880,7 @@ static int intel_crt_get_modes(struct drm_connector *connector) ret = intel_crt_ddc_get_modes(connector, i2c); out: - intel_display_power_put(dev_priv, intel_encoder->power_domain); + intel_display_power_put(dev_priv, intel_encoder->power_domain, wakeref); return ret; } diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index a516697bf57d..e8ac04c33e29 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -70,50 +70,50 @@ MODULE_FIRMWARE(BXT_CSR_PATH); struct intel_css_header { /* 0x09 for DMC */ - uint32_t module_type; + u32 module_type; /* Includes the DMC specific header in dwords */ - uint32_t header_len; + u32 header_len; /* always value would be 0x10000 */ - uint32_t header_ver; + u32 header_ver; /* Not used */ - uint32_t module_id; + u32 module_id; /* Not used */ - uint32_t module_vendor; + u32 module_vendor; /* in YYYYMMDD format */ - uint32_t date; + u32 date; /* Size in dwords (CSS_Headerlen + PackageHeaderLen + dmc FWsLen)/4 */ - uint32_t size; + u32 size; /* Not used */ - uint32_t key_size; + u32 key_size; /* Not used */ - uint32_t modulus_size; + u32 modulus_size; /* Not used */ - uint32_t exponent_size; + u32 exponent_size; /* Not used */ - uint32_t reserved1[12]; + u32 reserved1[12]; /* Major Minor */ - uint32_t version; + u32 version; /* Not used */ - uint32_t reserved2[8]; + u32 reserved2[8]; /* Not used */ - uint32_t kernel_header_info; + u32 kernel_header_info; } __packed; struct intel_fw_info { - uint16_t reserved1; + u16 reserved1; /* Stepping (A, B, C, ..., *). * is a wildcard */ char stepping; @@ -121,8 +121,8 @@ struct intel_fw_info { /* Sub-stepping (0, 1, ..., *). * is a wildcard */ char substepping; - uint32_t offset; - uint32_t reserved2; + u32 offset; + u32 reserved2; } __packed; struct intel_package_header { @@ -135,14 +135,14 @@ struct intel_package_header { unsigned char reserved[10]; /* Number of valid entries in the FWInfo array below */ - uint32_t num_entries; + u32 num_entries; struct intel_fw_info fw_info[20]; } __packed; struct intel_dmc_header { /* always value would be 0x40403E3E */ - uint32_t signature; + u32 signature; /* DMC binary header length */ unsigned char header_len; @@ -151,30 +151,30 @@ struct intel_dmc_header { unsigned char header_ver; /* Reserved */ - uint16_t dmcc_ver; + u16 dmcc_ver; /* Major, Minor */ - uint32_t project; + u32 project; /* Firmware program size (excluding header) in dwords */ - uint32_t fw_size; + u32 fw_size; /* Major Minor version */ - uint32_t fw_version; + u32 fw_version; /* Number of valid MMIO cycles present. */ - uint32_t mmio_count; + u32 mmio_count; /* MMIO address */ - uint32_t mmioaddr[8]; + u32 mmioaddr[8]; /* MMIO data */ - uint32_t mmiodata[8]; + u32 mmiodata[8]; /* FW filename */ unsigned char dfile[32]; - uint32_t reserved1[2]; + u32 reserved1[2]; } __packed; struct stepping_info { @@ -230,7 +230,7 @@ intel_get_stepping_info(struct drm_i915_private *dev_priv) static void gen9_set_dc_state_debugmask(struct drm_i915_private *dev_priv) { - uint32_t val, mask; + u32 val, mask; mask = DC_STATE_DEBUG_MASK_MEMORY_UP; @@ -257,7 +257,7 @@ static void gen9_set_dc_state_debugmask(struct drm_i915_private *dev_priv) void intel_csr_load_program(struct drm_i915_private *dev_priv) { u32 *payload = dev_priv->csr.dmc_payload; - uint32_t i, fw_size; + u32 i, fw_size; if (!HAS_CSR(dev_priv)) { DRM_ERROR("No CSR support available for this platform\n"); @@ -289,17 +289,17 @@ void intel_csr_load_program(struct drm_i915_private *dev_priv) gen9_set_dc_state_debugmask(dev_priv); } -static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, - const struct firmware *fw) +static u32 *parse_csr_fw(struct drm_i915_private *dev_priv, + const struct firmware *fw) { struct intel_css_header *css_header; struct intel_package_header *package_header; struct intel_dmc_header *dmc_header; struct intel_csr *csr = &dev_priv->csr; const struct stepping_info *si = intel_get_stepping_info(dev_priv); - uint32_t dmc_offset = CSR_DEFAULT_FW_OFFSET, readcount = 0, nbytes; - uint32_t i; - uint32_t *dmc_payload; + u32 dmc_offset = CSR_DEFAULT_FW_OFFSET, readcount = 0, nbytes; + u32 i; + u32 *dmc_payload; if (!fw) return NULL; @@ -409,6 +409,21 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, return memcpy(dmc_payload, &fw->data[readcount], nbytes); } +static void intel_csr_runtime_pm_get(struct drm_i915_private *dev_priv) +{ + WARN_ON(dev_priv->csr.wakeref); + dev_priv->csr.wakeref = + intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); +} + +static void intel_csr_runtime_pm_put(struct drm_i915_private *dev_priv) +{ + intel_wakeref_t wakeref __maybe_unused = + fetch_and_zero(&dev_priv->csr.wakeref); + + intel_display_power_put(dev_priv, POWER_DOMAIN_INIT, wakeref); +} + static void csr_load_work_fn(struct work_struct *work) { struct drm_i915_private *dev_priv; @@ -424,8 +439,7 @@ static void csr_load_work_fn(struct work_struct *work) if (dev_priv->csr.dmc_payload) { intel_csr_load_program(dev_priv); - - intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); + intel_csr_runtime_pm_put(dev_priv); DRM_INFO("Finished loading DMC firmware %s (v%u.%u)\n", dev_priv->csr.fw_path, @@ -467,7 +481,7 @@ void intel_csr_ucode_init(struct drm_i915_private *dev_priv) * suspend as runtime suspend *requires* a working CSR for whatever * reason. */ - intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); + intel_csr_runtime_pm_get(dev_priv); if (INTEL_GEN(dev_priv) >= 12) { /* Allow to load fw via parameter using the last known size */ @@ -538,7 +552,7 @@ void intel_csr_ucode_suspend(struct drm_i915_private *dev_priv) /* Drop the reference held in case DMC isn't loaded. */ if (!dev_priv->csr.dmc_payload) - intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); + intel_csr_runtime_pm_put(dev_priv); } /** @@ -558,7 +572,7 @@ void intel_csr_ucode_resume(struct drm_i915_private *dev_priv) * loaded. */ if (!dev_priv->csr.dmc_payload) - intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); + intel_csr_runtime_pm_get(dev_priv); } /** @@ -574,6 +588,7 @@ void intel_csr_ucode_fini(struct drm_i915_private *dev_priv) return; intel_csr_ucode_suspend(dev_priv); + WARN_ON(dev_priv->csr.wakeref); kfree(dev_priv->csr.dmc_payload); } diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index b1ac89b514c1..ca705546a0ab 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -974,7 +974,7 @@ static void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv, DRM_ERROR("Timeout waiting for DDI BUF %c idle bit\n", port_name(port)); } -static uint32_t hsw_pll_to_ddi_pll_sel(const struct intel_shared_dpll *pll) +static u32 hsw_pll_to_ddi_pll_sel(const struct intel_shared_dpll *pll) { switch (pll->info->id) { case DPLL_ID_WRPLL1: @@ -995,8 +995,8 @@ static uint32_t hsw_pll_to_ddi_pll_sel(const struct intel_shared_dpll *pll) } } -static uint32_t icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state) +static u32 icl_pll_to_ddi_clk_sel(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { const struct intel_shared_dpll *pll = crtc_state->shared_dpll; int clock = crtc_state->port_clock; @@ -1004,10 +1004,11 @@ static uint32_t icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder, switch (id) { default: + /* + * DPLL_ID_ICL_DPLL0 and DPLL_ID_ICL_DPLL1 should not be used + * here, so do warn if this get passed in + */ MISSING_CASE(id); - /* fall through */ - case DPLL_ID_ICL_DPLL0: - case DPLL_ID_ICL_DPLL1: return DDI_CLK_SEL_NONE; case DPLL_ID_ICL_TBTPLL: switch (clock) { @@ -1021,7 +1022,7 @@ static uint32_t icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder, return DDI_CLK_SEL_TBT_810; default: MISSING_CASE(clock); - break; + return DDI_CLK_SEL_NONE; } case DPLL_ID_ICL_MGPLL1: case DPLL_ID_ICL_MGPLL2: @@ -1243,8 +1244,8 @@ static int skl_calc_wrpll_link(struct drm_i915_private *dev_priv, enum intel_dpll_id pll_id) { i915_reg_t cfgcr1_reg, cfgcr2_reg; - uint32_t cfgcr1_val, cfgcr2_val; - uint32_t p0, p1, p2, dco_freq; + u32 cfgcr1_val, cfgcr2_val; + u32 p0, p1, p2, dco_freq; cfgcr1_reg = DPLL_CFGCR1(pll_id); cfgcr2_reg = DPLL_CFGCR2(pll_id); @@ -1305,8 +1306,8 @@ static int skl_calc_wrpll_link(struct drm_i915_private *dev_priv, int cnl_calc_wrpll_link(struct drm_i915_private *dev_priv, enum intel_dpll_id pll_id) { - uint32_t cfgcr0, cfgcr1; - uint32_t p0, p1, p2, dco_freq, ref_clock; + u32 cfgcr0, cfgcr1; + u32 p0, p1, p2, dco_freq, ref_clock; if (INTEL_GEN(dev_priv) >= 11) { cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(pll_id)); @@ -1391,16 +1392,17 @@ static int icl_calc_tbt_pll_link(struct drm_i915_private *dev_priv, static int icl_calc_mg_pll_link(struct drm_i915_private *dev_priv, enum port port) { + enum tc_port tc_port = intel_port_to_tc(dev_priv, port); u32 mg_pll_div0, mg_clktop_hsclkctl; u32 m1, m2_int, m2_frac, div1, div2, refclk; u64 tmp; refclk = dev_priv->cdclk.hw.ref; - mg_pll_div0 = I915_READ(MG_PLL_DIV0(port)); - mg_clktop_hsclkctl = I915_READ(MG_CLKTOP2_HSCLKCTL(port)); + mg_pll_div0 = I915_READ(MG_PLL_DIV0(tc_port)); + mg_clktop_hsclkctl = I915_READ(MG_CLKTOP2_HSCLKCTL(tc_port)); - m1 = I915_READ(MG_PLL_DIV1(port)) & MG_PLL_DIV1_FBPREDIV_MASK; + m1 = I915_READ(MG_PLL_DIV1(tc_port)) & MG_PLL_DIV1_FBPREDIV_MASK; m2_int = mg_pll_div0 & MG_PLL_DIV0_FBDIV_INT_MASK; m2_frac = (mg_pll_div0 & MG_PLL_DIV0_FRACNEN_H) ? (mg_pll_div0 & MG_PLL_DIV0_FBDIV_FRAC_MASK) >> @@ -1471,7 +1473,7 @@ static void icl_ddi_clock_get(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = encoder->port; int link_clock = 0; - uint32_t pll_id; + u32 pll_id; pll_id = intel_get_shared_dpll_id(dev_priv, pipe_config->shared_dpll); if (intel_port_is_combophy(dev_priv, port)) { @@ -1496,7 +1498,7 @@ static void cnl_ddi_clock_get(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); int link_clock = 0; - uint32_t cfgcr0; + u32 cfgcr0; enum intel_dpll_id pll_id; pll_id = intel_get_shared_dpll_id(dev_priv, pipe_config->shared_dpll); @@ -1550,7 +1552,7 @@ static void skl_ddi_clock_get(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); int link_clock = 0; - uint32_t dpll_ctl1; + u32 dpll_ctl1; enum intel_dpll_id pll_id; pll_id = intel_get_shared_dpll_id(dev_priv, pipe_config->shared_dpll); @@ -1739,7 +1741,7 @@ void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; - uint32_t temp; + u32 temp; temp = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder)); if (state == true) @@ -1757,7 +1759,7 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) enum pipe pipe = crtc->pipe; enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; enum port port = encoder->port; - uint32_t temp; + u32 temp; /* Enable TRANS_DDI_FUNC_CTL for the pipe to work in HDMI mode */ temp = TRANS_DDI_FUNC_ENABLE; @@ -1841,7 +1843,7 @@ void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; i915_reg_t reg = TRANS_DDI_FUNC_CTL(cpu_transcoder); - uint32_t val = I915_READ(reg); + u32 val = I915_READ(reg); val &= ~(TRANS_DDI_FUNC_ENABLE | TRANS_DDI_PORT_MASK | TRANS_DDI_DP_VC_PAYLOAD_ALLOC); val |= TRANS_DDI_PORT_NONE; @@ -1860,12 +1862,14 @@ int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder, { struct drm_device *dev = intel_encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); + intel_wakeref_t wakeref; enum pipe pipe = 0; int ret = 0; - uint32_t tmp; + u32 tmp; - if (WARN_ON(!intel_display_power_get_if_enabled(dev_priv, - intel_encoder->power_domain))) + wakeref = intel_display_power_get_if_enabled(dev_priv, + intel_encoder->power_domain); + if (WARN_ON(!wakeref)) return -ENXIO; if (WARN_ON(!intel_encoder->get_hw_state(intel_encoder, &pipe))) { @@ -1880,7 +1884,7 @@ int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder, tmp &= ~TRANS_DDI_HDCP_SIGNALLING; I915_WRITE(TRANS_DDI_FUNC_CTL(pipe), tmp); out: - intel_display_power_put(dev_priv, intel_encoder->power_domain); + intel_display_power_put(dev_priv, intel_encoder->power_domain, wakeref); return ret; } @@ -1891,13 +1895,15 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector) struct intel_encoder *encoder = intel_connector->encoder; int type = intel_connector->base.connector_type; enum port port = encoder->port; - enum pipe pipe = 0; enum transcoder cpu_transcoder; - uint32_t tmp; + intel_wakeref_t wakeref; + enum pipe pipe = 0; + u32 tmp; bool ret; - if (!intel_display_power_get_if_enabled(dev_priv, - encoder->power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain); + if (!wakeref) return false; if (!encoder->get_hw_state(encoder, &pipe)) { @@ -1939,7 +1945,7 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector) } out: - intel_display_power_put(dev_priv, encoder->power_domain); + intel_display_power_put(dev_priv, encoder->power_domain, wakeref); return ret; } @@ -1950,6 +1956,7 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); enum port port = encoder->port; + intel_wakeref_t wakeref; enum pipe p; u32 tmp; u8 mst_pipe_mask; @@ -1957,8 +1964,9 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder, *pipe_mask = 0; *is_dp_mst = false; - if (!intel_display_power_get_if_enabled(dev_priv, - encoder->power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain); + if (!wakeref) return; tmp = I915_READ(DDI_BUF_CTL(port)); @@ -2029,7 +2037,7 @@ out: "(PHY_CTL %08x)\n", port_name(port), tmp); } - intel_display_power_put(dev_priv, encoder->power_domain); + intel_display_power_put(dev_priv, encoder->power_domain, wakeref); } bool intel_ddi_get_hw_state(struct intel_encoder *encoder, @@ -2126,7 +2134,7 @@ void intel_ddi_disable_pipe_clock(const struct intel_crtc_state *crtc_state) } static void _skl_ddi_set_iboost(struct drm_i915_private *dev_priv, - enum port port, uint8_t iboost) + enum port port, u8 iboost) { u32 tmp; @@ -2145,7 +2153,7 @@ static void skl_ddi_set_iboost(struct intel_encoder *encoder, struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = encoder->port; - uint8_t iboost; + u8 iboost; if (type == INTEL_OUTPUT_HDMI) iboost = dev_priv->vbt.ddi_port_info[port].hdmi_boost_level; @@ -2659,7 +2667,7 @@ static void icl_ddi_vswing_sequence(struct intel_encoder *encoder, icl_mg_phy_ddi_vswing_sequence(encoder, link_clock, level); } -static uint32_t translate_signal_level(int signal_levels) +static u32 translate_signal_level(int signal_levels) { int i; @@ -2674,9 +2682,9 @@ static uint32_t translate_signal_level(int signal_levels) return 0; } -static uint32_t intel_ddi_dp_level(struct intel_dp *intel_dp) +static u32 intel_ddi_dp_level(struct intel_dp *intel_dp) { - uint8_t train_set = intel_dp->train_set[0]; + u8 train_set = intel_dp->train_set[0]; int signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK | DP_TRAIN_PRE_EMPHASIS_MASK); @@ -2701,7 +2709,7 @@ u32 bxt_signal_levels(struct intel_dp *intel_dp) return 0; } -uint32_t ddi_signal_levels(struct intel_dp *intel_dp) +u32 ddi_signal_levels(struct intel_dp *intel_dp) { struct intel_digital_port *dport = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); @@ -2715,8 +2723,8 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp) } static inline -uint32_t icl_dpclka_cfgcr0_clk_off(struct drm_i915_private *dev_priv, - enum port port) +u32 icl_dpclka_cfgcr0_clk_off(struct drm_i915_private *dev_priv, + enum port port) { if (intel_port_is_combophy(dev_priv, port)) { return ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(port); @@ -2851,7 +2859,7 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = encoder->port; - uint32_t val; + u32 val; const struct intel_shared_dpll *pll = crtc_state->shared_dpll; if (WARN_ON(!pll)) @@ -2862,7 +2870,7 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, if (IS_ICELAKE(dev_priv)) { if (!intel_port_is_combophy(dev_priv, port)) I915_WRITE(DDI_CLK_SEL(port), - icl_pll_to_ddi_pll_sel(encoder, crtc_state)); + icl_pll_to_ddi_clk_sel(encoder, crtc_state)); } else if (IS_CANNONLAKE(dev_priv)) { /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */ val = I915_READ(DPCLKA_CFGCR0); @@ -3286,7 +3294,8 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder, intel_edp_panel_vdd_on(intel_dp); intel_edp_panel_off(intel_dp); - intel_display_power_put(dev_priv, dig_port->ddi_io_power_domain); + intel_display_power_put_unchecked(dev_priv, + dig_port->ddi_io_power_domain); intel_ddi_clk_disable(encoder); } @@ -3306,7 +3315,8 @@ static void intel_ddi_post_disable_hdmi(struct intel_encoder *encoder, intel_disable_ddi_buf(encoder, old_crtc_state); - intel_display_power_put(dev_priv, dig_port->ddi_io_power_domain); + intel_display_power_put_unchecked(dev_priv, + dig_port->ddi_io_power_domain); intel_ddi_clk_disable(encoder); @@ -3348,7 +3358,7 @@ void intel_ddi_fdi_post_disable(struct intel_encoder *encoder, const struct drm_connector_state *old_conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - uint32_t val; + u32 val; /* * Bspec lists this as both step 13 (before DDI_BUF_CTL disable) @@ -3548,6 +3558,8 @@ static void intel_ddi_update_pipe_dp(struct intel_encoder *encoder, intel_psr_enable(intel_dp, crtc_state); intel_edp_drrs_enable(intel_dp, crtc_state); + + intel_panel_update_backlight(encoder, crtc_state, conn_state); } static void intel_ddi_update_pipe(struct intel_encoder *encoder, @@ -3626,8 +3638,8 @@ intel_ddi_post_pll_disable(struct intel_encoder *encoder, if (intel_crtc_has_dp_encoder(crtc_state) || intel_port_is_tc(dev_priv, encoder->port)) - intel_display_power_put(dev_priv, - intel_ddi_main_link_aux_domain(dig_port)); + intel_display_power_put_unchecked(dev_priv, + intel_ddi_main_link_aux_domain(dig_port)); } void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp) @@ -3636,7 +3648,7 @@ void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); enum port port = intel_dig_port->base.port; - uint32_t val; + u32 val; bool wait = false; if (I915_READ(DP_TP_CTL(port)) & DP_TP_CTL_ENABLE) { diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 957c6527f76b..e8b8661df746 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -115,7 +115,7 @@ enum intel_ppgtt { func(has_ddi); \ func(has_dp_mst); \ func(has_fbc); \ - func(has_gmch_display); \ + func(has_gmch); \ func(has_hotplug); \ func(has_ipc); \ func(has_overlay); \ @@ -189,6 +189,8 @@ struct intel_device_info { struct color_luts { u16 degamma_lut_size; u16 gamma_lut_size; + u32 degamma_lut_tests; + u32 gamma_lut_tests; } color; }; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e0f40be07131..ccb616351bba 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -51,8 +51,17 @@ #include "intel_dsi.h" #include "intel_frontbuffer.h" +#include "intel_drv.h" +#include "intel_dsi.h" +#include "intel_frontbuffer.h" + +#include "i915_drv.h" +#include "i915_gem_clflush.h" +#include "i915_reset.h" +#include "i915_trace.h" + /* Primary plane formats for gen <= 3 */ -static const uint32_t i8xx_primary_formats[] = { +static const u32 i8xx_primary_formats[] = { DRM_FORMAT_C8, DRM_FORMAT_RGB565, DRM_FORMAT_XRGB1555, @@ -60,7 +69,7 @@ static const uint32_t i8xx_primary_formats[] = { }; /* Primary plane formats for gen >= 4 */ -static const uint32_t i965_primary_formats[] = { +static const u32 i965_primary_formats[] = { DRM_FORMAT_C8, DRM_FORMAT_RGB565, DRM_FORMAT_XRGB8888, @@ -69,18 +78,18 @@ static const uint32_t i965_primary_formats[] = { DRM_FORMAT_XBGR2101010, }; -static const uint64_t i9xx_format_modifiers[] = { +static const u64 i9xx_format_modifiers[] = { I915_FORMAT_MOD_X_TILED, DRM_FORMAT_MOD_LINEAR, DRM_FORMAT_MOD_INVALID }; /* Cursor formats */ -static const uint32_t intel_cursor_formats[] = { +static const u32 intel_cursor_formats[] = { DRM_FORMAT_ARGB8888, }; -static const uint64_t cursor_format_modifiers[] = { +static const u64 cursor_format_modifiers[] = { DRM_FORMAT_MOD_LINEAR, DRM_FORMAT_MOD_INVALID }; @@ -496,7 +505,7 @@ static int pnv_calc_dpll_params(int refclk, struct dpll *clock) return clock->dot; } -static uint32_t i9xx_dpll_compute_m(struct dpll *dpll) +static u32 i9xx_dpll_compute_m(struct dpll *dpll) { return 5 * (dpll->m1 + 2) + (dpll->m2 + 2); } @@ -531,8 +540,8 @@ int chv_calc_dpll_params(int refclk, struct dpll *clock) clock->p = clock->p1 * clock->p2; if (WARN_ON(clock->n == 0 || clock->p == 0)) return 0; - clock->vco = DIV_ROUND_CLOSEST_ULL((uint64_t)refclk * clock->m, - clock->n << 22); + clock->vco = DIV_ROUND_CLOSEST_ULL((u64)refclk * clock->m, + clock->n << 22); clock->dot = DIV_ROUND_CLOSEST(clock->vco, clock->p); return clock->dot / 5; @@ -894,7 +903,7 @@ chv_find_best_dpll(const struct intel_limit *limit, struct drm_device *dev = crtc->base.dev; unsigned int best_error_ppm; struct dpll clock; - uint64_t m2; + u64 m2; int found = false; memset(best_clock, 0, sizeof(*best_clock)); @@ -916,7 +925,7 @@ chv_find_best_dpll(const struct intel_limit *limit, clock.p = clock.p1 * clock.p2; - m2 = DIV_ROUND_CLOSEST_ULL(((uint64_t)target * clock.p * + m2 = DIV_ROUND_CLOSEST_ULL(((u64)target * clock.p * clock.n) << 22, refclk * clock.m1); if (m2 > INT_MAX/clock.m1) @@ -1200,17 +1209,19 @@ void assert_pipe(struct drm_i915_private *dev_priv, enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv, pipe); enum intel_display_power_domain power_domain; + intel_wakeref_t wakeref; /* we keep both pipes enabled on 830 */ if (IS_I830(dev_priv)) state = true; power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder); - if (intel_display_power_get_if_enabled(dev_priv, power_domain)) { + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (wakeref) { u32 val = I915_READ(PIPECONF(cpu_transcoder)); cur_state = !!(val & PIPECONF_ENABLE); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); } else { cur_state = false; } @@ -1611,7 +1622,7 @@ static void ironlake_enable_pch_transcoder(const struct intel_crtc_state *crtc_s struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; i915_reg_t reg; - uint32_t val, pipeconf_val; + u32 val, pipeconf_val; /* Make sure PCH DPLL is enabled */ assert_shared_dpll_enabled(dev_priv, crtc_state->shared_dpll); @@ -1699,7 +1710,7 @@ static void ironlake_disable_pch_transcoder(struct drm_i915_private *dev_priv, enum pipe pipe) { i915_reg_t reg; - uint32_t val; + u32 val; /* FDI relies on the transcoder */ assert_fdi_tx_disabled(dev_priv, pipe); @@ -1756,6 +1767,35 @@ enum pipe intel_crtc_pch_transcoder(struct intel_crtc *crtc) return crtc->pipe; } +static u32 intel_crtc_max_vblank_count(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + + /* + * On i965gm the hardware frame counter reads + * zero when the TV encoder is enabled :( + */ + if (IS_I965GM(dev_priv) && + (crtc_state->output_types & BIT(INTEL_OUTPUT_TVOUT))) + return 0; + + if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) + return 0xffffffff; /* full 32 bit counter */ + else if (INTEL_GEN(dev_priv) >= 3) + return 0xffffff; /* only 24 bits of frame count */ + else + return 0; /* Gen2 doesn't have a hardware frame counter */ +} + +static void intel_crtc_vblank_on(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + + drm_crtc_set_max_vblank_count(&crtc->base, + intel_crtc_max_vblank_count(crtc_state)); + drm_crtc_vblank_on(&crtc->base); +} + static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state) { struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); @@ -1774,7 +1814,7 @@ static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state) * a plane. On ILK+ the pipe PLLs are integrated, so we don't * need the check. */ - if (HAS_GMCH_DISPLAY(dev_priv)) { + if (HAS_GMCH(dev_priv)) { if (intel_crtc_has_type(new_crtc_state, INTEL_OUTPUT_DSI)) assert_dsi_pll_enabled(dev_priv); else @@ -1808,7 +1848,7 @@ static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state) * when it's derived from the timestamps. So let's wait for the * pipe to start properly before we call drm_crtc_vblank_on() */ - if (dev_priv->drm.max_vblank_count == 0) + if (intel_crtc_max_vblank_count(new_crtc_state) == 0) intel_wait_for_pipe_scanline_moving(crtc); } @@ -2026,6 +2066,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, struct drm_device *dev = fb->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj = intel_fb_obj(fb); + intel_wakeref_t wakeref; struct i915_vma *vma; unsigned int pinctl; u32 alignment; @@ -2049,7 +2090,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, * intel_runtime_pm_put(), so it is correct to wrap only the * pin/unpin/fence and not more. */ - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); atomic_inc(&dev_priv->gpu_error.pending_fb_pin); @@ -2062,7 +2103,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, * complicated than this. For example, Cherryview appears quite * happy to scanout from anywhere within its global aperture. */ - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) pinctl |= PIN_MAPPABLE; vma = i915_gem_object_pin_to_display_plane(obj, @@ -2104,7 +2145,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, err: atomic_dec(&dev_priv->gpu_error.pending_fb_pin); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return vma; } @@ -2375,7 +2416,7 @@ static int intel_fb_offset_to_xy(int *x, int *y, return 0; } -static unsigned int intel_fb_modifier_to_tiling(uint64_t fb_modifier) +static unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier) { switch (fb_modifier) { case I915_FORMAT_MOD_X_TILED: @@ -3163,7 +3204,7 @@ i9xx_plane_max_stride(struct intel_plane *plane, { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - if (!HAS_GMCH_DISPLAY(dev_priv)) { + if (!HAS_GMCH(dev_priv)) { return 32*1024; } else if (INTEL_GEN(dev_priv) >= 4) { if (modifier == I915_FORMAT_MOD_X_TILED) @@ -3183,28 +3224,38 @@ i9xx_plane_max_stride(struct intel_plane *plane, } } +static u32 i9xx_plane_ctl_crtc(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + u32 dspcntr = 0; + + dspcntr |= DISPPLANE_GAMMA_ENABLE; + + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + dspcntr |= DISPPLANE_PIPE_CSC_ENABLE; + + if (INTEL_GEN(dev_priv) < 5) + dspcntr |= DISPPLANE_SEL_PIPE(crtc->pipe); + + return dspcntr; +} + static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); const struct drm_framebuffer *fb = plane_state->base.fb; unsigned int rotation = plane_state->base.rotation; u32 dspcntr; - dspcntr = DISPLAY_PLANE_ENABLE | DISPPLANE_GAMMA_ENABLE; + dspcntr = DISPLAY_PLANE_ENABLE; if (IS_G4X(dev_priv) || IS_GEN(dev_priv, 5) || IS_GEN(dev_priv, 6) || IS_IVYBRIDGE(dev_priv)) dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - dspcntr |= DISPPLANE_PIPE_CSC_ENABLE; - - if (INTEL_GEN(dev_priv) < 5) - dspcntr |= DISPPLANE_SEL_PIPE(crtc->pipe); - switch (fb->format->format) { case DRM_FORMAT_C8: dspcntr |= DISPPLANE_8BPP; @@ -3332,11 +3383,13 @@ static void i9xx_update_plane(struct intel_plane *plane, struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; u32 linear_offset; - u32 dspcntr = plane_state->ctl; int x = plane_state->color_plane[0].x; int y = plane_state->color_plane[0].y; unsigned long irqflags; u32 dspaddr_offset; + u32 dspcntr; + + dspcntr = plane_state->ctl | i9xx_plane_ctl_crtc(crtc_state); linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); @@ -3396,10 +3449,23 @@ static void i9xx_disable_plane(struct intel_plane *plane, struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; unsigned long irqflags; + u32 dspcntr; + + /* + * DSPCNTR pipe gamma enable on g4x+ and pipe csc + * enable on ilk+ affect the pipe bottom color as + * well, so we must configure them even if the plane + * is disabled. + * + * On pre-g4x there is no way to gamma correct the + * pipe bottom color but we'll keep on doing this + * anyway. + */ + dspcntr = i9xx_plane_ctl_crtc(crtc_state); spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - I915_WRITE_FW(DSPCNTR(i9xx_plane), 0); + I915_WRITE_FW(DSPCNTR(i9xx_plane), dspcntr); if (INTEL_GEN(dev_priv) >= 4) I915_WRITE_FW(DSPSURF(i9xx_plane), 0); else @@ -3414,6 +3480,7 @@ static bool i9xx_plane_get_hw_state(struct intel_plane *plane, struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum intel_display_power_domain power_domain; enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; + intel_wakeref_t wakeref; bool ret; u32 val; @@ -3423,7 +3490,8 @@ static bool i9xx_plane_get_hw_state(struct intel_plane *plane, * display power wells. */ power_domain = POWER_DOMAIN_PIPE(plane->pipe); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) return false; val = I915_READ(DSPCNTR(i9xx_plane)); @@ -3436,7 +3504,7 @@ static bool i9xx_plane_get_hw_state(struct intel_plane *plane, *pipe = (val & DISPPLANE_SEL_PIPE_MASK) >> DISPPLANE_SEL_PIPE_SHIFT; - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); return ret; } @@ -3505,7 +3573,7 @@ u32 skl_plane_stride(const struct intel_plane_state *plane_state, return stride / skl_plane_stride_mult(fb, color_plane, rotation); } -static u32 skl_plane_ctl_format(uint32_t pixel_format) +static u32 skl_plane_ctl_format(u32 pixel_format) { switch (pixel_format) { case DRM_FORMAT_C8: @@ -3575,7 +3643,7 @@ static u32 glk_plane_color_ctl_alpha(const struct intel_plane_state *plane_state } } -static u32 skl_plane_ctl_tiling(uint64_t fb_modifier) +static u32 skl_plane_ctl_tiling(u64 fb_modifier) { switch (fb_modifier) { case DRM_FORMAT_MOD_LINEAR: @@ -3634,6 +3702,20 @@ static u32 cnl_plane_ctl_flip(unsigned int reflect) return 0; } +u32 skl_plane_ctl_crtc(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + u32 plane_ctl = 0; + + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + return plane_ctl; + + plane_ctl |= PLANE_CTL_PIPE_GAMMA_ENABLE; + plane_ctl |= PLANE_CTL_PIPE_CSC_ENABLE; + + return plane_ctl; +} + u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -3648,10 +3730,7 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, if (INTEL_GEN(dev_priv) < 10 && !IS_GEMINILAKE(dev_priv)) { plane_ctl |= skl_plane_ctl_alpha(plane_state); - plane_ctl |= - PLANE_CTL_PIPE_GAMMA_ENABLE | - PLANE_CTL_PIPE_CSC_ENABLE | - PLANE_CTL_PLANE_GAMMA_DISABLE; + plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE; if (plane_state->base.color_encoding == DRM_COLOR_YCBCR_BT709) plane_ctl |= PLANE_CTL_YUV_TO_RGB_CSC_FORMAT_BT709; @@ -3676,19 +3755,27 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, return plane_ctl; } +u32 glk_plane_color_ctl_crtc(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + u32 plane_color_ctl = 0; + + if (INTEL_GEN(dev_priv) >= 11) + return plane_color_ctl; + + plane_color_ctl |= PLANE_COLOR_PIPE_GAMMA_ENABLE; + plane_color_ctl |= PLANE_COLOR_PIPE_CSC_ENABLE; + + return plane_color_ctl; +} + u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct drm_i915_private *dev_priv = - to_i915(plane_state->base.plane->dev); const struct drm_framebuffer *fb = plane_state->base.fb; struct intel_plane *plane = to_intel_plane(plane_state->base.plane); u32 plane_color_ctl = 0; - if (INTEL_GEN(dev_priv) < 11) { - plane_color_ctl |= PLANE_COLOR_PIPE_GAMMA_ENABLE; - plane_color_ctl |= PLANE_COLOR_PIPE_CSC_ENABLE; - } plane_color_ctl |= PLANE_COLOR_PLANE_GAMMA_DISABLE; plane_color_ctl |= glk_plane_color_ctl_alpha(plane_state); @@ -3737,7 +3824,7 @@ __intel_display_resume(struct drm_device *dev, } /* ignore any reset values/BIOS leftovers in the WM registers */ - if (!HAS_GMCH_DISPLAY(to_i915(dev))) + if (!HAS_GMCH(to_i915(dev))) to_intel_atomic_state(state)->skip_intermediate_wm = true; ret = drm_atomic_helper_commit_duplicated_state(state, ctx); @@ -3862,6 +3949,30 @@ unlock: clear_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags); } +static void icl_set_pipe_chicken(struct intel_crtc *crtc) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + u32 tmp; + + tmp = I915_READ(PIPE_CHICKEN(pipe)); + + /* + * Display WA #1153: icl + * enable hardware to bypass the alpha math + * and rounding for per-pixel values 00 and 0xff + */ + tmp |= PER_PIXEL_ALPHA_BYPASS_EN; + + /* + * W/A for underruns with linear/X-tiled with + * WM1+ disabled. + */ + tmp |= PM_FILL_MAINTAIN_DBUF_FULLNESS; + + I915_WRITE(PIPE_CHICKEN(pipe), tmp); +} + static void intel_update_pipe_config(const struct intel_crtc_state *old_crtc_state, const struct intel_crtc_state *new_crtc_state) { @@ -3896,6 +4007,19 @@ static void intel_update_pipe_config(const struct intel_crtc_state *old_crtc_sta else if (old_crtc_state->pch_pfit.enabled) ironlake_pfit_disable(old_crtc_state); } + + /* + * We don't (yet) allow userspace to control the pipe background color, + * so force it to black, but apply pipe gamma and CSC so that its + * handling will match how we program our planes. + */ + if (INTEL_GEN(dev_priv) >= 9) + I915_WRITE(SKL_BOTTOM_COLOR(crtc->pipe), + SKL_BOTTOM_COLOR_GAMMA_ENABLE | + SKL_BOTTOM_COLOR_CSC_ENABLE); + + if (INTEL_GEN(dev_priv) >= 11) + icl_set_pipe_chicken(crtc); } static void intel_fdi_normal_train(struct intel_crtc *crtc) @@ -4595,7 +4719,7 @@ static void ironlake_pch_transcoder_set_timings(const struct intel_crtc_state *c static void cpt_set_fdi_bc_bifurcation(struct drm_i915_private *dev_priv, bool enable) { - uint32_t temp; + u32 temp; temp = I915_READ(SOUTH_CHICKEN1); if (!!(temp & FDI_BC_BIFURCATION_SELECT) == enable) @@ -5250,7 +5374,7 @@ intel_pre_disable_primary_noatomic(struct drm_crtc *crtc) * event which is after the vblank start event, so we need to have a * wait-for-vblank between disabling the plane and the pipe. */ - if (HAS_GMCH_DISPLAY(dev_priv) && + if (HAS_GMCH(dev_priv) && intel_set_memory_cxsr(dev_priv, false)) intel_wait_for_vblank(dev_priv, pipe); } @@ -5258,18 +5382,36 @@ intel_pre_disable_primary_noatomic(struct drm_crtc *crtc) static bool hsw_pre_update_disable_ips(const struct intel_crtc_state *old_crtc_state, const struct intel_crtc_state *new_crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + if (!old_crtc_state->ips_enabled) return false; if (needs_modeset(&new_crtc_state->base)) return true; + /* + * Workaround : Do not read or write the pipe palette/gamma data while + * GAMMA_MODE is configured for split gamma and IPS_CTL has IPS enabled. + * + * Disable IPS before we program the LUT. + */ + if (IS_HASWELL(dev_priv) && + (new_crtc_state->base.color_mgmt_changed || + new_crtc_state->update_pipe) && + new_crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT) + return true; + return !new_crtc_state->ips_enabled; } static bool hsw_post_update_enable_ips(const struct intel_crtc_state *old_crtc_state, const struct intel_crtc_state *new_crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + if (!new_crtc_state->ips_enabled) return false; @@ -5277,6 +5419,18 @@ static bool hsw_post_update_enable_ips(const struct intel_crtc_state *old_crtc_s return true; /* + * Workaround : Do not read or write the pipe palette/gamma data while + * GAMMA_MODE is configured for split gamma and IPS_CTL has IPS enabled. + * + * Re-enable IPS after the LUT has been programmed. + */ + if (IS_HASWELL(dev_priv) && + (new_crtc_state->base.color_mgmt_changed || + new_crtc_state->update_pipe) && + new_crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT) + return true; + + /* * We can't read out IPS on broadwell, assume the worst and * forcibly enable IPS on the first fastset. */ @@ -5387,7 +5541,7 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, * event which is after the vblank start event, so we need to have a * wait-for-vblank between disabling the plane and the pipe. */ - if (HAS_GMCH_DISPLAY(dev_priv) && old_crtc_state->base.active && + if (HAS_GMCH(dev_priv) && old_crtc_state->base.active && pipe_config->disable_cxsr && intel_set_memory_cxsr(dev_priv, false)) intel_wait_for_vblank(dev_priv, crtc->pipe); @@ -5664,6 +5818,7 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config, * clocks enabled */ intel_color_load_luts(pipe_config); + intel_color_commit(pipe_config); if (dev_priv->display.initial_watermarks != NULL) dev_priv->display.initial_watermarks(old_intel_state, pipe_config); @@ -5673,7 +5828,7 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config, ironlake_pch_enable(old_intel_state, pipe_config); assert_vblank_disabled(crtc); - drm_crtc_vblank_on(crtc); + intel_crtc_vblank_on(pipe_config); intel_encoders_enable(crtc, pipe_config, old_state); @@ -5718,7 +5873,7 @@ static void icl_pipe_mbus_enable(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; - uint32_t val; + u32 val; val = MBUS_DBOX_A_CREDIT(2); val |= MBUS_DBOX_BW_CREDIT(1); @@ -5738,7 +5893,6 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, struct intel_atomic_state *old_intel_state = to_intel_atomic_state(old_state); bool psl_clkgate_wa; - u32 pipe_chicken; if (WARN_ON(intel_crtc->active)) return; @@ -5774,8 +5928,6 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, haswell_set_pipemisc(pipe_config); - intel_color_set_csc(pipe_config); - intel_crtc->active = true; /* Display WA #1180: WaDisableScalarClockGating: glk, cnl */ @@ -5794,17 +5946,10 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, * clocks enabled */ intel_color_load_luts(pipe_config); + intel_color_commit(pipe_config); - /* - * Display WA #1153: enable hardware to bypass the alpha math - * and rounding for per-pixel values 00 and 0xff - */ - if (INTEL_GEN(dev_priv) >= 11) { - pipe_chicken = I915_READ(PIPE_CHICKEN(pipe)); - if (!(pipe_chicken & PER_PIXEL_ALPHA_BYPASS_EN)) - I915_WRITE_FW(PIPE_CHICKEN(pipe), - pipe_chicken | PER_PIXEL_ALPHA_BYPASS_EN); - } + if (INTEL_GEN(dev_priv) >= 11) + icl_set_pipe_chicken(intel_crtc); intel_ddi_set_pipe_settings(pipe_config); if (!transcoder_is_dsi(cpu_transcoder)) @@ -5827,7 +5972,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_ddi_set_vc_payload_alloc(pipe_config, true); assert_vblank_disabled(crtc); - drm_crtc_vblank_on(crtc); + intel_crtc_vblank_on(pipe_config); intel_encoders_enable(crtc, pipe_config, old_state); @@ -6109,7 +6254,7 @@ static void modeset_put_power_domains(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain; for_each_power_domain(domain, domains) - intel_display_power_put(dev_priv, domain); + intel_display_power_put_unchecked(dev_priv, domain); } static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, @@ -6139,8 +6284,6 @@ static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, i9xx_set_pipeconf(pipe_config); - intel_color_set_csc(pipe_config); - intel_crtc->active = true; intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); @@ -6160,13 +6303,14 @@ static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, i9xx_pfit_enable(pipe_config); intel_color_load_luts(pipe_config); + intel_color_commit(pipe_config); dev_priv->display.initial_watermarks(old_intel_state, pipe_config); intel_enable_pipe(pipe_config); assert_vblank_disabled(crtc); - drm_crtc_vblank_on(crtc); + intel_crtc_vblank_on(pipe_config); intel_encoders_enable(crtc, pipe_config, old_state); } @@ -6216,6 +6360,7 @@ static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config, i9xx_pfit_enable(pipe_config); intel_color_load_luts(pipe_config); + intel_color_commit(pipe_config); if (dev_priv->display.initial_watermarks != NULL) dev_priv->display.initial_watermarks(old_intel_state, @@ -6225,7 +6370,7 @@ static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config, intel_enable_pipe(pipe_config); assert_vblank_disabled(crtc); - drm_crtc_vblank_on(crtc); + intel_crtc_vblank_on(pipe_config); intel_encoders_enable(crtc, pipe_config, old_state); } @@ -6356,7 +6501,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc, domains = intel_crtc->enabled_power_domains; for_each_power_domain(domain, domains) - intel_display_power_put(dev_priv, domain); + intel_display_power_put_unchecked(dev_priv, domain); intel_crtc->enabled_power_domains = 0; dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe); @@ -6622,9 +6767,9 @@ static bool intel_crtc_supports_double_wide(const struct intel_crtc *crtc) (crtc->pipe == PIPE_A || IS_I915G(dev_priv)); } -static uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config) +static u32 ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config) { - uint32_t pixel_rate; + u32 pixel_rate; pixel_rate = pipe_config->base.adjusted_mode.crtc_clock; @@ -6634,8 +6779,8 @@ static uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config) */ if (pipe_config->pch_pfit.enabled) { - uint64_t pipe_w, pipe_h, pfit_w, pfit_h; - uint32_t pfit_size = pipe_config->pch_pfit.size; + u64 pipe_w, pipe_h, pfit_w, pfit_h; + u32 pfit_size = pipe_config->pch_pfit.size; pipe_w = pipe_config->pipe_src_w; pipe_h = pipe_config->pipe_src_h; @@ -6650,7 +6795,7 @@ static uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config) if (WARN_ON(!pfit_w || !pfit_h)) return pixel_rate; - pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h, + pixel_rate = div_u64((u64)pixel_rate * pipe_w * pipe_h, pfit_w * pfit_h); } @@ -6661,7 +6806,7 @@ static void intel_crtc_compute_pixel_rate(struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) /* FIXME calculate proper pipe pixel rate for GMCH pfit */ crtc_state->pixel_rate = crtc_state->base.adjusted_mode.crtc_clock; @@ -6746,7 +6891,7 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc, } static void -intel_reduce_m_n_ratio(uint32_t *num, uint32_t *den) +intel_reduce_m_n_ratio(u32 *num, u32 *den) { while (*num > DATA_LINK_M_N_MASK || *den > DATA_LINK_M_N_MASK) { @@ -6756,7 +6901,7 @@ intel_reduce_m_n_ratio(uint32_t *num, uint32_t *den) } static void compute_m_n(unsigned int m, unsigned int n, - uint32_t *ret_m, uint32_t *ret_n, + u32 *ret_m, u32 *ret_n, bool constant_n) { /* @@ -6771,7 +6916,7 @@ static void compute_m_n(unsigned int m, unsigned int n, else *ret_n = min_t(unsigned int, roundup_pow_of_two(n), DATA_LINK_N_MAX); - *ret_m = div_u64((uint64_t) m * *ret_n, n); + *ret_m = div_u64((u64)m * *ret_n, n); intel_reduce_m_n_ratio(ret_m, ret_n); } @@ -6801,12 +6946,12 @@ static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv) && !(dev_priv->quirks & QUIRK_LVDS_SSC_DISABLE); } -static uint32_t pnv_dpll_compute_fp(struct dpll *dpll) +static u32 pnv_dpll_compute_fp(struct dpll *dpll) { return (1 << dpll->n) << 16 | dpll->m2; } -static uint32_t i9xx_dpll_compute_fp(struct dpll *dpll) +static u32 i9xx_dpll_compute_fp(struct dpll *dpll) { return dpll->n << 16 | dpll->m1 << 8 | dpll->m2; } @@ -7362,7 +7507,7 @@ static void intel_set_pipe_timings(const struct intel_crtc_state *crtc_state) enum pipe pipe = crtc->pipe; enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; - uint32_t crtc_vtotal, crtc_vblank_end; + u32 crtc_vtotal, crtc_vblank_end; int vsyncshift = 0; /* We need to be careful not to changed the adjusted mode, for otherwise @@ -7437,7 +7582,7 @@ static void intel_get_pipe_timings(struct intel_crtc *crtc, struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); enum transcoder cpu_transcoder = pipe_config->cpu_transcoder; - uint32_t tmp; + u32 tmp; tmp = I915_READ(HTOTAL(cpu_transcoder)); pipe_config->base.adjusted_mode.crtc_hdisplay = (tmp & 0xffff) + 1; @@ -7508,7 +7653,7 @@ static void i9xx_set_pipeconf(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - uint32_t pipeconf; + u32 pipeconf; pipeconf = 0; @@ -7753,7 +7898,7 @@ static void i9xx_get_pfit_config(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - uint32_t tmp; + u32 tmp; if (INTEL_GEN(dev_priv) <= 3 && (IS_I830(dev_priv) || !IS_MOBILE(dev_priv))) @@ -7968,11 +8113,13 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc, { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum intel_display_power_domain power_domain; - uint32_t tmp; + intel_wakeref_t wakeref; + u32 tmp; bool ret; power_domain = POWER_DOMAIN_PIPE(crtc->pipe); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) return false; pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; @@ -8073,7 +8220,7 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc, ret = true; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); return ret; } @@ -8247,7 +8394,7 @@ static void ironlake_init_pch_refclk(struct drm_i915_private *dev_priv) static void lpt_reset_fdi_mphy(struct drm_i915_private *dev_priv) { - uint32_t tmp; + u32 tmp; tmp = I915_READ(SOUTH_CHICKEN2); tmp |= FDI_MPHY_IOSFSB_RESET_CTL; @@ -8269,7 +8416,7 @@ static void lpt_reset_fdi_mphy(struct drm_i915_private *dev_priv) /* WaMPhyProgramming:hsw */ static void lpt_program_fdi_mphy(struct drm_i915_private *dev_priv) { - uint32_t tmp; + u32 tmp; tmp = intel_sbi_read(dev_priv, 0x8008, SBI_MPHY); tmp &= ~(0xFF << 24); @@ -8350,7 +8497,7 @@ static void lpt_program_fdi_mphy(struct drm_i915_private *dev_priv) static void lpt_enable_clkout_dp(struct drm_i915_private *dev_priv, bool with_spread, bool with_fdi) { - uint32_t reg, tmp; + u32 reg, tmp; if (WARN(with_fdi && !with_spread, "FDI requires downspread\n")) with_spread = true; @@ -8389,7 +8536,7 @@ static void lpt_enable_clkout_dp(struct drm_i915_private *dev_priv, /* Sequence to disable CLKOUT_DP */ static void lpt_disable_clkout_dp(struct drm_i915_private *dev_priv) { - uint32_t reg, tmp; + u32 reg, tmp; mutex_lock(&dev_priv->sb_lock); @@ -8414,7 +8561,7 @@ static void lpt_disable_clkout_dp(struct drm_i915_private *dev_priv) #define BEND_IDX(steps) ((50 + (steps)) / 5) -static const uint16_t sscdivintphase[] = { +static const u16 sscdivintphase[] = { [BEND_IDX( 50)] = 0x3B23, [BEND_IDX( 45)] = 0x3B23, [BEND_IDX( 40)] = 0x3C23, @@ -8446,7 +8593,7 @@ static const uint16_t sscdivintphase[] = { */ static void lpt_bend_clkout_dp(struct drm_i915_private *dev_priv, int steps) { - uint32_t tmp; + u32 tmp; int idx = BEND_IDX(steps); if (WARN_ON(steps % 5 != 0)) @@ -8512,7 +8659,7 @@ static void ironlake_set_pipeconf(const struct intel_crtc_state *crtc_state) struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; - uint32_t val; + u32 val; val = 0; @@ -8859,7 +9006,7 @@ static void skylake_get_pfit_config(struct intel_crtc *crtc, struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc_scaler_state *scaler_state = &pipe_config->scaler_state; - uint32_t ps_ctrl = 0; + u32 ps_ctrl = 0; int id = -1; int i; @@ -8871,6 +9018,7 @@ static void skylake_get_pfit_config(struct intel_crtc *crtc, pipe_config->pch_pfit.enabled = true; pipe_config->pch_pfit.pos = I915_READ(SKL_PS_WIN_POS(crtc->pipe, i)); pipe_config->pch_pfit.size = I915_READ(SKL_PS_WIN_SZ(crtc->pipe, i)); + scaler_state->scalers[i].in_use = true; break; } } @@ -9015,7 +9163,7 @@ static void ironlake_get_pfit_config(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - uint32_t tmp; + u32 tmp; tmp = I915_READ(PF_CTL(crtc->pipe)); @@ -9040,11 +9188,13 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc, struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); enum intel_display_power_domain power_domain; - uint32_t tmp; + intel_wakeref_t wakeref; + u32 tmp; bool ret; power_domain = POWER_DOMAIN_PIPE(crtc->pipe); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) return false; pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; @@ -9127,7 +9277,7 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc, ret = true; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); return ret; } @@ -9167,7 +9317,7 @@ static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv) I915_STATE_WARN(intel_irqs_enabled(dev_priv), "IRQs enabled\n"); } -static uint32_t hsw_read_dcomp(struct drm_i915_private *dev_priv) +static u32 hsw_read_dcomp(struct drm_i915_private *dev_priv) { if (IS_HASWELL(dev_priv)) return I915_READ(D_COMP_HSW); @@ -9175,7 +9325,7 @@ static uint32_t hsw_read_dcomp(struct drm_i915_private *dev_priv) return I915_READ(D_COMP_BDW); } -static void hsw_write_dcomp(struct drm_i915_private *dev_priv, uint32_t val) +static void hsw_write_dcomp(struct drm_i915_private *dev_priv, u32 val) { if (IS_HASWELL(dev_priv)) { mutex_lock(&dev_priv->pcu_lock); @@ -9200,7 +9350,7 @@ static void hsw_write_dcomp(struct drm_i915_private *dev_priv, uint32_t val) static void hsw_disable_lcpll(struct drm_i915_private *dev_priv, bool switch_to_fclk, bool allow_power_down) { - uint32_t val; + u32 val; assert_can_disable_lcpll(dev_priv); @@ -9247,7 +9397,7 @@ static void hsw_disable_lcpll(struct drm_i915_private *dev_priv, */ static void hsw_restore_lcpll(struct drm_i915_private *dev_priv) { - uint32_t val; + u32 val; val = I915_READ(LCPLL_CTL); @@ -9322,7 +9472,7 @@ static void hsw_restore_lcpll(struct drm_i915_private *dev_priv) */ void hsw_enable_pc8(struct drm_i915_private *dev_priv) { - uint32_t val; + u32 val; DRM_DEBUG_KMS("Enabling package C8+\n"); @@ -9338,7 +9488,7 @@ void hsw_enable_pc8(struct drm_i915_private *dev_priv) void hsw_disable_pc8(struct drm_i915_private *dev_priv) { - uint32_t val; + u32 val; DRM_DEBUG_KMS("Disabling package C8+\n"); @@ -9406,7 +9556,7 @@ static void icelake_get_ddi_pll(struct drm_i915_private *dev_priv, if (WARN_ON(!intel_dpll_is_combophy(id))) return; } else if (intel_port_is_tc(dev_priv, port)) { - id = icl_port_to_mg_pll_id(port); + id = icl_tc_port_to_pll_id(intel_port_to_tc(dev_priv, port)); } else { WARN(1, "Invalid port %x\n", port); return; @@ -9460,7 +9610,7 @@ static void haswell_get_ddi_pll(struct drm_i915_private *dev_priv, struct intel_crtc_state *pipe_config) { enum intel_dpll_id id; - uint32_t ddi_pll_sel = I915_READ(PORT_CLK_SEL(port)); + u32 ddi_pll_sel = I915_READ(PORT_CLK_SEL(port)); switch (ddi_pll_sel) { case PORT_CLK_SEL_WRPLL1: @@ -9517,7 +9667,9 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc, * XXX: Do intel_display_power_get_if_enabled before reading this (for * consistency and less surprising code; it's in always on power). */ - for_each_set_bit(panel_transcoder, &panel_transcoder_mask, 32) { + for_each_set_bit(panel_transcoder, + &panel_transcoder_mask, + ARRAY_SIZE(INTEL_INFO(dev_priv)->trans_offsets)) { enum pipe trans_pipe; tmp = I915_READ(TRANS_DDI_FUNC_CTL(panel_transcoder)); @@ -9563,6 +9715,8 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc, power_domain = POWER_DOMAIN_TRANSCODER(pipe_config->cpu_transcoder); if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) return false; + + WARN_ON(*power_domain_mask & BIT_ULL(power_domain)); *power_domain_mask |= BIT_ULL(power_domain); tmp = I915_READ(PIPECONF(pipe_config->cpu_transcoder)); @@ -9590,6 +9744,8 @@ static bool bxt_get_dsi_transcoder_state(struct intel_crtc *crtc, power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder); if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) continue; + + WARN_ON(*power_domain_mask & BIT_ULL(power_domain)); *power_domain_mask |= BIT_ULL(power_domain); /* @@ -9624,7 +9780,7 @@ static void haswell_get_ddi_port_state(struct intel_crtc *crtc, struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_shared_dpll *pll; enum port port; - uint32_t tmp; + u32 tmp; tmp = I915_READ(TRANS_DDI_FUNC_CTL(pipe_config->cpu_transcoder)); @@ -9706,7 +9862,9 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, power_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe); if (intel_display_power_get_if_enabled(dev_priv, power_domain)) { + WARN_ON(power_domain_mask & BIT_ULL(power_domain)); power_domain_mask |= BIT_ULL(power_domain); + if (INTEL_GEN(dev_priv) >= 9) skylake_get_pfit_config(crtc, pipe_config); else @@ -9736,7 +9894,7 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, out: for_each_power_domain(power_domain, power_domain_mask) - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put_unchecked(dev_priv, power_domain); return active; } @@ -9757,7 +9915,7 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state) base += plane_state->color_plane[0].offset; /* ILK+ do this automagically */ - if (HAS_GMCH_DISPLAY(dev_priv) && + if (HAS_GMCH(dev_priv) && plane_state->base.rotation & DRM_MODE_ROTATE_180) base += (plane_state->base.crtc_h * plane_state->base.crtc_w - 1) * fb->format->cpp[0]; @@ -9870,11 +10028,15 @@ i845_cursor_max_stride(struct intel_plane *plane, return 2048; } +static u32 i845_cursor_ctl_crtc(const struct intel_crtc_state *crtc_state) +{ + return CURSOR_GAMMA_ENABLE; +} + static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { return CURSOR_ENABLE | - CURSOR_GAMMA_ENABLE | CURSOR_FORMAT_ARGB | CURSOR_STRIDE(plane_state->color_plane[0].stride); } @@ -9944,7 +10106,9 @@ static void i845_update_cursor(struct intel_plane *plane, unsigned int width = plane_state->base.crtc_w; unsigned int height = plane_state->base.crtc_h; - cntl = plane_state->ctl; + cntl = plane_state->ctl | + i845_cursor_ctl_crtc(crtc_state); + size = (height << 12) | width; base = intel_cursor_base(plane_state); @@ -9986,17 +10150,19 @@ static bool i845_cursor_get_hw_state(struct intel_plane *plane, { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum intel_display_power_domain power_domain; + intel_wakeref_t wakeref; bool ret; power_domain = POWER_DOMAIN_PIPE(PIPE_A); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) return false; ret = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE; *pipe = PIPE_A; - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); return ret; } @@ -10009,27 +10175,36 @@ i9xx_cursor_max_stride(struct intel_plane *plane, return plane->base.dev->mode_config.cursor_width * 4; } -static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 i9xx_cursor_ctl_crtc(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = - to_i915(plane_state->base.plane->dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); u32 cntl = 0; - if (IS_GEN(dev_priv, 6) || IS_IVYBRIDGE(dev_priv)) - cntl |= MCURSOR_TRICKLE_FEED_DISABLE; + if (INTEL_GEN(dev_priv) >= 11) + return cntl; - if (INTEL_GEN(dev_priv) <= 10) { - cntl |= MCURSOR_GAMMA_ENABLE; + cntl |= MCURSOR_GAMMA_ENABLE; - if (HAS_DDI(dev_priv)) - cntl |= MCURSOR_PIPE_CSC_ENABLE; - } + if (HAS_DDI(dev_priv)) + cntl |= MCURSOR_PIPE_CSC_ENABLE; if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv)) cntl |= MCURSOR_PIPE_SELECT(crtc->pipe); + return cntl; +} + +static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); + u32 cntl = 0; + + if (IS_GEN(dev_priv, 6) || IS_IVYBRIDGE(dev_priv)) + cntl |= MCURSOR_TRICKLE_FEED_DISABLE; + switch (plane_state->base.crtc_w) { case 64: cntl |= MCURSOR_MODE_64_ARGB_AX; @@ -10154,7 +10329,8 @@ static void i9xx_update_cursor(struct intel_plane *plane, unsigned long irqflags; if (plane_state && plane_state->base.visible) { - cntl = plane_state->ctl; + cntl = plane_state->ctl | + i9xx_cursor_ctl_crtc(crtc_state); if (plane_state->base.crtc_h != plane_state->base.crtc_w) fbc_ctl = CUR_FBC_CTL_EN | (plane_state->base.crtc_h - 1); @@ -10219,6 +10395,7 @@ static bool i9xx_cursor_get_hw_state(struct intel_plane *plane, { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum intel_display_power_domain power_domain; + intel_wakeref_t wakeref; bool ret; u32 val; @@ -10228,7 +10405,8 @@ static bool i9xx_cursor_get_hw_state(struct intel_plane *plane, * display power wells. */ power_domain = POWER_DOMAIN_PIPE(plane->pipe); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) return false; val = I915_READ(CURCNTR(plane->pipe)); @@ -10241,7 +10419,7 @@ static bool i9xx_cursor_get_hw_state(struct intel_plane *plane, *pipe = (val & MCURSOR_PIPE_SELECT_MASK) >> MCURSOR_PIPE_SELECT_SHIFT; - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); return ret; } @@ -10835,8 +11013,11 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat * Despite the w/a only being listed for IVB we assume that * the ILK/SNB note has similar ramifications, hence we apply * the w/a on all three platforms. + * + * With experimental results seems this is needed also for primary + * plane, not only sprite plane. */ - if (plane->id == PLANE_SPRITE0 && + if (plane->id != PLANE_CURSOR && (IS_GEN_RANGE(dev_priv, 5, 6) || IS_IVYBRIDGE(dev_priv)) && (turn_on || (!needs_scaling(old_plane_state) && @@ -10981,7 +11162,8 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc, int ret; bool mode_changed = needs_modeset(crtc_state); - if (mode_changed && !crtc_state->active) + if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv) && + mode_changed && !crtc_state->active) pipe_config->update_wm_post = true; if (mode_changed && crtc_state->enable && @@ -10993,7 +11175,7 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc, return ret; } - if (crtc_state->color_mgmt_changed) { + if (mode_changed || crtc_state->color_mgmt_changed) { ret = intel_color_check(pipe_config); if (ret) return ret; @@ -11292,7 +11474,7 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc, pipe_config->scaler_state.scaler_users, pipe_config->scaler_state.scaler_id); - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) DRM_DEBUG_KMS("gmch pfit: control: 0x%08x, ratios: 0x%08x, lvds border: 0x%08x\n", pipe_config->gmch_pfit.control, pipe_config->gmch_pfit.pgm_ratios, @@ -11404,44 +11586,38 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state) return ret; } -static void +static int clear_intel_crtc_state(struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - struct intel_crtc_scaler_state scaler_state; - struct intel_dpll_hw_state dpll_hw_state; - struct intel_shared_dpll *shared_dpll; - struct intel_crtc_wm_state wm_state; - bool force_thru, ips_force_disable; + struct intel_crtc_state *saved_state; + + saved_state = kzalloc(sizeof(*saved_state), GFP_KERNEL); + if (!saved_state) + return -ENOMEM; /* FIXME: before the switch to atomic started, a new pipe_config was * kzalloc'd. Code that depends on any field being zero should be * fixed, so that the crtc_state can be safely duplicated. For now, * only fields that are know to not cause problems are preserved. */ - scaler_state = crtc_state->scaler_state; - shared_dpll = crtc_state->shared_dpll; - dpll_hw_state = crtc_state->dpll_hw_state; - force_thru = crtc_state->pch_pfit.force_thru; - ips_force_disable = crtc_state->ips_force_disable; + saved_state->scaler_state = crtc_state->scaler_state; + saved_state->shared_dpll = crtc_state->shared_dpll; + saved_state->dpll_hw_state = crtc_state->dpll_hw_state; + saved_state->pch_pfit.force_thru = crtc_state->pch_pfit.force_thru; + saved_state->ips_force_disable = crtc_state->ips_force_disable; if (IS_G4X(dev_priv) || IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - wm_state = crtc_state->wm; + saved_state->wm = crtc_state->wm; /* Keep base drm_crtc_state intact, only clear our extended struct */ BUILD_BUG_ON(offsetof(struct intel_crtc_state, base)); - memset(&crtc_state->base + 1, 0, + memcpy(&crtc_state->base + 1, &saved_state->base + 1, sizeof(*crtc_state) - sizeof(crtc_state->base)); - crtc_state->scaler_state = scaler_state; - crtc_state->shared_dpll = shared_dpll; - crtc_state->dpll_hw_state = dpll_hw_state; - crtc_state->pch_pfit.force_thru = force_thru; - crtc_state->ips_force_disable = ips_force_disable; - if (IS_G4X(dev_priv) || - IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - crtc_state->wm = wm_state; + kfree(saved_state); + return 0; } static int @@ -11456,7 +11632,9 @@ intel_modeset_pipe_config(struct drm_crtc *crtc, int i; bool retry = true; - clear_intel_crtc_state(pipe_config); + ret = clear_intel_crtc_state(pipe_config); + if (ret) + return ret; pipe_config->cpu_transcoder = (enum transcoder) to_intel_crtc(crtc)->pipe; @@ -11665,6 +11843,23 @@ pipe_config_err(bool adjust, const char *name, const char *format, ...) va_end(args); } +static bool fastboot_enabled(struct drm_i915_private *dev_priv) +{ + if (i915_modparams.fastboot != -1) + return i915_modparams.fastboot; + + /* Enable fastboot by default on Skylake and newer */ + if (INTEL_GEN(dev_priv) >= 9) + return true; + + /* Enable fastboot by default on VLV and CHV */ + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + return true; + + /* Disabled by default on all others */ + return false; +} + static bool intel_pipe_config_compare(struct drm_i915_private *dev_priv, struct intel_crtc_state *current_config, @@ -11676,6 +11871,11 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, (current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) && !(pipe_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED); + if (fixup_inherited && !fastboot_enabled(dev_priv)) { + DRM_DEBUG_KMS("initial modeset and fastboot not set\n"); + ret = false; + } + #define PIPE_CONF_CHECK_X(name) do { \ if (current_config->name != pipe_config->name) { \ pipe_config_err(adjust, __stringify(name), \ @@ -12699,8 +12899,7 @@ static int intel_atomic_check(struct drm_device *dev, return ret; } - if (i915_modparams.fastboot && - intel_pipe_config_compare(dev_priv, + if (intel_pipe_config_compare(dev_priv, to_intel_crtc_state(old_crtc_state), pipe_config, true)) { crtc_state->mode_changed = false; @@ -12749,8 +12948,9 @@ static int intel_atomic_prepare_commit(struct drm_device *dev, u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; + struct drm_vblank_crtc *vblank = &dev->vblank[drm_crtc_index(&crtc->base)]; - if (!dev->max_vblank_count) + if (!vblank->max_vblank_count) return (u32)drm_crtc_accurate_vblank_count(&crtc->base); return dev->driver->get_vblank_counter(dev, crtc->pipe); @@ -12959,6 +13159,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) struct drm_crtc *crtc; struct intel_crtc *intel_crtc; u64 put_domains[I915_MAX_PIPES] = {}; + intel_wakeref_t wakeref = 0; int i; intel_atomic_commit_fence_wait(intel_state); @@ -12966,7 +13167,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) drm_atomic_helper_wait_for_dependencies(state); if (intel_state->modeset) - intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET); + wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET); for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { old_intel_crtc_state = to_intel_crtc_state(old_crtc_state); @@ -13009,7 +13210,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) /* FIXME unify this for all platforms */ if (!new_crtc_state->active && - !HAS_GMCH_DISPLAY(dev_priv) && + !HAS_GMCH(dev_priv) && dev_priv->display.initial_watermarks) dev_priv->display.initial_watermarks(intel_state, new_intel_crtc_state); @@ -13063,6 +13264,16 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) */ drm_atomic_helper_wait_for_flip_done(dev, state); + for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { + new_intel_crtc_state = to_intel_crtc_state(new_crtc_state); + + if (new_crtc_state->active && + !needs_modeset(new_crtc_state) && + (new_intel_crtc_state->base.color_mgmt_changed || + new_intel_crtc_state->update_pipe)) + intel_color_load_luts(new_intel_crtc_state); + } + /* * Now that the vblank has passed, we can go ahead and program the * optimal watermarks on platforms that need two-step watermark @@ -13103,7 +13314,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) * the culprit. */ intel_uncore_arm_unclaimed_mmio_detection(dev_priv); - intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET); + intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET, wakeref); } /* @@ -13578,19 +13789,16 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc, intel_atomic_get_new_crtc_state(old_intel_state, intel_crtc); bool modeset = needs_modeset(&intel_cstate->base); - if (!modeset && - (intel_cstate->base.color_mgmt_changed || - intel_cstate->update_pipe)) { - intel_color_set_csc(intel_cstate); - intel_color_load_luts(intel_cstate); - } - /* Perform vblank evasion around commit operation */ intel_pipe_update_start(intel_cstate); if (modeset) goto out; + if (intel_cstate->base.color_mgmt_changed || + intel_cstate->update_pipe) + intel_color_commit(intel_cstate); + if (intel_cstate->update_pipe) intel_update_pipe_config(old_intel_cstate, intel_cstate); else if (INTEL_GEN(dev_priv) >= 9) @@ -13731,8 +13939,8 @@ intel_legacy_cursor_update(struct drm_plane *plane, struct drm_framebuffer *fb, int crtc_x, int crtc_y, unsigned int crtc_w, unsigned int crtc_h, - uint32_t src_x, uint32_t src_y, - uint32_t src_w, uint32_t src_h, + u32 src_x, u32 src_y, + u32 src_w, u32 src_h, struct drm_modeset_acquire_ctx *ctx) { struct drm_i915_private *dev_priv = to_i915(crtc->dev); @@ -14206,7 +14414,7 @@ static int intel_encoder_clones(struct intel_encoder *encoder) return index_mask; } -static bool has_edp_a(struct drm_i915_private *dev_priv) +static bool ilk_has_edp_a(struct drm_i915_private *dev_priv) { if (!IS_MOBILE(dev_priv)) return false; @@ -14220,7 +14428,7 @@ static bool has_edp_a(struct drm_i915_private *dev_priv) return true; } -static bool intel_crt_present(struct drm_i915_private *dev_priv) +static bool intel_ddi_crt_present(struct drm_i915_private *dev_priv) { if (INTEL_GEN(dev_priv) >= 9) return false; @@ -14228,15 +14436,12 @@ static bool intel_crt_present(struct drm_i915_private *dev_priv) if (IS_HSW_ULT(dev_priv) || IS_BDW_ULT(dev_priv)) return false; - if (IS_CHERRYVIEW(dev_priv)) - return false; - if (HAS_PCH_LPT_H(dev_priv) && I915_READ(SFUSE_STRAP) & SFUSE_STRAP_CRT_DISABLED) return false; /* DDI E can't be used if DDI A requires 4 lanes */ - if (HAS_DDI(dev_priv) && I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_A_4_LANES) + if (I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_A_4_LANES) return false; if (!dev_priv->vbt.int_crt_support) @@ -14291,23 +14496,21 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) if (!HAS_DISPLAY(dev_priv)) return; - /* - * intel_edp_init_connector() depends on this completing first, to - * prevent the registeration of both eDP and LVDS and the incorrect - * sharing of the PPS. - */ - intel_lvds_init(dev_priv); - - if (intel_crt_present(dev_priv)) - intel_crt_init(dev_priv); - if (IS_ICELAKE(dev_priv)) { intel_ddi_init(dev_priv, PORT_A); intel_ddi_init(dev_priv, PORT_B); intel_ddi_init(dev_priv, PORT_C); intel_ddi_init(dev_priv, PORT_D); intel_ddi_init(dev_priv, PORT_E); - intel_ddi_init(dev_priv, PORT_F); + /* + * On some ICL SKUs port F is not present. No strap bits for + * this, so rely on VBT. + * Work around broken VBTs on SKUs known to have no port F. + */ + if (IS_ICL_WITH_PORT_F(dev_priv) && + intel_bios_is_port_present(dev_priv, PORT_F)) + intel_ddi_init(dev_priv, PORT_F); + icl_dsi_init(dev_priv); } else if (IS_GEN9_LP(dev_priv)) { /* @@ -14323,6 +14526,9 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) } else if (HAS_DDI(dev_priv)) { int found; + if (intel_ddi_crt_present(dev_priv)) + intel_crt_init(dev_priv); + /* * Haswell uses DDI functions to detect digital outputs. * On SKL pre-D0 the strap isn't connected, so we assume @@ -14349,16 +14555,23 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) * On SKL we don't have a way to detect DDI-E so we rely on VBT. */ if (IS_GEN9_BC(dev_priv) && - (dev_priv->vbt.ddi_port_info[PORT_E].supports_dp || - dev_priv->vbt.ddi_port_info[PORT_E].supports_dvi || - dev_priv->vbt.ddi_port_info[PORT_E].supports_hdmi)) + intel_bios_is_port_present(dev_priv, PORT_E)) intel_ddi_init(dev_priv, PORT_E); } else if (HAS_PCH_SPLIT(dev_priv)) { int found; + + /* + * intel_edp_init_connector() depends on this completing first, + * to prevent the registration of both eDP and LVDS and the + * incorrect sharing of the PPS. + */ + intel_lvds_init(dev_priv); + intel_crt_init(dev_priv); + dpd_is_edp = intel_dp_is_port_edp(dev_priv, PORT_D); - if (has_edp_a(dev_priv)) + if (ilk_has_edp_a(dev_priv)) intel_dp_init(dev_priv, DP_A, PORT_A); if (I915_READ(PCH_HDMIB) & SDVO_DETECTED) { @@ -14384,6 +14597,9 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { bool has_edp, has_port; + if (IS_VALLEYVIEW(dev_priv) && dev_priv->vbt.int_crt_support) + intel_crt_init(dev_priv); + /* * The DP_DETECTED bit is the latched state of the DDC * SDA pin at boot. However since eDP doesn't require DDC @@ -14426,9 +14642,17 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) } vlv_dsi_init(dev_priv); - } else if (!IS_GEN(dev_priv, 2) && !IS_PINEVIEW(dev_priv)) { + } else if (IS_PINEVIEW(dev_priv)) { + intel_lvds_init(dev_priv); + intel_crt_init(dev_priv); + } else if (IS_GEN_RANGE(dev_priv, 3, 4)) { bool found = false; + if (IS_MOBILE(dev_priv)) + intel_lvds_init(dev_priv); + + intel_crt_init(dev_priv); + if (I915_READ(GEN3_SDVOB) & SDVO_DETECTED) { DRM_DEBUG_KMS("probing SDVOB\n"); found = intel_sdvo_init(dev_priv, GEN3_SDVOB, PORT_B); @@ -14460,11 +14684,16 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) if (IS_G4X(dev_priv) && (I915_READ(DP_D) & DP_DETECTED)) intel_dp_init(dev_priv, DP_D, PORT_D); - } else if (IS_GEN(dev_priv, 2)) - intel_dvo_init(dev_priv); - if (SUPPORTS_TV(dev_priv)) - intel_tv_init(dev_priv); + if (SUPPORTS_TV(dev_priv)) + intel_tv_init(dev_priv); + } else if (IS_GEN(dev_priv, 2)) { + if (IS_I85X(dev_priv)) + intel_lvds_init(dev_priv); + + intel_crt_init(dev_priv); + intel_dvo_init(dev_priv); + } intel_psr_init(dev_priv); @@ -14631,14 +14860,6 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_helper_mode_fill_fb_struct(&dev_priv->drm, fb, mode_cmd); - if (fb->format->format == DRM_FORMAT_NV12 && - (fb->width < SKL_MIN_YUV_420_SRC_W || - fb->height < SKL_MIN_YUV_420_SRC_H || - (fb->width % 4) != 0 || (fb->height % 4) != 0)) { - DRM_DEBUG_KMS("src dimensions not correct for NV12\n"); - goto err; - } - for (i = 0; i < fb->format->num_planes; i++) { u32 stride_alignment; @@ -14974,7 +15195,7 @@ retry: * intermediate watermarks (since we don't trust the current * watermarks). */ - if (!HAS_GMCH_DISPLAY(dev_priv)) + if (!HAS_GMCH(dev_priv)) intel_state->skip_intermediate_wm = true; ret = intel_atomic_check(dev, state); @@ -15215,7 +15436,7 @@ int intel_modeset_init(struct drm_device *dev) * Note that we need to do this after reconstructing the BIOS fb's * since the watermark calculation done here will use pstate->fb. */ - if (!HAS_GMCH_DISPLAY(dev_priv)) + if (!HAS_GMCH(dev_priv)) sanitize_watermarks(dev); /* @@ -15408,6 +15629,15 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, plane->base.type != DRM_PLANE_TYPE_PRIMARY) intel_plane_disable_noatomic(crtc, plane); } + + /* + * Disable any background color set by the BIOS, but enable the + * gamma and CSC to match how we program our planes. + */ + if (INTEL_GEN(dev_priv) >= 9) + I915_WRITE(SKL_BOTTOM_COLOR(crtc->pipe), + SKL_BOTTOM_COLOR_GAMMA_ENABLE | + SKL_BOTTOM_COLOR_CSC_ENABLE); } /* Adjust the state of the output pipe according to whether we @@ -15415,7 +15645,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, if (crtc_state->base.active && !intel_crtc_has_encoders(crtc)) intel_crtc_disable_noatomic(&crtc->base, ctx); - if (crtc_state->base.active || HAS_GMCH_DISPLAY(dev_priv)) { + if (crtc_state->base.active || HAS_GMCH(dev_priv)) { /* * We start out with underrun reporting disabled to avoid races. * For correct bookkeeping mark this on active crtcs. @@ -15444,16 +15674,45 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, } } +static bool has_bogus_dpll_config(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + + /* + * Some SNB BIOSen (eg. ASUS K53SV) are known to misprogram + * the hardware when a high res displays plugged in. DPLL P + * divider is zero, and the pipe timings are bonkers. We'll + * try to disable everything in that case. + * + * FIXME would be nice to be able to sanitize this state + * without several WARNs, but for now let's take the easy + * road. + */ + return IS_GEN(dev_priv, 6) && + crtc_state->base.active && + crtc_state->shared_dpll && + crtc_state->port_clock == 0; +} + static void intel_sanitize_encoder(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_connector *connector; + struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + struct intel_crtc_state *crtc_state = crtc ? + to_intel_crtc_state(crtc->base.state) : NULL; /* We need to check both for a crtc link (meaning that the * encoder is active and trying to read from a pipe) and the * pipe itself being active. */ - bool has_active_crtc = encoder->base.crtc && - to_intel_crtc(encoder->base.crtc)->active; + bool has_active_crtc = crtc_state && + crtc_state->base.active; + + if (crtc_state && has_bogus_dpll_config(crtc_state)) { + DRM_DEBUG_KMS("BIOS has misprogrammed the hardware. Disabling pipe %c\n", + pipe_name(crtc->pipe)); + has_active_crtc = false; + } connector = intel_encoder_find_connector(encoder); if (connector && !has_active_crtc) { @@ -15464,16 +15723,25 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) /* Connector is active, but has no active pipe. This is * fallout from our resume register restoring. Disable * the encoder manually again. */ - if (encoder->base.crtc) { - struct drm_crtc_state *crtc_state = encoder->base.crtc->state; + if (crtc_state) { + struct drm_encoder *best_encoder; DRM_DEBUG_KMS("[ENCODER:%d:%s] manually disabled\n", encoder->base.base.id, encoder->base.name); + + /* avoid oopsing in case the hooks consult best_encoder */ + best_encoder = connector->base.state->best_encoder; + connector->base.state->best_encoder = &encoder->base; + if (encoder->disable) - encoder->disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state); + encoder->disable(encoder, crtc_state, + connector->base.state); if (encoder->post_disable) - encoder->post_disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state); + encoder->post_disable(encoder, crtc_state, + connector->base.state); + + connector->base.state->best_encoder = best_encoder; } encoder->base.crtc = NULL; @@ -15505,19 +15773,25 @@ void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv) void i915_redisable_vga(struct drm_i915_private *dev_priv) { - /* This function can be called both from intel_modeset_setup_hw_state or + intel_wakeref_t wakeref; + + /* + * This function can be called both from intel_modeset_setup_hw_state or * at a very early point in our resume sequence, where the power well * structures are not yet restored. Since this function is at a very * paranoid "someone might have enabled VGA while we were not looking" * level, just check if the power well is enabled instead of trying to * follow the "don't touch the power well if we don't need it" policy - * the rest of the driver uses. */ - if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_VGA)) + * the rest of the driver uses. + */ + wakeref = intel_display_power_get_if_enabled(dev_priv, + POWER_DOMAIN_VGA); + if (!wakeref) return; i915_redisable_vga_power_on(dev_priv); - intel_display_power_put(dev_priv, POWER_DOMAIN_VGA); + intel_display_power_put(dev_priv, POWER_DOMAIN_VGA, wakeref); } /* FIXME read out full plane state for all planes */ @@ -15817,12 +16091,13 @@ intel_modeset_setup_hw_state(struct drm_device *dev, struct drm_modeset_acquire_ctx *ctx) { struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc; struct intel_crtc_state *crtc_state; struct intel_encoder *encoder; + struct intel_crtc *crtc; + intel_wakeref_t wakeref; int i; - intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); + wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); intel_early_display_was(dev_priv); intel_modeset_readout_hw_state(dev); @@ -15838,10 +16113,12 @@ intel_modeset_setup_hw_state(struct drm_device *dev, * waits, so we need vblank interrupts restored beforehand. */ for_each_intel_crtc(&dev_priv->drm, crtc) { + crtc_state = to_intel_crtc_state(crtc->base.state); + drm_crtc_vblank_reset(&crtc->base); - if (crtc->base.state->active) - drm_crtc_vblank_on(&crtc->base); + if (crtc_state->base.active) + intel_crtc_vblank_on(crtc_state); } intel_sanitize_plane_mapping(dev_priv); @@ -15892,7 +16169,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev, modeset_put_power_domains(dev_priv, put_domains); } - intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); + intel_display_power_put(dev_priv, POWER_DOMAIN_INIT, wakeref); intel_fbc_init_pipe_state(dev_priv); } @@ -16115,7 +16392,7 @@ intel_display_capture_error_state(struct drm_i915_private *dev_priv) error->pipe[i].source = I915_READ(PIPESRC(i)); - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) error->pipe[i].stat = I915_READ(PIPESTAT(i)); } diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 6f0bb9a744fb..cf709835fb9a 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -429,7 +429,7 @@ static void intel_dp_set_common_rates(struct intel_dp *intel_dp) } static bool intel_dp_link_params_valid(struct intel_dp *intel_dp, int link_rate, - uint8_t lane_count) + u8 lane_count) { /* * FIXME: we need to synchronize the current link parameters with @@ -449,7 +449,7 @@ static bool intel_dp_link_params_valid(struct intel_dp *intel_dp, int link_rate, static bool intel_dp_can_link_train_fallback_for_edp(struct intel_dp *intel_dp, int link_rate, - uint8_t lane_count) + u8 lane_count) { const struct drm_display_mode *fixed_mode = intel_dp->attached_connector->panel.fixed_mode; @@ -464,7 +464,7 @@ static bool intel_dp_can_link_train_fallback_for_edp(struct intel_dp *intel_dp, } int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, - int link_rate, uint8_t lane_count) + int link_rate, u8 lane_count) { int index; @@ -572,19 +572,19 @@ intel_dp_mode_valid(struct drm_connector *connector, return MODE_OK; } -uint32_t intel_dp_pack_aux(const uint8_t *src, int src_bytes) +u32 intel_dp_pack_aux(const u8 *src, int src_bytes) { - int i; - uint32_t v = 0; + int i; + u32 v = 0; if (src_bytes > 4) src_bytes = 4; for (i = 0; i < src_bytes; i++) - v |= ((uint32_t) src[i]) << ((3-i) * 8); + v |= ((u32)src[i]) << ((3 - i) * 8); return v; } -static void intel_dp_unpack_aux(uint32_t src, uint8_t *dst, int dst_bytes) +static void intel_dp_unpack_aux(u32 src, u8 *dst, int dst_bytes) { int i; if (dst_bytes > 4) @@ -601,30 +601,39 @@ intel_dp_init_panel_power_sequencer_registers(struct intel_dp *intel_dp, static void intel_dp_pps_init(struct intel_dp *intel_dp); -static void pps_lock(struct intel_dp *intel_dp) +static intel_wakeref_t +pps_lock(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + intel_wakeref_t wakeref; /* * See intel_power_sequencer_reset() why we need * a power domain reference here. */ - intel_display_power_get(dev_priv, - intel_aux_power_domain(dp_to_dig_port(intel_dp))); + wakeref = intel_display_power_get(dev_priv, + intel_aux_power_domain(dp_to_dig_port(intel_dp))); mutex_lock(&dev_priv->pps_mutex); + + return wakeref; } -static void pps_unlock(struct intel_dp *intel_dp) +static intel_wakeref_t +pps_unlock(struct intel_dp *intel_dp, intel_wakeref_t wakeref) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); mutex_unlock(&dev_priv->pps_mutex); - intel_display_power_put(dev_priv, - intel_aux_power_domain(dp_to_dig_port(intel_dp))); + intel_aux_power_domain(dp_to_dig_port(intel_dp)), + wakeref); + return 0; } +#define with_pps_lock(dp, wf) \ + for ((wf) = pps_lock(dp); (wf); (wf) = pps_unlock((dp), (wf))) + static void vlv_power_sequencer_kick(struct intel_dp *intel_dp) { @@ -634,7 +643,7 @@ vlv_power_sequencer_kick(struct intel_dp *intel_dp) bool pll_enabled, release_cl_override = false; enum dpio_phy phy = DPIO_PHY(pipe); enum dpio_channel ch = vlv_pipe_to_channel(pipe); - uint32_t DP; + u32 DP; if (WARN(I915_READ(intel_dp->output_reg) & DP_PORT_EN, "skipping pipe %c power sequencer kick due to port %c being active\n", @@ -973,30 +982,29 @@ static int edp_notify_handler(struct notifier_block *this, unsigned long code, struct intel_dp *intel_dp = container_of(this, typeof(* intel_dp), edp_notifier); struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + intel_wakeref_t wakeref; if (!intel_dp_is_edp(intel_dp) || code != SYS_RESTART) return 0; - pps_lock(intel_dp); - - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - enum pipe pipe = vlv_power_sequencer_pipe(intel_dp); - i915_reg_t pp_ctrl_reg, pp_div_reg; - u32 pp_div; - - pp_ctrl_reg = PP_CONTROL(pipe); - pp_div_reg = PP_DIVISOR(pipe); - pp_div = I915_READ(pp_div_reg); - pp_div &= PP_REFERENCE_DIVIDER_MASK; - - /* 0x1F write to PP_DIV_REG sets max cycle delay */ - I915_WRITE(pp_div_reg, pp_div | 0x1F); - I915_WRITE(pp_ctrl_reg, PANEL_UNLOCK_REGS | PANEL_POWER_OFF); - msleep(intel_dp->panel_power_cycle_delay); + with_pps_lock(intel_dp, wakeref) { + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { + enum pipe pipe = vlv_power_sequencer_pipe(intel_dp); + i915_reg_t pp_ctrl_reg, pp_div_reg; + u32 pp_div; + + pp_ctrl_reg = PP_CONTROL(pipe); + pp_div_reg = PP_DIVISOR(pipe); + pp_div = I915_READ(pp_div_reg); + pp_div &= PP_REFERENCE_DIVIDER_MASK; + + /* 0x1F write to PP_DIV_REG sets max cycle delay */ + I915_WRITE(pp_div_reg, pp_div | 0x1F); + I915_WRITE(pp_ctrl_reg, PANEL_UNLOCK_REGS); + msleep(intel_dp->panel_power_cycle_delay); + } } - pps_unlock(intel_dp); - return 0; } @@ -1042,17 +1050,21 @@ intel_dp_check_edp(struct intel_dp *intel_dp) } } -static uint32_t +static u32 intel_dp_aux_wait_done(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); i915_reg_t ch_ctl = intel_dp->aux_ch_ctl_reg(intel_dp); - uint32_t status; + u32 status; bool done; #define C (((status = I915_READ_NOTRACE(ch_ctl)) & DP_AUX_CH_CTL_SEND_BUSY) == 0) done = wait_event_timeout(dev_priv->gmbus_wait_queue, C, msecs_to_jiffies_timeout(10)); + + /* just trace the final value */ + trace_i915_reg_rw(false, ch_ctl, status, sizeof(status), true); + if (!done) DRM_ERROR("dp aux hw did not signal timeout!\n"); #undef C @@ -1060,7 +1072,7 @@ intel_dp_aux_wait_done(struct intel_dp *intel_dp) return status; } -static uint32_t g4x_get_aux_clock_divider(struct intel_dp *intel_dp, int index) +static u32 g4x_get_aux_clock_divider(struct intel_dp *intel_dp, int index) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); @@ -1074,7 +1086,7 @@ static uint32_t g4x_get_aux_clock_divider(struct intel_dp *intel_dp, int index) return DIV_ROUND_CLOSEST(dev_priv->rawclk_freq, 2000); } -static uint32_t ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) +static u32 ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); @@ -1093,7 +1105,7 @@ static uint32_t ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) return DIV_ROUND_CLOSEST(dev_priv->rawclk_freq, 2000); } -static uint32_t hsw_get_aux_clock_divider(struct intel_dp *intel_dp, int index) +static u32 hsw_get_aux_clock_divider(struct intel_dp *intel_dp, int index) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); @@ -1110,7 +1122,7 @@ static uint32_t hsw_get_aux_clock_divider(struct intel_dp *intel_dp, int index) return ilk_get_aux_clock_divider(intel_dp, index); } -static uint32_t skl_get_aux_clock_divider(struct intel_dp *intel_dp, int index) +static u32 skl_get_aux_clock_divider(struct intel_dp *intel_dp, int index) { /* * SKL doesn't need us to program the AUX clock divider (Hardware will @@ -1120,14 +1132,14 @@ static uint32_t skl_get_aux_clock_divider(struct intel_dp *intel_dp, int index) return index ? 0 : 1; } -static uint32_t g4x_get_aux_send_ctl(struct intel_dp *intel_dp, - int send_bytes, - uint32_t aux_clock_divider) +static u32 g4x_get_aux_send_ctl(struct intel_dp *intel_dp, + int send_bytes, + u32 aux_clock_divider) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); - uint32_t precharge, timeout; + u32 precharge, timeout; if (IS_GEN(dev_priv, 6)) precharge = 3; @@ -1150,12 +1162,12 @@ static uint32_t g4x_get_aux_send_ctl(struct intel_dp *intel_dp, (aux_clock_divider << DP_AUX_CH_CTL_BIT_CLOCK_2X_SHIFT); } -static uint32_t skl_get_aux_send_ctl(struct intel_dp *intel_dp, - int send_bytes, - uint32_t unused) +static u32 skl_get_aux_send_ctl(struct intel_dp *intel_dp, + int send_bytes, + u32 unused) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - uint32_t ret; + u32 ret; ret = DP_AUX_CH_CTL_SEND_BUSY | DP_AUX_CH_CTL_DONE | @@ -1175,25 +1187,26 @@ static uint32_t skl_get_aux_send_ctl(struct intel_dp *intel_dp, static int intel_dp_aux_xfer(struct intel_dp *intel_dp, - const uint8_t *send, int send_bytes, - uint8_t *recv, int recv_size, + const u8 *send, int send_bytes, + u8 *recv, int recv_size, u32 aux_send_ctl_flags) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); i915_reg_t ch_ctl, ch_data[5]; - uint32_t aux_clock_divider; + u32 aux_clock_divider; + intel_wakeref_t wakeref; int i, ret, recv_bytes; - uint32_t status; int try, clock = 0; + u32 status; bool vdd; ch_ctl = intel_dp->aux_ch_ctl_reg(intel_dp); for (i = 0; i < ARRAY_SIZE(ch_data); i++) ch_data[i] = intel_dp->aux_ch_data_reg(intel_dp, i); - pps_lock(intel_dp); + wakeref = pps_lock(intel_dp); /* * We will be called with VDD already enabled for dpcd/edid/oui reads. @@ -1218,6 +1231,8 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp, break; msleep(1); } + /* just trace the final value */ + trace_i915_reg_rw(false, ch_ctl, status, sizeof(status), true); if (try == 3) { static u32 last_status = -1; @@ -1337,7 +1352,7 @@ out: if (vdd) edp_panel_vdd_off(intel_dp, false); - pps_unlock(intel_dp); + pps_unlock(intel_dp, wakeref); return ret; } @@ -1359,7 +1374,7 @@ static ssize_t intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) { struct intel_dp *intel_dp = container_of(aux, struct intel_dp, aux); - uint8_t txbuf[20], rxbuf[20]; + u8 txbuf[20], rxbuf[20]; size_t txsize, rxsize; int ret; @@ -1692,7 +1707,7 @@ int intel_dp_rate_select(struct intel_dp *intel_dp, int rate) } void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock, - uint8_t *link_bw, uint8_t *rate_select) + u8 *link_bw, u8 *rate_select) { /* eDP 1.4 rate select method. */ if (intel_dp->use_rate_select) { @@ -2132,7 +2147,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, return ret; } - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) intel_gmch_panel_fitting(intel_crtc, pipe_config, conn_state->scaling_mode); else @@ -2143,7 +2158,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) return -EINVAL; - if (HAS_GMCH_DISPLAY(dev_priv) && + if (HAS_GMCH(dev_priv) && adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) return -EINVAL; @@ -2207,7 +2222,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, } void intel_dp_set_link_params(struct intel_dp *intel_dp, - int link_rate, uint8_t lane_count, + int link_rate, u8 lane_count, bool link_mst) { intel_dp->link_trained = false; @@ -2469,15 +2484,15 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) */ void intel_edp_panel_vdd_on(struct intel_dp *intel_dp) { + intel_wakeref_t wakeref; bool vdd; if (!intel_dp_is_edp(intel_dp)) return; - pps_lock(intel_dp); - vdd = edp_panel_vdd_on(intel_dp); - pps_unlock(intel_dp); - + vdd = false; + with_pps_lock(intel_dp, wakeref) + vdd = edp_panel_vdd_on(intel_dp); I915_STATE_WARN(!vdd, "eDP port %c VDD already requested on\n", port_name(dp_to_dig_port(intel_dp)->base.port)); } @@ -2516,19 +2531,21 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) if ((pp & PANEL_POWER_ON) == 0) intel_dp->panel_power_off_time = ktime_get_boottime(); - intel_display_power_put(dev_priv, - intel_aux_power_domain(intel_dig_port)); + intel_display_power_put_unchecked(dev_priv, + intel_aux_power_domain(intel_dig_port)); } static void edp_panel_vdd_work(struct work_struct *__work) { - struct intel_dp *intel_dp = container_of(to_delayed_work(__work), - struct intel_dp, panel_vdd_work); + struct intel_dp *intel_dp = + container_of(to_delayed_work(__work), + struct intel_dp, panel_vdd_work); + intel_wakeref_t wakeref; - pps_lock(intel_dp); - if (!intel_dp->want_panel_vdd) - edp_panel_vdd_off_sync(intel_dp); - pps_unlock(intel_dp); + with_pps_lock(intel_dp, wakeref) { + if (!intel_dp->want_panel_vdd) + edp_panel_vdd_off_sync(intel_dp); + } } static void edp_panel_vdd_schedule_off(struct intel_dp *intel_dp) @@ -2618,12 +2635,13 @@ static void edp_panel_on(struct intel_dp *intel_dp) void intel_edp_panel_on(struct intel_dp *intel_dp) { + intel_wakeref_t wakeref; + if (!intel_dp_is_edp(intel_dp)) return; - pps_lock(intel_dp); - edp_panel_on(intel_dp); - pps_unlock(intel_dp); + with_pps_lock(intel_dp, wakeref) + edp_panel_on(intel_dp); } @@ -2662,25 +2680,25 @@ static void edp_panel_off(struct intel_dp *intel_dp) intel_dp->panel_power_off_time = ktime_get_boottime(); /* We got a reference when we enabled the VDD. */ - intel_display_power_put(dev_priv, intel_aux_power_domain(dig_port)); + intel_display_power_put_unchecked(dev_priv, intel_aux_power_domain(dig_port)); } void intel_edp_panel_off(struct intel_dp *intel_dp) { + intel_wakeref_t wakeref; + if (!intel_dp_is_edp(intel_dp)) return; - pps_lock(intel_dp); - edp_panel_off(intel_dp); - pps_unlock(intel_dp); + with_pps_lock(intel_dp, wakeref) + edp_panel_off(intel_dp); } /* Enable backlight in the panel power control. */ static void _intel_edp_backlight_on(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - u32 pp; - i915_reg_t pp_ctrl_reg; + intel_wakeref_t wakeref; /* * If we enable the backlight right away following a panel power @@ -2690,17 +2708,16 @@ static void _intel_edp_backlight_on(struct intel_dp *intel_dp) */ wait_backlight_on(intel_dp); - pps_lock(intel_dp); - - pp = ironlake_get_pp_control(intel_dp); - pp |= EDP_BLC_ENABLE; + with_pps_lock(intel_dp, wakeref) { + i915_reg_t pp_ctrl_reg = _pp_ctrl_reg(intel_dp); + u32 pp; - pp_ctrl_reg = _pp_ctrl_reg(intel_dp); + pp = ironlake_get_pp_control(intel_dp); + pp |= EDP_BLC_ENABLE; - I915_WRITE(pp_ctrl_reg, pp); - POSTING_READ(pp_ctrl_reg); - - pps_unlock(intel_dp); + I915_WRITE(pp_ctrl_reg, pp); + POSTING_READ(pp_ctrl_reg); + } } /* Enable backlight PWM and backlight PP control. */ @@ -2722,23 +2739,21 @@ void intel_edp_backlight_on(const struct intel_crtc_state *crtc_state, static void _intel_edp_backlight_off(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - u32 pp; - i915_reg_t pp_ctrl_reg; + intel_wakeref_t wakeref; if (!intel_dp_is_edp(intel_dp)) return; - pps_lock(intel_dp); - - pp = ironlake_get_pp_control(intel_dp); - pp &= ~EDP_BLC_ENABLE; - - pp_ctrl_reg = _pp_ctrl_reg(intel_dp); + with_pps_lock(intel_dp, wakeref) { + i915_reg_t pp_ctrl_reg = _pp_ctrl_reg(intel_dp); + u32 pp; - I915_WRITE(pp_ctrl_reg, pp); - POSTING_READ(pp_ctrl_reg); + pp = ironlake_get_pp_control(intel_dp); + pp &= ~EDP_BLC_ENABLE; - pps_unlock(intel_dp); + I915_WRITE(pp_ctrl_reg, pp); + POSTING_READ(pp_ctrl_reg); + } intel_dp->last_backlight_off = jiffies; edp_wait_backlight_off(intel_dp); @@ -2766,12 +2781,12 @@ static void intel_edp_backlight_power(struct intel_connector *connector, bool enable) { struct intel_dp *intel_dp = intel_attached_dp(&connector->base); + intel_wakeref_t wakeref; bool is_enabled; - pps_lock(intel_dp); - is_enabled = ironlake_get_pp_control(intel_dp) & EDP_BLC_ENABLE; - pps_unlock(intel_dp); - + is_enabled = false; + with_pps_lock(intel_dp, wakeref) + is_enabled = ironlake_get_pp_control(intel_dp) & EDP_BLC_ENABLE; if (is_enabled == enable) return; @@ -2988,16 +3003,18 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + intel_wakeref_t wakeref; bool ret; - if (!intel_display_power_get_if_enabled(dev_priv, - encoder->power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain); + if (!wakeref) return false; ret = intel_dp_port_enabled(dev_priv, intel_dp->output_reg, encoder->port, pipe); - intel_display_power_put(dev_priv, encoder->power_domain); + intel_display_power_put(dev_priv, encoder->power_domain, wakeref); return ret; } @@ -3165,20 +3182,20 @@ static void chv_post_disable_dp(struct intel_encoder *encoder, static void _intel_dp_set_link_train(struct intel_dp *intel_dp, - uint32_t *DP, - uint8_t dp_train_pat) + u32 *DP, + u8 dp_train_pat) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); enum port port = intel_dig_port->base.port; - uint8_t train_pat_mask = drm_dp_training_pattern_mask(intel_dp->dpcd); + u8 train_pat_mask = drm_dp_training_pattern_mask(intel_dp->dpcd); if (dp_train_pat & train_pat_mask) DRM_DEBUG_KMS("Using DP training pattern TPS%d\n", dp_train_pat & train_pat_mask); if (HAS_DDI(dev_priv)) { - uint32_t temp = I915_READ(DP_TP_CTL(port)); + u32 temp = I915_READ(DP_TP_CTL(port)); if (dp_train_pat & DP_LINK_SCRAMBLING_DISABLE) temp |= DP_TP_CTL_SCRAMBLE_DISABLE; @@ -3277,24 +3294,23 @@ static void intel_enable_dp(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); - uint32_t dp_reg = I915_READ(intel_dp->output_reg); + u32 dp_reg = I915_READ(intel_dp->output_reg); enum pipe pipe = crtc->pipe; + intel_wakeref_t wakeref; if (WARN_ON(dp_reg & DP_PORT_EN)) return; - pps_lock(intel_dp); - - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - vlv_init_panel_power_sequencer(encoder, pipe_config); - - intel_dp_enable_port(intel_dp, pipe_config); + with_pps_lock(intel_dp, wakeref) { + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + vlv_init_panel_power_sequencer(encoder, pipe_config); - edp_panel_vdd_on(intel_dp); - edp_panel_on(intel_dp); - edp_panel_vdd_off(intel_dp, true); + intel_dp_enable_port(intel_dp, pipe_config); - pps_unlock(intel_dp); + edp_panel_vdd_on(intel_dp); + edp_panel_on(intel_dp); + edp_panel_vdd_off(intel_dp, true); + } if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { unsigned int lane_mask = 0x0; @@ -3497,14 +3513,14 @@ static void chv_dp_post_pll_disable(struct intel_encoder *encoder, * link status information */ bool -intel_dp_get_link_status(struct intel_dp *intel_dp, uint8_t link_status[DP_LINK_STATUS_SIZE]) +intel_dp_get_link_status(struct intel_dp *intel_dp, u8 link_status[DP_LINK_STATUS_SIZE]) { return drm_dp_dpcd_read(&intel_dp->aux, DP_LANE0_1_STATUS, link_status, DP_LINK_STATUS_SIZE) == DP_LINK_STATUS_SIZE; } /* These are source-specific values. */ -uint8_t +u8 intel_dp_voltage_max(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); @@ -3523,8 +3539,8 @@ intel_dp_voltage_max(struct intel_dp *intel_dp) return DP_TRAIN_VOLTAGE_SWING_LEVEL_2; } -uint8_t -intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, uint8_t voltage_swing) +u8 +intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, u8 voltage_swing) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; @@ -3569,12 +3585,12 @@ intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, uint8_t voltage_swing) } } -static uint32_t vlv_signal_levels(struct intel_dp *intel_dp) +static u32 vlv_signal_levels(struct intel_dp *intel_dp) { struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; unsigned long demph_reg_value, preemph_reg_value, uniqtranscale_reg_value; - uint8_t train_set = intel_dp->train_set[0]; + u8 train_set = intel_dp->train_set[0]; switch (train_set & DP_TRAIN_PRE_EMPHASIS_MASK) { case DP_TRAIN_PRE_EMPH_LEVEL_0: @@ -3655,12 +3671,12 @@ static uint32_t vlv_signal_levels(struct intel_dp *intel_dp) return 0; } -static uint32_t chv_signal_levels(struct intel_dp *intel_dp) +static u32 chv_signal_levels(struct intel_dp *intel_dp) { struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; u32 deemph_reg_value, margin_reg_value; bool uniq_trans_scale = false; - uint8_t train_set = intel_dp->train_set[0]; + u8 train_set = intel_dp->train_set[0]; switch (train_set & DP_TRAIN_PRE_EMPHASIS_MASK) { case DP_TRAIN_PRE_EMPH_LEVEL_0: @@ -3738,10 +3754,10 @@ static uint32_t chv_signal_levels(struct intel_dp *intel_dp) return 0; } -static uint32_t -g4x_signal_levels(uint8_t train_set) +static u32 +g4x_signal_levels(u8 train_set) { - uint32_t signal_levels = 0; + u32 signal_levels = 0; switch (train_set & DP_TRAIN_VOLTAGE_SWING_MASK) { case DP_TRAIN_VOLTAGE_SWING_LEVEL_0: @@ -3777,8 +3793,8 @@ g4x_signal_levels(uint8_t train_set) } /* SNB CPU eDP voltage swing and pre-emphasis control */ -static uint32_t -snb_cpu_edp_signal_levels(uint8_t train_set) +static u32 +snb_cpu_edp_signal_levels(u8 train_set) { int signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK | DP_TRAIN_PRE_EMPHASIS_MASK); @@ -3805,8 +3821,8 @@ snb_cpu_edp_signal_levels(uint8_t train_set) } /* IVB CPU eDP voltage swing and pre-emphasis control */ -static uint32_t -ivb_cpu_edp_signal_levels(uint8_t train_set) +static u32 +ivb_cpu_edp_signal_levels(u8 train_set) { int signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK | DP_TRAIN_PRE_EMPHASIS_MASK); @@ -3841,8 +3857,8 @@ intel_dp_set_signal_levels(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); enum port port = intel_dig_port->base.port; - uint32_t signal_levels, mask = 0; - uint8_t train_set = intel_dp->train_set[0]; + u32 signal_levels, mask = 0; + u8 train_set = intel_dp->train_set[0]; if (IS_GEN9_LP(dev_priv) || INTEL_GEN(dev_priv) >= 10) { signal_levels = bxt_signal_levels(intel_dp); @@ -3881,7 +3897,7 @@ intel_dp_set_signal_levels(struct intel_dp *intel_dp) void intel_dp_program_link_training_pattern(struct intel_dp *intel_dp, - uint8_t dp_train_pat) + u8 dp_train_pat) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = @@ -3898,7 +3914,7 @@ void intel_dp_set_idle_link_train(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); enum port port = intel_dig_port->base.port; - uint32_t val; + u32 val; if (!HAS_DDI(dev_priv)) return; @@ -3933,7 +3949,7 @@ intel_dp_link_down(struct intel_encoder *encoder, struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); enum port port = encoder->port; - uint32_t DP = intel_dp->DP; + u32 DP = intel_dp->DP; if (WARN_ON(HAS_DDI(dev_priv))) return; @@ -3992,9 +4008,10 @@ intel_dp_link_down(struct intel_encoder *encoder, intel_dp->DP = DP; if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - pps_lock(intel_dp); - intel_dp->active_pipe = INVALID_PIPE; - pps_unlock(intel_dp); + intel_wakeref_t wakeref; + + with_pps_lock(intel_dp, wakeref) + intel_dp->active_pipe = INVALID_PIPE; } } @@ -4273,7 +4290,7 @@ intel_dp_get_sink_irq_esi(struct intel_dp *intel_dp, u8 *sink_irq_vector) DP_DPRX_ESI_LEN; } -u16 intel_dp_dsc_get_output_bpp(int link_clock, uint8_t lane_count, +u16 intel_dp_dsc_get_output_bpp(int link_clock, u8 lane_count, int mode_clock, int mode_hdisplay) { u16 bits_per_pixel, max_bpp_small_joiner_ram; @@ -4340,7 +4357,7 @@ u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, return 0; } /* Also take into account max slice width */ - min_slice_count = min_t(uint8_t, min_slice_count, + min_slice_count = min_t(u8, min_slice_count, DIV_ROUND_UP(mode_hdisplay, max_slice_width)); @@ -4358,11 +4375,11 @@ u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, return 0; } -static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp) +static u8 intel_dp_autotest_link_training(struct intel_dp *intel_dp) { int status = 0; int test_link_rate; - uint8_t test_lane_count, test_link_bw; + u8 test_lane_count, test_link_bw; /* (DP CTS 1.2) * 4.3.1.11 */ @@ -4395,10 +4412,10 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp) return DP_TEST_ACK; } -static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp) +static u8 intel_dp_autotest_video_pattern(struct intel_dp *intel_dp) { - uint8_t test_pattern; - uint8_t test_misc; + u8 test_pattern; + u8 test_misc; __be16 h_width, v_height; int status = 0; @@ -4456,9 +4473,9 @@ static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp) return DP_TEST_ACK; } -static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp) +static u8 intel_dp_autotest_edid(struct intel_dp *intel_dp) { - uint8_t test_result = DP_TEST_ACK; + u8 test_result = DP_TEST_ACK; struct intel_connector *intel_connector = intel_dp->attached_connector; struct drm_connector *connector = &intel_connector->base; @@ -4500,16 +4517,16 @@ static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp) return test_result; } -static uint8_t intel_dp_autotest_phy_pattern(struct intel_dp *intel_dp) +static u8 intel_dp_autotest_phy_pattern(struct intel_dp *intel_dp) { - uint8_t test_result = DP_TEST_NAK; + u8 test_result = DP_TEST_NAK; return test_result; } static void intel_dp_handle_test_request(struct intel_dp *intel_dp) { - uint8_t response = DP_TEST_NAK; - uint8_t request = 0; + u8 response = DP_TEST_NAK; + u8 request = 0; int status; status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_REQUEST, &request); @@ -4597,12 +4614,10 @@ go_again: return ret; } else { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); DRM_DEBUG_KMS("failed to get ESI - device may have failed\n"); intel_dp->is_mst = false; - drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, intel_dp->is_mst); - /* send a hotplug event */ - drm_kms_helper_hotplug_event(intel_dig_port->base.base.dev); + drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, + intel_dp->is_mst); } } return -EINVAL; @@ -4835,8 +4850,8 @@ static enum drm_connector_status intel_dp_detect_dpcd(struct intel_dp *intel_dp) { struct intel_lspcon *lspcon = dp_to_lspcon(intel_dp); - uint8_t *dpcd = intel_dp->dpcd; - uint8_t type; + u8 *dpcd = intel_dp->dpcd; + u8 type; if (lspcon->active) lspcon_resume(lspcon); @@ -5289,7 +5304,7 @@ bool intel_digital_port_connected(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - if (HAS_GMCH_DISPLAY(dev_priv)) { + if (HAS_GMCH(dev_priv)) { if (IS_GM45(dev_priv)) return gm45_digital_port_connected(encoder); else @@ -5370,12 +5385,13 @@ intel_dp_detect(struct drm_connector *connector, enum drm_connector_status status; enum intel_display_power_domain aux_domain = intel_aux_power_domain(dig_port); + intel_wakeref_t wakeref; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); - intel_display_power_get(dev_priv, aux_domain); + wakeref = intel_display_power_get(dev_priv, aux_domain); /* Can't disconnect eDP */ if (intel_dp_is_edp(intel_dp)) @@ -5441,7 +5457,7 @@ intel_dp_detect(struct drm_connector *connector, ret = intel_dp_retrain_link(encoder, ctx); if (ret) { - intel_display_power_put(dev_priv, aux_domain); + intel_display_power_put(dev_priv, aux_domain, wakeref); return ret; } } @@ -5465,7 +5481,7 @@ out: if (status != connector_status_connected && !intel_dp->is_mst) intel_dp_unset_edid(intel_dp); - intel_display_power_put(dev_priv, aux_domain); + intel_display_power_put(dev_priv, aux_domain, wakeref); return status; } @@ -5478,6 +5494,7 @@ intel_dp_force(struct drm_connector *connector) struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); enum intel_display_power_domain aux_domain = intel_aux_power_domain(dig_port); + intel_wakeref_t wakeref; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); @@ -5486,11 +5503,11 @@ intel_dp_force(struct drm_connector *connector) if (connector->status != connector_status_connected) return; - intel_display_power_get(dev_priv, aux_domain); + wakeref = intel_display_power_get(dev_priv, aux_domain); intel_dp_set_edid(intel_dp); - intel_display_power_put(dev_priv, aux_domain); + intel_display_power_put(dev_priv, aux_domain, wakeref); } static int intel_dp_get_modes(struct drm_connector *connector) @@ -5562,14 +5579,15 @@ void intel_dp_encoder_flush_work(struct drm_encoder *encoder) intel_dp_mst_encoder_cleanup(intel_dig_port); if (intel_dp_is_edp(intel_dp)) { + intel_wakeref_t wakeref; + cancel_delayed_work_sync(&intel_dp->panel_vdd_work); /* * vdd might still be enabled do to the delayed vdd off. * Make sure vdd is actually turned off here. */ - pps_lock(intel_dp); - edp_panel_vdd_off_sync(intel_dp); - pps_unlock(intel_dp); + with_pps_lock(intel_dp, wakeref) + edp_panel_vdd_off_sync(intel_dp); if (intel_dp->edp_notifier.notifier_call) { unregister_reboot_notifier(&intel_dp->edp_notifier); @@ -5591,6 +5609,7 @@ static void intel_dp_encoder_destroy(struct drm_encoder *encoder) void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder) { struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base); + intel_wakeref_t wakeref; if (!intel_dp_is_edp(intel_dp)) return; @@ -5600,9 +5619,8 @@ void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder) * Make sure vdd is actually turned off here. */ cancel_delayed_work_sync(&intel_dp->panel_vdd_work); - pps_lock(intel_dp); - edp_panel_vdd_off_sync(intel_dp); - pps_unlock(intel_dp); + with_pps_lock(intel_dp, wakeref) + edp_panel_vdd_off_sync(intel_dp); } static @@ -5615,7 +5633,7 @@ int intel_dp_hdcp_write_an_aksv(struct intel_digital_port *intel_dig_port, .address = DP_AUX_HDCP_AKSV, .size = DRM_HDCP_KSV_LEN, }; - uint8_t txbuf[HEADER_SIZE + DRM_HDCP_KSV_LEN] = {}, rxbuf[2], reply = 0; + u8 txbuf[HEADER_SIZE + DRM_HDCP_KSV_LEN] = {}, rxbuf[2], reply = 0; ssize_t dpcd_ret; int ret; @@ -5883,6 +5901,7 @@ void intel_dp_encoder_reset(struct drm_encoder *encoder) struct drm_i915_private *dev_priv = to_i915(encoder->dev); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct intel_lspcon *lspcon = dp_to_lspcon(intel_dp); + intel_wakeref_t wakeref; if (!HAS_DDI(dev_priv)) intel_dp->DP = I915_READ(intel_dp->output_reg); @@ -5892,18 +5911,19 @@ void intel_dp_encoder_reset(struct drm_encoder *encoder) intel_dp->reset_link_params = true; - pps_lock(intel_dp); - - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - intel_dp->active_pipe = vlv_active_pipe(intel_dp); + with_pps_lock(intel_dp, wakeref) { + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + intel_dp->active_pipe = vlv_active_pipe(intel_dp); - if (intel_dp_is_edp(intel_dp)) { - /* Reinit the power sequencer, in case BIOS did something with it. */ - intel_dp_pps_init(intel_dp); - intel_edp_panel_vdd_sanitize(intel_dp); + if (intel_dp_is_edp(intel_dp)) { + /* + * Reinit the power sequencer, in case BIOS did + * something nasty with it. + */ + intel_dp_pps_init(intel_dp); + intel_edp_panel_vdd_sanitize(intel_dp); + } } - - pps_unlock(intel_dp); } static const struct drm_connector_funcs intel_dp_connector_funcs = { @@ -5936,6 +5956,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) struct intel_dp *intel_dp = &intel_dig_port->dp; struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); enum irqreturn ret = IRQ_NONE; + intel_wakeref_t wakeref; if (long_hpd && intel_dig_port->base.type == INTEL_OUTPUT_EDP) { /* @@ -5958,8 +5979,8 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) return IRQ_NONE; } - intel_display_power_get(dev_priv, - intel_aux_power_domain(intel_dig_port)); + wakeref = intel_display_power_get(dev_priv, + intel_aux_power_domain(intel_dig_port)); if (intel_dp->is_mst) { if (intel_dp_check_mst_status(intel_dp) == -EINVAL) { @@ -5989,7 +6010,8 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) put_power: intel_display_power_put(dev_priv, - intel_aux_power_domain(intel_dig_port)); + intel_aux_power_domain(intel_dig_port), + wakeref); return ret; } @@ -6020,7 +6042,7 @@ intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connect intel_attach_force_audio_property(connector); intel_attach_broadcast_rgb_property(connector); - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) drm_connector_attach_max_bpc_property(connector, 6, 10); else if (INTEL_GEN(dev_priv) >= 5) drm_connector_attach_max_bpc_property(connector, 6, 12); @@ -6029,7 +6051,7 @@ intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connect u32 allowed_scalers; allowed_scalers = BIT(DRM_MODE_SCALE_ASPECT) | BIT(DRM_MODE_SCALE_FULLSCREEN); - if (!HAS_GMCH_DISPLAY(dev_priv)) + if (!HAS_GMCH(dev_priv)) allowed_scalers |= BIT(DRM_MODE_SCALE_CENTER); drm_connector_attach_scaling_mode_property(connector, allowed_scalers); @@ -6697,8 +6719,9 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, struct drm_display_mode *downclock_mode = NULL; bool has_dpcd; struct drm_display_mode *scan; - struct edid *edid; enum pipe pipe = INVALID_PIPE; + intel_wakeref_t wakeref; + struct edid *edid; if (!intel_dp_is_edp(intel_dp)) return true; @@ -6718,13 +6741,11 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, return false; } - pps_lock(intel_dp); - - intel_dp_init_panel_power_timestamps(intel_dp); - intel_dp_pps_init(intel_dp); - intel_edp_panel_vdd_sanitize(intel_dp); - - pps_unlock(intel_dp); + with_pps_lock(intel_dp, wakeref) { + intel_dp_init_panel_power_timestamps(intel_dp); + intel_dp_pps_init(intel_dp); + intel_edp_panel_vdd_sanitize(intel_dp); + } /* Cache DPCD and EDID for edp. */ has_dpcd = intel_edp_init_dpcd(intel_dp); @@ -6809,9 +6830,8 @@ out_vdd_off: * vdd might still be enabled do to the delayed vdd off. * Make sure vdd is actually turned off here. */ - pps_lock(intel_dp); - edp_panel_vdd_off_sync(intel_dp); - pps_unlock(intel_dp); + with_pps_lock(intel_dp, wakeref) + edp_panel_vdd_off_sync(intel_dp); return false; } @@ -6903,7 +6923,7 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, drm_connector_init(dev, connector, &intel_dp_connector_funcs, type); drm_connector_helper_add(connector, &intel_dp_connector_helper_funcs); - if (!HAS_GMCH_DISPLAY(dev_priv)) + if (!HAS_GMCH(dev_priv)) connector->interlace_allowed = true; connector->doublescan_allowed = 0; @@ -6985,6 +7005,7 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, intel_encoder->compute_config = intel_dp_compute_config; intel_encoder->get_hw_state = intel_dp_get_hw_state; intel_encoder->get_config = intel_dp_get_config; + intel_encoder->update_pipe = intel_panel_update_backlight; intel_encoder->suspend = intel_dp_encoder_suspend; if (IS_CHERRYVIEW(dev_priv)) { intel_encoder->pre_pll_enable = chv_dp_pre_pll_enable; @@ -7079,7 +7100,10 @@ void intel_dp_mst_resume(struct drm_i915_private *dev_priv) continue; ret = drm_dp_mst_topology_mgr_resume(&intel_dp->mst_mgr); - if (ret) - intel_dp_check_mst_status(intel_dp); + if (ret) { + intel_dp->is_mst = false; + drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, + false); + } } } diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c index 30be0e39bd5f..b59c87daa4f7 100644 --- a/drivers/gpu/drm/i915/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/intel_dp_link_training.c @@ -24,7 +24,7 @@ #include "intel_drv.h" static void -intel_dp_dump_link_status(const uint8_t link_status[DP_LINK_STATUS_SIZE]) +intel_dp_dump_link_status(const u8 link_status[DP_LINK_STATUS_SIZE]) { DRM_DEBUG_KMS("ln0_1:0x%x ln2_3:0x%x align:0x%x sink:0x%x adj_req0_1:0x%x adj_req2_3:0x%x", @@ -34,17 +34,17 @@ intel_dp_dump_link_status(const uint8_t link_status[DP_LINK_STATUS_SIZE]) static void intel_get_adjust_train(struct intel_dp *intel_dp, - const uint8_t link_status[DP_LINK_STATUS_SIZE]) + const u8 link_status[DP_LINK_STATUS_SIZE]) { - uint8_t v = 0; - uint8_t p = 0; + u8 v = 0; + u8 p = 0; int lane; - uint8_t voltage_max; - uint8_t preemph_max; + u8 voltage_max; + u8 preemph_max; for (lane = 0; lane < intel_dp->lane_count; lane++) { - uint8_t this_v = drm_dp_get_adjust_request_voltage(link_status, lane); - uint8_t this_p = drm_dp_get_adjust_request_pre_emphasis(link_status, lane); + u8 this_v = drm_dp_get_adjust_request_voltage(link_status, lane); + u8 this_p = drm_dp_get_adjust_request_pre_emphasis(link_status, lane); if (this_v > v) v = this_v; @@ -66,9 +66,9 @@ intel_get_adjust_train(struct intel_dp *intel_dp, static bool intel_dp_set_link_train(struct intel_dp *intel_dp, - uint8_t dp_train_pat) + u8 dp_train_pat) { - uint8_t buf[sizeof(intel_dp->train_set) + 1]; + u8 buf[sizeof(intel_dp->train_set) + 1]; int ret, len; intel_dp_program_link_training_pattern(intel_dp, dp_train_pat); @@ -92,7 +92,7 @@ intel_dp_set_link_train(struct intel_dp *intel_dp, static bool intel_dp_reset_link_train(struct intel_dp *intel_dp, - uint8_t dp_train_pat) + u8 dp_train_pat) { memset(intel_dp->train_set, 0, sizeof(intel_dp->train_set)); intel_dp_set_signal_levels(intel_dp); @@ -128,11 +128,11 @@ static bool intel_dp_link_max_vswing_reached(struct intel_dp *intel_dp) static bool intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp) { - uint8_t voltage; + u8 voltage; int voltage_tries, cr_tries, max_cr_tries; bool max_vswing_reached = false; - uint8_t link_config[2]; - uint8_t link_bw, rate_select; + u8 link_config[2]; + u8 link_bw, rate_select; if (intel_dp->prepare_link_retrain) intel_dp->prepare_link_retrain(intel_dp); @@ -186,7 +186,7 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp) voltage_tries = 1; for (cr_tries = 0; cr_tries < max_cr_tries; ++cr_tries) { - uint8_t link_status[DP_LINK_STATUS_SIZE]; + u8 link_status[DP_LINK_STATUS_SIZE]; drm_dp_link_train_clock_recovery_delay(intel_dp->dpcd); @@ -282,7 +282,7 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp) { int tries; u32 training_pattern; - uint8_t link_status[DP_LINK_STATUS_SIZE]; + u8 link_status[DP_LINK_STATUS_SIZE]; bool channel_eq = false; training_pattern = intel_dp_training_pattern(intel_dp); diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 5d8471f080d9..fb67cd931117 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -242,7 +242,7 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, struct intel_connector *connector = to_intel_connector(conn_state->connector); int ret; - uint32_t temp; + u32 temp; /* MST encoders are bound to a crtc, not to a connector, * force the mapping here for get_hw_state. diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index 3c7f10d17658..95cb8b154f87 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -413,7 +413,7 @@ static void _bxt_ddi_phy_init(struct drm_i915_private *dev_priv, } if (phy_info->rcomp_phy != -1) { - uint32_t grc_code; + u32 grc_code; bxt_phy_wait_grc_done(dev_priv, phy_info->rcomp_phy); @@ -445,7 +445,7 @@ static void _bxt_ddi_phy_init(struct drm_i915_private *dev_priv, void bxt_ddi_phy_uninit(struct drm_i915_private *dev_priv, enum dpio_phy phy) { const struct bxt_ddi_phy_info *phy_info; - uint32_t val; + u32 val; phy_info = bxt_get_phy_info(dev_priv, phy); @@ -515,7 +515,7 @@ bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv, enum dpio_phy phy) { const struct bxt_ddi_phy_info *phy_info; - uint32_t mask; + u32 mask; bool ok; phy_info = bxt_get_phy_info(dev_priv, phy); @@ -567,8 +567,8 @@ bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv, #undef _CHK } -uint8_t -bxt_ddi_phy_calc_lane_lat_optim_mask(uint8_t lane_count) +u8 +bxt_ddi_phy_calc_lane_lat_optim_mask(u8 lane_count) { switch (lane_count) { case 1: @@ -585,7 +585,7 @@ bxt_ddi_phy_calc_lane_lat_optim_mask(uint8_t lane_count) } void bxt_ddi_phy_set_lane_optim_mask(struct intel_encoder *encoder, - uint8_t lane_lat_optim_mask) + u8 lane_lat_optim_mask) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = encoder->port; @@ -610,7 +610,7 @@ void bxt_ddi_phy_set_lane_optim_mask(struct intel_encoder *encoder, } } -uint8_t +u8 bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -618,7 +618,7 @@ bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder) enum dpio_phy phy; enum dpio_channel ch; int lane; - uint8_t mask; + u8 mask; bxt_port_to_phy_channel(dev_priv, port, &phy, &ch); @@ -739,7 +739,7 @@ void chv_data_lane_soft_reset(struct intel_encoder *encoder, enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(&encoder->base)); struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); enum pipe pipe = crtc->pipe; - uint32_t val; + u32 val; val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW0(ch)); if (reset) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index d513ca875c67..0a42d11c4c33 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -247,7 +247,7 @@ intel_find_shared_dpll(struct intel_crtc *crtc, enum intel_dpll_id range_max) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_shared_dpll *pll; + struct intel_shared_dpll *pll, *unused_pll = NULL; struct intel_shared_dpll_state *shared_dpll; enum intel_dpll_id i; @@ -257,8 +257,11 @@ intel_find_shared_dpll(struct intel_crtc *crtc, pll = &dev_priv->shared_dplls[i]; /* Only want to check enabled timings first */ - if (shared_dpll[i].crtc_mask == 0) + if (shared_dpll[i].crtc_mask == 0) { + if (!unused_pll) + unused_pll = pll; continue; + } if (memcmp(&crtc_state->dpll_hw_state, &shared_dpll[i].hw_state, @@ -273,14 +276,11 @@ intel_find_shared_dpll(struct intel_crtc *crtc, } /* Ok no matching timings, maybe there's a free one? */ - for (i = range_min; i <= range_max; i++) { - pll = &dev_priv->shared_dplls[i]; - if (shared_dpll[i].crtc_mask == 0) { - DRM_DEBUG_KMS("[CRTC:%d:%s] allocated %s\n", - crtc->base.base.id, crtc->base.name, - pll->info->name); - return pll; - } + if (unused_pll) { + DRM_DEBUG_KMS("[CRTC:%d:%s] allocated %s\n", + crtc->base.base.id, crtc->base.name, + unused_pll->info->name); + return unused_pll; } return NULL; @@ -345,9 +345,12 @@ static bool ibx_pch_dpll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_dpll_hw_state *hw_state) { const enum intel_dpll_id id = pll->info->id; - uint32_t val; + intel_wakeref_t wakeref; + u32 val; - if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + POWER_DOMAIN_PLLS); + if (!wakeref) return false; val = I915_READ(PCH_DPLL(id)); @@ -355,7 +358,7 @@ static bool ibx_pch_dpll_get_hw_state(struct drm_i915_private *dev_priv, hw_state->fp0 = I915_READ(PCH_FP0(id)); hw_state->fp1 = I915_READ(PCH_FP1(id)); - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); + intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); return val & DPLL_VCO_ENABLE; } @@ -487,7 +490,7 @@ static void hsw_ddi_wrpll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { const enum intel_dpll_id id = pll->info->id; - uint32_t val; + u32 val; val = I915_READ(WRPLL_CTL(id)); I915_WRITE(WRPLL_CTL(id), val & ~WRPLL_PLL_ENABLE); @@ -497,7 +500,7 @@ static void hsw_ddi_wrpll_disable(struct drm_i915_private *dev_priv, static void hsw_ddi_spll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { - uint32_t val; + u32 val; val = I915_READ(SPLL_CTL); I915_WRITE(SPLL_CTL, val & ~SPLL_PLL_ENABLE); @@ -509,15 +512,18 @@ static bool hsw_ddi_wrpll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_dpll_hw_state *hw_state) { const enum intel_dpll_id id = pll->info->id; - uint32_t val; + intel_wakeref_t wakeref; + u32 val; - if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + POWER_DOMAIN_PLLS); + if (!wakeref) return false; val = I915_READ(WRPLL_CTL(id)); hw_state->wrpll = val; - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); + intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); return val & WRPLL_PLL_ENABLE; } @@ -526,15 +532,18 @@ static bool hsw_ddi_spll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state) { - uint32_t val; + intel_wakeref_t wakeref; + u32 val; - if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + POWER_DOMAIN_PLLS); + if (!wakeref) return false; val = I915_READ(SPLL_CTL); hw_state->spll = val; - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); + intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); return val & SPLL_PLL_ENABLE; } @@ -630,11 +639,12 @@ static unsigned hsw_wrpll_get_budget_for_freq(int clock) return budget; } -static void hsw_wrpll_update_rnp(uint64_t freq2k, unsigned budget, - unsigned r2, unsigned n2, unsigned p, +static void hsw_wrpll_update_rnp(u64 freq2k, unsigned int budget, + unsigned int r2, unsigned int n2, + unsigned int p, struct hsw_wrpll_rnp *best) { - uint64_t a, b, c, d, diff, diff_best; + u64 a, b, c, d, diff, diff_best; /* No best (r,n,p) yet */ if (best->p == 0) { @@ -693,7 +703,7 @@ static void hsw_ddi_calculate_wrpll(int clock /* in Hz */, unsigned *r2_out, unsigned *n2_out, unsigned *p_out) { - uint64_t freq2k; + u64 freq2k; unsigned p, n2, r2; struct hsw_wrpll_rnp best = { 0, 0, 0 }; unsigned budget; @@ -759,7 +769,7 @@ static struct intel_shared_dpll *hsw_ddi_hdmi_get_dpll(int clock, struct intel_crtc_state *crtc_state) { struct intel_shared_dpll *pll; - uint32_t val; + u32 val; unsigned int p, n2, r2; hsw_ddi_calculate_wrpll(clock * 1000, &r2, &n2, &p); @@ -921,7 +931,7 @@ static void skl_ddi_pll_write_ctrl1(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { const enum intel_dpll_id id = pll->info->id; - uint32_t val; + u32 val; val = I915_READ(DPLL_CTRL1); @@ -986,12 +996,15 @@ static bool skl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state) { - uint32_t val; + u32 val; const struct skl_dpll_regs *regs = skl_dpll_regs; const enum intel_dpll_id id = pll->info->id; + intel_wakeref_t wakeref; bool ret; - if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + POWER_DOMAIN_PLLS); + if (!wakeref) return false; ret = false; @@ -1011,7 +1024,7 @@ static bool skl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, ret = true; out: - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); + intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); return ret; } @@ -1020,12 +1033,15 @@ static bool skl_ddi_dpll0_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state) { - uint32_t val; const struct skl_dpll_regs *regs = skl_dpll_regs; const enum intel_dpll_id id = pll->info->id; + intel_wakeref_t wakeref; + u32 val; bool ret; - if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + POWER_DOMAIN_PLLS); + if (!wakeref) return false; ret = false; @@ -1041,15 +1057,15 @@ static bool skl_ddi_dpll0_get_hw_state(struct drm_i915_private *dev_priv, ret = true; out: - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); + intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); return ret; } struct skl_wrpll_context { - uint64_t min_deviation; /* current minimal deviation */ - uint64_t central_freq; /* chosen central freq */ - uint64_t dco_freq; /* chosen dco freq */ + u64 min_deviation; /* current minimal deviation */ + u64 central_freq; /* chosen central freq */ + u64 dco_freq; /* chosen dco freq */ unsigned int p; /* chosen divider */ }; @@ -1065,11 +1081,11 @@ static void skl_wrpll_context_init(struct skl_wrpll_context *ctx) #define SKL_DCO_MAX_NDEVIATION 600 static void skl_wrpll_try_divider(struct skl_wrpll_context *ctx, - uint64_t central_freq, - uint64_t dco_freq, + u64 central_freq, + u64 dco_freq, unsigned int divider) { - uint64_t deviation; + u64 deviation; deviation = div64_u64(10000 * abs_diff(dco_freq, central_freq), central_freq); @@ -1143,21 +1159,21 @@ static void skl_wrpll_get_multipliers(unsigned int p, } struct skl_wrpll_params { - uint32_t dco_fraction; - uint32_t dco_integer; - uint32_t qdiv_ratio; - uint32_t qdiv_mode; - uint32_t kdiv; - uint32_t pdiv; - uint32_t central_freq; + u32 dco_fraction; + u32 dco_integer; + u32 qdiv_ratio; + u32 qdiv_mode; + u32 kdiv; + u32 pdiv; + u32 central_freq; }; static void skl_wrpll_params_populate(struct skl_wrpll_params *params, - uint64_t afe_clock, - uint64_t central_freq, - uint32_t p0, uint32_t p1, uint32_t p2) + u64 afe_clock, + u64 central_freq, + u32 p0, u32 p1, u32 p2) { - uint64_t dco_freq; + u64 dco_freq; switch (central_freq) { case 9600000000ULL: @@ -1223,10 +1239,10 @@ static bool skl_ddi_calculate_wrpll(int clock /* in Hz */, struct skl_wrpll_params *wrpll_params) { - uint64_t afe_clock = clock * 5; /* AFE Clock is 5x Pixel clock */ - uint64_t dco_central_freq[3] = {8400000000ULL, - 9000000000ULL, - 9600000000ULL}; + u64 afe_clock = clock * 5; /* AFE Clock is 5x Pixel clock */ + u64 dco_central_freq[3] = { 8400000000ULL, + 9000000000ULL, + 9600000000ULL }; static const int even_dividers[] = { 4, 6, 8, 10, 12, 14, 16, 18, 20, 24, 28, 30, 32, 36, 40, 42, 44, 48, 52, 54, 56, 60, 64, 66, 68, @@ -1250,7 +1266,7 @@ skl_ddi_calculate_wrpll(int clock /* in Hz */, for (dco = 0; dco < ARRAY_SIZE(dco_central_freq); dco++) { for (i = 0; i < dividers[d].n_dividers; i++) { unsigned int p = dividers[d].list[i]; - uint64_t dco_freq = p * afe_clock; + u64 dco_freq = p * afe_clock; skl_wrpll_try_divider(&ctx, dco_central_freq[dco], @@ -1296,7 +1312,7 @@ static bool skl_ddi_hdmi_pll_dividers(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, int clock) { - uint32_t ctrl1, cfgcr1, cfgcr2; + u32 ctrl1, cfgcr1, cfgcr2; struct skl_wrpll_params wrpll_params = { 0, }; /* @@ -1333,7 +1349,7 @@ static bool skl_ddi_dp_set_dpll_hw_state(int clock, struct intel_dpll_hw_state *dpll_hw_state) { - uint32_t ctrl1; + u32 ctrl1; /* * See comment in intel_dpll_hw_state to understand why we always use 0 @@ -1435,7 +1451,7 @@ static const struct intel_shared_dpll_funcs skl_ddi_dpll0_funcs = { static void bxt_ddi_pll_enable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { - uint32_t temp; + u32 temp; enum port port = (enum port)pll->info->id; /* 1:1 port->PLL mapping */ enum dpio_phy phy; enum dpio_channel ch; @@ -1556,7 +1572,7 @@ static void bxt_ddi_pll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { enum port port = (enum port)pll->info->id; /* 1:1 port->PLL mapping */ - uint32_t temp; + u32 temp; temp = I915_READ(BXT_PORT_PLL_ENABLE(port)); temp &= ~PORT_PLL_ENABLE; @@ -1579,14 +1595,17 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_dpll_hw_state *hw_state) { enum port port = (enum port)pll->info->id; /* 1:1 port->PLL mapping */ - uint32_t val; - bool ret; + intel_wakeref_t wakeref; enum dpio_phy phy; enum dpio_channel ch; + u32 val; + bool ret; bxt_port_to_phy_channel(dev_priv, port, &phy, &ch); - if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + POWER_DOMAIN_PLLS); + if (!wakeref) return false; ret = false; @@ -1643,7 +1662,7 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, ret = true; out: - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); + intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); return ret; } @@ -1651,12 +1670,12 @@ out: /* bxt clock parameters */ struct bxt_clk_div { int clock; - uint32_t p1; - uint32_t p2; - uint32_t m2_int; - uint32_t m2_frac; + u32 p1; + u32 p2; + u32 m2_int; + u32 m2_frac; bool m2_frac_en; - uint32_t n; + u32 n; int vco; }; @@ -1723,8 +1742,8 @@ static bool bxt_ddi_set_dpll_hw_state(int clock, struct intel_dpll_hw_state *dpll_hw_state) { int vco = clk_div->vco; - uint32_t prop_coef, int_coef, gain_ctl, targ_cnt; - uint32_t lanestagger; + u32 prop_coef, int_coef, gain_ctl, targ_cnt; + u32 lanestagger; if (vco >= 6200000 && vco <= 6700000) { prop_coef = 4; @@ -1873,7 +1892,7 @@ static void intel_ddi_pll_init(struct drm_device *dev) struct drm_i915_private *dev_priv = to_i915(dev); if (INTEL_GEN(dev_priv) < 9) { - uint32_t val = I915_READ(LCPLL_CTL); + u32 val = I915_READ(LCPLL_CTL); /* * The LCPLL register should be turned on by the BIOS. For now @@ -1959,7 +1978,7 @@ static void cnl_ddi_pll_enable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { const enum intel_dpll_id id = pll->info->id; - uint32_t val; + u32 val; /* 1. Enable DPLL power in DPLL_ENABLE. */ val = I915_READ(CNL_DPLL_ENABLE(id)); @@ -2034,7 +2053,7 @@ static void cnl_ddi_pll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { const enum intel_dpll_id id = pll->info->id; - uint32_t val; + u32 val; /* * 1. Configure DPCLKA_CFGCR0 to turn off the clock for the DDI. @@ -2091,10 +2110,13 @@ static bool cnl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_dpll_hw_state *hw_state) { const enum intel_dpll_id id = pll->info->id; - uint32_t val; + intel_wakeref_t wakeref; + u32 val; bool ret; - if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + POWER_DOMAIN_PLLS); + if (!wakeref) return false; ret = false; @@ -2113,7 +2135,7 @@ static bool cnl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, ret = true; out: - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); + intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); return ret; } @@ -2225,7 +2247,7 @@ cnl_ddi_calculate_wrpll(int clock, struct skl_wrpll_params *wrpll_params) { u32 afe_clock = clock * 5; - uint32_t ref_clock; + u32 ref_clock; u32 dco_min = 7998000; u32 dco_max = 10000000; u32 dco_mid = (dco_min + dco_max) / 2; @@ -2271,7 +2293,7 @@ static bool cnl_ddi_hdmi_pll_dividers(struct intel_crtc *crtc, int clock) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - uint32_t cfgcr0, cfgcr1; + u32 cfgcr0, cfgcr1; struct skl_wrpll_params wrpll_params = { 0, }; cfgcr0 = DPLL_CFGCR0_HDMI_MODE; @@ -2300,7 +2322,7 @@ static bool cnl_ddi_dp_set_dpll_hw_state(int clock, struct intel_dpll_hw_state *dpll_hw_state) { - uint32_t cfgcr0; + u32 cfgcr0; cfgcr0 = DPLL_CFGCR0_SSC_ENABLE; @@ -2517,7 +2539,7 @@ static bool icl_calc_dpll_state(struct intel_crtc_state *crtc_state, struct intel_dpll_hw_state *pll_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - uint32_t cfgcr0, cfgcr1; + u32 cfgcr0, cfgcr1; struct skl_wrpll_params pll_params = { 0 }; bool ret; @@ -2547,10 +2569,10 @@ static bool icl_calc_dpll_state(struct intel_crtc_state *crtc_state, } int icl_calc_dp_combo_pll_link(struct drm_i915_private *dev_priv, - uint32_t pll_id) + u32 pll_id) { - uint32_t cfgcr0, cfgcr1; - uint32_t pdiv, kdiv, qdiv_mode, qdiv_ratio, dco_integer, dco_fraction; + u32 cfgcr0, cfgcr1; + u32 pdiv, kdiv, qdiv_mode, qdiv_ratio, dco_integer, dco_fraction; const struct skl_wrpll_params *params; int index, n_entries, link_clock; @@ -2617,14 +2639,14 @@ int icl_calc_dp_combo_pll_link(struct drm_i915_private *dev_priv, return link_clock; } -static enum port icl_mg_pll_id_to_port(enum intel_dpll_id id) +static enum tc_port icl_pll_id_to_tc_port(enum intel_dpll_id id) { - return id - DPLL_ID_ICL_MGPLL1 + PORT_C; + return id - DPLL_ID_ICL_MGPLL1; } -enum intel_dpll_id icl_port_to_mg_pll_id(enum port port) +enum intel_dpll_id icl_tc_port_to_pll_id(enum tc_port tc_port) { - return port - PORT_C + DPLL_ID_ICL_MGPLL1; + return tc_port + DPLL_ID_ICL_MGPLL1; } bool intel_dpll_is_combophy(enum intel_dpll_id id) @@ -2633,10 +2655,10 @@ bool intel_dpll_is_combophy(enum intel_dpll_id id) } static bool icl_mg_pll_find_divisors(int clock_khz, bool is_dp, bool use_ssc, - uint32_t *target_dco_khz, + u32 *target_dco_khz, struct intel_dpll_hw_state *state) { - uint32_t dco_min_freq, dco_max_freq; + u32 dco_min_freq, dco_max_freq; int div1_vals[] = {7, 5, 3, 2}; unsigned int i; int div2; @@ -2712,12 +2734,12 @@ static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); int refclk_khz = dev_priv->cdclk.hw.ref; - uint32_t dco_khz, m1div, m2div_int, m2div_rem, m2div_frac; - uint32_t iref_ndiv, iref_trim, iref_pulse_w; - uint32_t prop_coeff, int_coeff; - uint32_t tdc_targetcnt, feedfwgain; - uint64_t ssc_stepsize, ssc_steplen, ssc_steplog; - uint64_t tmp; + u32 dco_khz, m1div, m2div_int, m2div_rem, m2div_frac; + u32 iref_ndiv, iref_trim, iref_pulse_w; + u32 prop_coeff, int_coeff; + u32 tdc_targetcnt, feedfwgain; + u64 ssc_stepsize, ssc_steplen, ssc_steplog; + u64 tmp; bool use_ssc = false; bool is_dp = !intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI); @@ -2740,7 +2762,7 @@ static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state, } m2div_rem = dco_khz % (refclk_khz * m1div); - tmp = (uint64_t)m2div_rem * (1 << 22); + tmp = (u64)m2div_rem * (1 << 22); do_div(tmp, refclk_khz * m1div); m2div_frac = tmp; @@ -2799,11 +2821,11 @@ static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state, } if (use_ssc) { - tmp = (uint64_t)dco_khz * 47 * 32; + tmp = (u64)dco_khz * 47 * 32; do_div(tmp, refclk_khz * m1div * 10000); ssc_stepsize = tmp; - tmp = (uint64_t)dco_khz * 1000; + tmp = (u64)dco_khz * 1000; ssc_steplen = DIV_ROUND_UP_ULL(tmp, 32 * 2 * 32); } else { ssc_stepsize = 0; @@ -2903,7 +2925,10 @@ icl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, ret = icl_calc_dpll_state(crtc_state, encoder, clock, &pll_state); } else { - min = icl_port_to_mg_pll_id(port); + enum tc_port tc_port; + + tc_port = intel_port_to_tc(dev_priv, port); + min = icl_tc_port_to_pll_id(tc_port); max = min; ret = icl_calc_mg_pll_state(crtc_state, encoder, clock, &pll_state); @@ -2937,12 +2962,8 @@ static i915_reg_t icl_pll_id_to_enable_reg(enum intel_dpll_id id) return CNL_DPLL_ENABLE(id); else if (id == DPLL_ID_ICL_TBTPLL) return TBT_PLL_ENABLE; - else - /* - * TODO: Make MG_PLL macros use - * tc port id instead of port id - */ - return MG_PLL_ENABLE(icl_mg_pll_id_to_port(id)); + + return MG_PLL_ENABLE(icl_pll_id_to_tc_port(id)); } static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, @@ -2950,11 +2971,13 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_dpll_hw_state *hw_state) { const enum intel_dpll_id id = pll->info->id; - uint32_t val; - enum port port; + intel_wakeref_t wakeref; bool ret = false; + u32 val; - if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + POWER_DOMAIN_PLLS); + if (!wakeref) return false; val = I915_READ(icl_pll_id_to_enable_reg(id)); @@ -2966,32 +2989,33 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, hw_state->cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(id)); hw_state->cfgcr1 = I915_READ(ICL_DPLL_CFGCR1(id)); } else { - port = icl_mg_pll_id_to_port(id); - hw_state->mg_refclkin_ctl = I915_READ(MG_REFCLKIN_CTL(port)); + enum tc_port tc_port = icl_pll_id_to_tc_port(id); + + hw_state->mg_refclkin_ctl = I915_READ(MG_REFCLKIN_CTL(tc_port)); hw_state->mg_refclkin_ctl &= MG_REFCLKIN_CTL_OD_2_MUX_MASK; hw_state->mg_clktop2_coreclkctl1 = - I915_READ(MG_CLKTOP2_CORECLKCTL1(port)); + I915_READ(MG_CLKTOP2_CORECLKCTL1(tc_port)); hw_state->mg_clktop2_coreclkctl1 &= MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK; hw_state->mg_clktop2_hsclkctl = - I915_READ(MG_CLKTOP2_HSCLKCTL(port)); + I915_READ(MG_CLKTOP2_HSCLKCTL(tc_port)); hw_state->mg_clktop2_hsclkctl &= MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK | MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK | MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK | MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK; - hw_state->mg_pll_div0 = I915_READ(MG_PLL_DIV0(port)); - hw_state->mg_pll_div1 = I915_READ(MG_PLL_DIV1(port)); - hw_state->mg_pll_lf = I915_READ(MG_PLL_LF(port)); - hw_state->mg_pll_frac_lock = I915_READ(MG_PLL_FRAC_LOCK(port)); - hw_state->mg_pll_ssc = I915_READ(MG_PLL_SSC(port)); + hw_state->mg_pll_div0 = I915_READ(MG_PLL_DIV0(tc_port)); + hw_state->mg_pll_div1 = I915_READ(MG_PLL_DIV1(tc_port)); + hw_state->mg_pll_lf = I915_READ(MG_PLL_LF(tc_port)); + hw_state->mg_pll_frac_lock = I915_READ(MG_PLL_FRAC_LOCK(tc_port)); + hw_state->mg_pll_ssc = I915_READ(MG_PLL_SSC(tc_port)); - hw_state->mg_pll_bias = I915_READ(MG_PLL_BIAS(port)); + hw_state->mg_pll_bias = I915_READ(MG_PLL_BIAS(tc_port)); hw_state->mg_pll_tdc_coldst_bias = - I915_READ(MG_PLL_TDC_COLDST_BIAS(port)); + I915_READ(MG_PLL_TDC_COLDST_BIAS(tc_port)); if (dev_priv->cdclk.hw.ref == 38400) { hw_state->mg_pll_tdc_coldst_bias_mask = MG_PLL_TDC_COLDST_COLDSTART; @@ -3007,7 +3031,7 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, ret = true; out: - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); + intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); return ret; } @@ -3026,7 +3050,7 @@ static void icl_mg_pll_write(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { struct intel_dpll_hw_state *hw_state = &pll->state.hw_state; - enum port port = icl_mg_pll_id_to_port(pll->info->id); + enum tc_port tc_port = icl_pll_id_to_tc_port(pll->info->id); u32 val; /* @@ -3035,41 +3059,41 @@ static void icl_mg_pll_write(struct drm_i915_private *dev_priv, * during the calc/readout phase if the mask depends on some other HW * state like refclk, see icl_calc_mg_pll_state(). */ - val = I915_READ(MG_REFCLKIN_CTL(port)); + val = I915_READ(MG_REFCLKIN_CTL(tc_port)); val &= ~MG_REFCLKIN_CTL_OD_2_MUX_MASK; val |= hw_state->mg_refclkin_ctl; - I915_WRITE(MG_REFCLKIN_CTL(port), val); + I915_WRITE(MG_REFCLKIN_CTL(tc_port), val); - val = I915_READ(MG_CLKTOP2_CORECLKCTL1(port)); + val = I915_READ(MG_CLKTOP2_CORECLKCTL1(tc_port)); val &= ~MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK; val |= hw_state->mg_clktop2_coreclkctl1; - I915_WRITE(MG_CLKTOP2_CORECLKCTL1(port), val); + I915_WRITE(MG_CLKTOP2_CORECLKCTL1(tc_port), val); - val = I915_READ(MG_CLKTOP2_HSCLKCTL(port)); + val = I915_READ(MG_CLKTOP2_HSCLKCTL(tc_port)); val &= ~(MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK | MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK | MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK | MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK); val |= hw_state->mg_clktop2_hsclkctl; - I915_WRITE(MG_CLKTOP2_HSCLKCTL(port), val); + I915_WRITE(MG_CLKTOP2_HSCLKCTL(tc_port), val); - I915_WRITE(MG_PLL_DIV0(port), hw_state->mg_pll_div0); - I915_WRITE(MG_PLL_DIV1(port), hw_state->mg_pll_div1); - I915_WRITE(MG_PLL_LF(port), hw_state->mg_pll_lf); - I915_WRITE(MG_PLL_FRAC_LOCK(port), hw_state->mg_pll_frac_lock); - I915_WRITE(MG_PLL_SSC(port), hw_state->mg_pll_ssc); + I915_WRITE(MG_PLL_DIV0(tc_port), hw_state->mg_pll_div0); + I915_WRITE(MG_PLL_DIV1(tc_port), hw_state->mg_pll_div1); + I915_WRITE(MG_PLL_LF(tc_port), hw_state->mg_pll_lf); + I915_WRITE(MG_PLL_FRAC_LOCK(tc_port), hw_state->mg_pll_frac_lock); + I915_WRITE(MG_PLL_SSC(tc_port), hw_state->mg_pll_ssc); - val = I915_READ(MG_PLL_BIAS(port)); + val = I915_READ(MG_PLL_BIAS(tc_port)); val &= ~hw_state->mg_pll_bias_mask; val |= hw_state->mg_pll_bias; - I915_WRITE(MG_PLL_BIAS(port), val); + I915_WRITE(MG_PLL_BIAS(tc_port), val); - val = I915_READ(MG_PLL_TDC_COLDST_BIAS(port)); + val = I915_READ(MG_PLL_TDC_COLDST_BIAS(tc_port)); val &= ~hw_state->mg_pll_tdc_coldst_bias_mask; val |= hw_state->mg_pll_tdc_coldst_bias; - I915_WRITE(MG_PLL_TDC_COLDST_BIAS(port), val); + I915_WRITE(MG_PLL_TDC_COLDST_BIAS(tc_port), val); - POSTING_READ(MG_PLL_TDC_COLDST_BIAS(port)); + POSTING_READ(MG_PLL_TDC_COLDST_BIAS(tc_port)); } static void icl_pll_enable(struct drm_i915_private *dev_priv, @@ -3077,7 +3101,7 @@ static void icl_pll_enable(struct drm_i915_private *dev_priv, { const enum intel_dpll_id id = pll->info->id; i915_reg_t enable_reg = icl_pll_id_to_enable_reg(id); - uint32_t val; + u32 val; val = I915_READ(enable_reg); val |= PLL_POWER_ENABLE; @@ -3118,7 +3142,7 @@ static void icl_pll_disable(struct drm_i915_private *dev_priv, { const enum intel_dpll_id id = pll->info->id; i915_reg_t enable_reg = icl_pll_id_to_enable_reg(id); - uint32_t val; + u32 val; /* The first steps are done by intel_ddi_post_disable(). */ diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h b/drivers/gpu/drm/i915/intel_dpll_mgr.h index a033d8f06d4a..40e8391a92f2 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.h @@ -138,14 +138,14 @@ enum intel_dpll_id { struct intel_dpll_hw_state { /* i9xx, pch plls */ - uint32_t dpll; - uint32_t dpll_md; - uint32_t fp0; - uint32_t fp1; + u32 dpll; + u32 dpll_md; + u32 fp0; + u32 fp1; /* hsw, bdw */ - uint32_t wrpll; - uint32_t spll; + u32 wrpll; + u32 spll; /* skl */ /* @@ -154,34 +154,33 @@ struct intel_dpll_hw_state { * the register. This allows us to easily compare the state to share * the DPLL. */ - uint32_t ctrl1; + u32 ctrl1; /* HDMI only, 0 when used for DP */ - uint32_t cfgcr1, cfgcr2; + u32 cfgcr1, cfgcr2; /* cnl */ - uint32_t cfgcr0; + u32 cfgcr0; /* CNL also uses cfgcr1 */ /* bxt */ - uint32_t ebb0, ebb4, pll0, pll1, pll2, pll3, pll6, pll8, pll9, pll10, - pcsdw12; + u32 ebb0, ebb4, pll0, pll1, pll2, pll3, pll6, pll8, pll9, pll10, pcsdw12; /* * ICL uses the following, already defined: - * uint32_t cfgcr0, cfgcr1; - */ - uint32_t mg_refclkin_ctl; - uint32_t mg_clktop2_coreclkctl1; - uint32_t mg_clktop2_hsclkctl; - uint32_t mg_pll_div0; - uint32_t mg_pll_div1; - uint32_t mg_pll_lf; - uint32_t mg_pll_frac_lock; - uint32_t mg_pll_ssc; - uint32_t mg_pll_bias; - uint32_t mg_pll_tdc_coldst_bias; - uint32_t mg_pll_bias_mask; - uint32_t mg_pll_tdc_coldst_bias_mask; + * u32 cfgcr0, cfgcr1; + */ + u32 mg_refclkin_ctl; + u32 mg_clktop2_coreclkctl1; + u32 mg_clktop2_hsclkctl; + u32 mg_pll_div0; + u32 mg_pll_div1; + u32 mg_pll_lf; + u32 mg_pll_frac_lock; + u32 mg_pll_ssc; + u32 mg_pll_bias; + u32 mg_pll_tdc_coldst_bias; + u32 mg_pll_bias_mask; + u32 mg_pll_tdc_coldst_bias_mask; }; /** @@ -280,7 +279,7 @@ struct dpll_info { * Inform the state checker that the DPLL is kept enabled even if * not in use by any CRTC. */ - uint32_t flags; + u32 flags; }; /** @@ -343,9 +342,9 @@ void intel_shared_dpll_init(struct drm_device *dev); void intel_dpll_dump_hw_state(struct drm_i915_private *dev_priv, struct intel_dpll_hw_state *hw_state); int icl_calc_dp_combo_pll_link(struct drm_i915_private *dev_priv, - uint32_t pll_id); + u32 pll_id); int cnl_hdmi_pll_ref_clock(struct drm_i915_private *dev_priv); -enum intel_dpll_id icl_port_to_mg_pll_id(enum port port); +enum intel_dpll_id icl_tc_port_to_pll_id(enum tc_port tc_port); bool intel_dpll_is_combophy(enum intel_dpll_id id); #endif /* _INTEL_DPLL_MGR_H_ */ diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index c1759babe1dd..15db41394b9e 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -29,6 +29,7 @@ #include <linux/i2c.h> #include <linux/hdmi.h> #include <linux/sched/clock.h> +#include <linux/stackdepot.h> #include <drm/i915_drm.h> #include "i915_drv.h" #include <drm/drm_crtc.h> @@ -42,6 +43,8 @@ #include <drm/drm_atomic.h> #include <media/cec-notifier.h> +struct drm_printer; + /** * __wait_for - magic wait macro * @@ -210,6 +213,16 @@ struct intel_fbdev { unsigned long vma_flags; async_cookie_t cookie; int preferred_bpp; + + /* Whether or not fbdev hpd processing is temporarily suspended */ + bool hpd_suspended : 1; + /* Set when a hotplug was received while HPD processing was + * suspended + */ + bool hpd_waiting : 1; + + /* Protects hpd_suspended */ + struct mutex hpd_lock; }; struct intel_encoder { @@ -298,13 +311,12 @@ struct intel_panel { /* Connector and platform specific backlight functions */ int (*setup)(struct intel_connector *connector, enum pipe pipe); - uint32_t (*get)(struct intel_connector *connector); - void (*set)(const struct drm_connector_state *conn_state, uint32_t level); + u32 (*get)(struct intel_connector *connector); + void (*set)(const struct drm_connector_state *conn_state, u32 level); void (*disable)(const struct drm_connector_state *conn_state); void (*enable)(const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); - uint32_t (*hz_to_pwm)(struct intel_connector *connector, - uint32_t hz); + u32 (*hz_to_pwm)(struct intel_connector *connector, u32 hz); void (*power)(struct intel_connector *, bool enable); } backlight; }; @@ -596,7 +608,7 @@ struct intel_initial_plane_config { struct intel_scaler { int in_use; - uint32_t mode; + u32 mode; }; struct intel_crtc_scaler_state { @@ -628,13 +640,15 @@ struct intel_crtc_scaler_state { }; /* drm_mode->private_flags */ -#define I915_MODE_FLAG_INHERITED 1 +#define I915_MODE_FLAG_INHERITED (1<<0) /* Flag to get scanline using frame time stamps */ #define I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP (1<<1) +/* Flag to use the scanline counter instead of the pixel counter */ +#define I915_MODE_FLAG_USE_SCANLINE_COUNTER (1<<2) struct intel_pipe_wm { struct intel_wm_level wm[5]; - uint32_t linetime; + u32 linetime; bool fbc_wm_enabled; bool pipe_enabled; bool sprites_enabled; @@ -650,7 +664,7 @@ struct skl_plane_wm { struct skl_pipe_wm { struct skl_plane_wm planes[I915_MAX_PLANES]; - uint32_t linetime; + u32 linetime; }; enum vlv_wm_level { @@ -663,7 +677,7 @@ enum vlv_wm_level { struct vlv_wm_state { struct g4x_pipe_wm wm[NUM_VLV_WM_LEVELS]; struct g4x_sr_wm sr[NUM_VLV_WM_LEVELS]; - uint8_t num_levels; + u8 num_levels; bool cxsr; }; @@ -876,13 +890,13 @@ struct intel_crtc_state { /* Used by SDVO (and if we ever fix it, HDMI). */ unsigned pixel_multiplier; - uint8_t lane_count; + u8 lane_count; /* * Used by platforms having DP/HDMI PHY with programmable lane * latency optimization. */ - uint8_t lane_lat_optim_mask; + u8 lane_lat_optim_mask; /* minimum acceptable voltage level */ u8 min_voltage_level; @@ -926,7 +940,7 @@ struct intel_crtc_state { struct intel_crtc_wm_state wm; /* Gamma mode programmed on the pipe */ - uint32_t gamma_mode; + u32 gamma_mode; /* bitmask of visible planes (enum plane_id) */ u8 active_planes; @@ -1012,7 +1026,7 @@ struct intel_plane { enum pipe pipe; bool has_fbc; bool has_ccs; - uint32_t frontbuffer_bit; + u32 frontbuffer_bit; struct { u32 base, cntl, size; @@ -1107,9 +1121,9 @@ enum link_m_n_set { struct intel_dp_compliance_data { unsigned long edid; - uint8_t video_pattern; - uint16_t hdisplay, vdisplay; - uint8_t bpc; + u8 video_pattern; + u16 hdisplay, vdisplay; + u8 bpc; }; struct intel_dp_compliance { @@ -1122,18 +1136,18 @@ struct intel_dp_compliance { struct intel_dp { i915_reg_t output_reg; - uint32_t DP; + u32 DP; int link_rate; - uint8_t lane_count; - uint8_t sink_count; + u8 lane_count; + u8 sink_count; bool link_mst; bool link_trained; bool has_audio; bool reset_link_params; - uint8_t dpcd[DP_RECEIVER_CAP_SIZE]; - uint8_t psr_dpcd[EDP_PSR_RECEIVER_CAP_SIZE]; - uint8_t downstream_ports[DP_MAX_DOWNSTREAM_PORTS]; - uint8_t edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE]; + u8 dpcd[DP_RECEIVER_CAP_SIZE]; + u8 psr_dpcd[EDP_PSR_RECEIVER_CAP_SIZE]; + u8 downstream_ports[DP_MAX_DOWNSTREAM_PORTS]; + u8 edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE]; u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE]; u8 fec_capable; /* source rates */ @@ -1153,7 +1167,7 @@ struct intel_dp { /* sink or branch descriptor */ struct drm_dp_desc desc; struct drm_dp_aux aux; - uint8_t train_set[4]; + u8 train_set[4]; int panel_power_up_delay; int panel_power_down_delay; int panel_power_cycle_delay; @@ -1195,14 +1209,13 @@ struct intel_dp { struct intel_dp_mst_encoder *mst_encoders[I915_MAX_PIPES]; struct drm_dp_mst_topology_mgr mst_mgr; - uint32_t (*get_aux_clock_divider)(struct intel_dp *dp, int index); + u32 (*get_aux_clock_divider)(struct intel_dp *dp, int index); /* * This function returns the value we have to program the AUX_CTL * register with to kick off an AUX transaction. */ - uint32_t (*get_aux_send_ctl)(struct intel_dp *dp, - int send_bytes, - uint32_t aux_clock_divider); + u32 (*get_aux_send_ctl)(struct intel_dp *dp, int send_bytes, + u32 aux_clock_divider); i915_reg_t (*aux_ch_ctl_reg)(struct intel_dp *dp); i915_reg_t (*aux_ch_data_reg)(struct intel_dp *dp, int index); @@ -1236,7 +1249,7 @@ struct intel_digital_port { struct intel_lspcon lspcon; enum irqreturn (*hpd_pulse)(struct intel_digital_port *, bool); bool release_cl2_override; - uint8_t max_lanes; + u8 max_lanes; /* Used for DP and ICL+ TypeC/DP and TypeC/HDMI ports. */ enum aux_ch aux_ch; enum intel_display_power_domain ddi_io_power_domain; @@ -1471,8 +1484,8 @@ void intel_check_cpu_fifo_underruns(struct drm_i915_private *dev_priv); void intel_check_pch_fifo_underruns(struct drm_i915_private *dev_priv); /* i915_irq.c */ -void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); -void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); +void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, u32 mask); +void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, u32 mask); void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask); void gen6_unmask_pm_irq(struct drm_i915_private *dev_priv, u32 mask); void gen11_reset_rps_interrupts(struct drm_i915_private *dev_priv); @@ -1535,7 +1548,7 @@ void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv, struct intel_crtc_state *crtc_state); u32 bxt_signal_levels(struct intel_dp *intel_dp); -uint32_t ddi_signal_levels(struct intel_dp *intel_dp); +u32 ddi_signal_levels(struct intel_dp *intel_dp); u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder); u8 intel_ddi_dp_pre_emphasis_max(struct intel_encoder *encoder, u8 voltage_swing); @@ -1675,11 +1688,11 @@ void intel_cleanup_plane_fb(struct drm_plane *plane, int intel_plane_atomic_get_property(struct drm_plane *plane, const struct drm_plane_state *state, struct drm_property *property, - uint64_t *val); + u64 *val); int intel_plane_atomic_set_property(struct drm_plane *plane, struct drm_plane_state *state, struct drm_property *property, - uint64_t val); + u64 val); int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_state, struct drm_crtc_state *crtc_state, const struct intel_plane_state *old_plane_state, @@ -1753,9 +1766,10 @@ static inline u32 intel_plane_ggtt_offset(const struct intel_plane_state *state) u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); +u32 glk_plane_color_ctl_crtc(const struct intel_crtc_state *crtc_state); u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); -u32 glk_color_ctl(const struct intel_plane_state *plane_state); +u32 skl_plane_ctl_crtc(const struct intel_crtc_state *crtc_state); u32 skl_plane_stride(const struct intel_plane_state *plane_state, int plane); int skl_check_plane_surface(struct intel_plane_state *plane_state); @@ -1799,10 +1813,10 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, i915_reg_t output_reg, bool intel_dp_init_connector(struct intel_digital_port *intel_dig_port, struct intel_connector *intel_connector); void intel_dp_set_link_params(struct intel_dp *intel_dp, - int link_rate, uint8_t lane_count, + int link_rate, u8 lane_count, bool link_mst); int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, - int link_rate, uint8_t lane_count); + int link_rate, u8 lane_count); void intel_dp_start_link_train(struct intel_dp *intel_dp); void intel_dp_stop_link_train(struct intel_dp *intel_dp); int intel_dp_retrain_link(struct intel_encoder *encoder, @@ -1834,7 +1848,7 @@ int intel_dp_max_lane_count(struct intel_dp *intel_dp); int intel_dp_rate_select(struct intel_dp *intel_dp, int rate); void intel_dp_hot_plug(struct intel_encoder *intel_encoder); void intel_power_sequencer_reset(struct drm_i915_private *dev_priv); -uint32_t intel_dp_pack_aux(const uint8_t *src, int src_bytes); +u32 intel_dp_pack_aux(const u8 *src, int src_bytes); void intel_plane_destroy(struct drm_plane *plane); void intel_edp_drrs_enable(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state); @@ -1847,24 +1861,24 @@ void intel_edp_drrs_flush(struct drm_i915_private *dev_priv, void intel_dp_program_link_training_pattern(struct intel_dp *intel_dp, - uint8_t dp_train_pat); + u8 dp_train_pat); void intel_dp_set_signal_levels(struct intel_dp *intel_dp); void intel_dp_set_idle_link_train(struct intel_dp *intel_dp); -uint8_t +u8 intel_dp_voltage_max(struct intel_dp *intel_dp); -uint8_t -intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, uint8_t voltage_swing); +u8 +intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, u8 voltage_swing); void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock, - uint8_t *link_bw, uint8_t *rate_select); + u8 *link_bw, u8 *rate_select); bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp); bool intel_dp_source_supports_hbr3(struct intel_dp *intel_dp); bool -intel_dp_get_link_status(struct intel_dp *intel_dp, uint8_t link_status[DP_LINK_STATUS_SIZE]); -uint16_t intel_dp_dsc_get_output_bpp(int link_clock, uint8_t lane_count, - int mode_clock, int mode_hdisplay); -uint8_t intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, int mode_clock, - int mode_hdisplay); +intel_dp_get_link_status(struct intel_dp *intel_dp, u8 link_status[DP_LINK_STATUS_SIZE]); +u16 intel_dp_dsc_get_output_bpp(int link_clock, u8 lane_count, + int mode_clock, int mode_hdisplay); +u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, int mode_clock, + int mode_hdisplay); /* intel_vdsc.c */ int intel_dp_compute_dsc_params(struct intel_dp *intel_dp, @@ -2023,6 +2037,9 @@ int intel_panel_setup_backlight(struct drm_connector *connector, enum pipe pipe); void intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); +void intel_panel_update_backlight(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); void intel_panel_disable_backlight(const struct drm_connector_state *old_conn_state); extern struct drm_display_mode *intel_find_panel_downclock( struct drm_i915_private *dev_priv, @@ -2084,6 +2101,7 @@ bool intel_psr_enabled(struct intel_dp *intel_dp); void intel_init_quirks(struct drm_i915_private *dev_priv); /* intel_runtime_pm.c */ +void intel_runtime_pm_init_early(struct drm_i915_private *dev_priv); int intel_power_domains_init(struct drm_i915_private *); void intel_power_domains_cleanup(struct drm_i915_private *dev_priv); void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume); @@ -2106,6 +2124,7 @@ void bxt_display_core_init(struct drm_i915_private *dev_priv, bool resume); void bxt_display_core_uninit(struct drm_i915_private *dev_priv); void intel_runtime_pm_enable(struct drm_i915_private *dev_priv); void intel_runtime_pm_disable(struct drm_i915_private *dev_priv); +void intel_runtime_pm_cleanup(struct drm_i915_private *dev_priv); const char * intel_display_power_domain_str(enum intel_display_power_domain domain); @@ -2113,33 +2132,42 @@ bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); -void intel_display_power_get(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain); -bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv, +intel_wakeref_t intel_display_power_get(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); +intel_wakeref_t +intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv, + enum intel_display_power_domain domain); +void intel_display_power_put_unchecked(struct drm_i915_private *dev_priv, + enum intel_display_power_domain domain); +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) void intel_display_power_put(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain); + enum intel_display_power_domain domain, + intel_wakeref_t wakeref); +#else +#define intel_display_power_put(i915, domain, wakeref) \ + intel_display_power_put_unchecked(i915, domain) +#endif void icl_dbuf_slices_update(struct drm_i915_private *dev_priv, u8 req_slices); static inline void -assert_rpm_device_not_suspended(struct drm_i915_private *dev_priv) +assert_rpm_device_not_suspended(struct drm_i915_private *i915) { - WARN_ONCE(dev_priv->runtime_pm.suspended, + WARN_ONCE(i915->runtime_pm.suspended, "Device suspended during HW access\n"); } static inline void -assert_rpm_wakelock_held(struct drm_i915_private *dev_priv) +assert_rpm_wakelock_held(struct drm_i915_private *i915) { - assert_rpm_device_not_suspended(dev_priv); - WARN_ONCE(!atomic_read(&dev_priv->runtime_pm.wakeref_count), + assert_rpm_device_not_suspended(i915); + WARN_ONCE(!atomic_read(&i915->runtime_pm.wakeref_count), "RPM wakelock ref not held during HW access"); } /** * disable_rpm_wakeref_asserts - disable the RPM assert checks - * @dev_priv: i915 device instance + * @i915: i915 device instance * * This function disable asserts that check if we hold an RPM wakelock * reference, while keeping the device-not-suspended checks still enabled. @@ -2156,14 +2184,14 @@ assert_rpm_wakelock_held(struct drm_i915_private *dev_priv) * enable_rpm_wakeref_asserts(). */ static inline void -disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv) +disable_rpm_wakeref_asserts(struct drm_i915_private *i915) { - atomic_inc(&dev_priv->runtime_pm.wakeref_count); + atomic_inc(&i915->runtime_pm.wakeref_count); } /** * enable_rpm_wakeref_asserts - re-enable the RPM assert checks - * @dev_priv: i915 device instance + * @i915: i915 device instance * * This function re-enables the RPM assert checks after disabling them with * disable_rpm_wakeref_asserts. It's meant to be used only in special @@ -2173,15 +2201,39 @@ disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv) * disable_rpm_wakeref_asserts(). */ static inline void -enable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv) +enable_rpm_wakeref_asserts(struct drm_i915_private *i915) { - atomic_dec(&dev_priv->runtime_pm.wakeref_count); + atomic_dec(&i915->runtime_pm.wakeref_count); } -void intel_runtime_pm_get(struct drm_i915_private *dev_priv); -bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv); -void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv); -void intel_runtime_pm_put(struct drm_i915_private *dev_priv); +intel_wakeref_t intel_runtime_pm_get(struct drm_i915_private *i915); +intel_wakeref_t intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915); +intel_wakeref_t intel_runtime_pm_get_noresume(struct drm_i915_private *i915); + +#define with_intel_runtime_pm(i915, wf) \ + for ((wf) = intel_runtime_pm_get(i915); (wf); \ + intel_runtime_pm_put((i915), (wf)), (wf) = 0) + +#define with_intel_runtime_pm_if_in_use(i915, wf) \ + for ((wf) = intel_runtime_pm_get_if_in_use(i915); (wf); \ + intel_runtime_pm_put((i915), (wf)), (wf) = 0) + +void intel_runtime_pm_put_unchecked(struct drm_i915_private *i915); +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) +void intel_runtime_pm_put(struct drm_i915_private *i915, intel_wakeref_t wref); +#else +#define intel_runtime_pm_put(i915, wref) intel_runtime_pm_put_unchecked(i915) +#endif + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) +void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915, + struct drm_printer *p); +#else +static inline void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915, + struct drm_printer *p) +{ +} +#endif void chv_phy_powergate_lanes(struct intel_encoder *encoder, bool override, unsigned int mask); @@ -2287,11 +2339,11 @@ void intel_tv_init(struct drm_i915_private *dev_priv); int intel_digital_connector_atomic_get_property(struct drm_connector *connector, const struct drm_connector_state *state, struct drm_property *property, - uint64_t *val); + u64 *val); int intel_digital_connector_atomic_set_property(struct drm_connector *connector, struct drm_connector_state *state, struct drm_property *property, - uint64_t val); + u64 val); int intel_digital_connector_atomic_check(struct drm_connector *conn, struct drm_connector_state *new_state); struct drm_connector_state * @@ -2338,8 +2390,8 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_ /* intel_color.c */ void intel_color_init(struct intel_crtc *crtc); int intel_color_check(struct intel_crtc_state *crtc_state); -void intel_color_set_csc(struct intel_crtc_state *crtc_state); -void intel_color_load_luts(struct intel_crtc_state *crtc_state); +void intel_color_commit(const struct intel_crtc_state *crtc_state); +void intel_color_load_luts(const struct intel_crtc_state *crtc_state); /* intel_lspcon.c */ bool lspcon_init(struct intel_digital_port *intel_dig_port); diff --git a/drivers/gpu/drm/i915/intel_dsi.h b/drivers/gpu/drm/i915/intel_dsi.h index fc7a09049f81..a9a19778dc7f 100644 --- a/drivers/gpu/drm/i915/intel_dsi.h +++ b/drivers/gpu/drm/i915/intel_dsi.h @@ -39,6 +39,7 @@ struct intel_dsi { struct intel_encoder base; struct intel_dsi_host *dsi_hosts[I915_MAX_PORTS]; + intel_wakeref_t io_wakeref[I915_MAX_PORTS]; /* GPIO Desc for CRC based Panel control */ struct gpio_desc *gpio_panel; @@ -172,7 +173,7 @@ int vlv_dsi_pll_compute(struct intel_encoder *encoder, void vlv_dsi_pll_enable(struct intel_encoder *encoder, const struct intel_crtc_state *config); void vlv_dsi_pll_disable(struct intel_encoder *encoder); -u32 vlv_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, +u32 vlv_dsi_get_pclk(struct intel_encoder *encoder, struct intel_crtc_state *config); void vlv_dsi_reset_clocks(struct intel_encoder *encoder, enum port port); @@ -182,7 +183,7 @@ int bxt_dsi_pll_compute(struct intel_encoder *encoder, void bxt_dsi_pll_enable(struct intel_encoder *encoder, const struct intel_crtc_state *config); void bxt_dsi_pll_disable(struct intel_encoder *encoder); -u32 bxt_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, +u32 bxt_dsi_get_pclk(struct intel_encoder *encoder, struct intel_crtc_state *config); void bxt_dsi_reset_clocks(struct intel_encoder *encoder, enum port port); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 236cd040f271..49fa43ff02ba 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -25,6 +25,7 @@ #include <drm/drm_print.h> #include "i915_drv.h" +#include "i915_reset.h" #include "intel_ringbuffer.h" #include "intel_lrc.h" @@ -457,12 +458,6 @@ cleanup: void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno) { intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); - - /* After manually advancing the seqno, fake the interrupt in case - * there are any waiters for that seqno. - */ - intel_engine_wakeup(engine); - GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno); } @@ -479,53 +474,67 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine) GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists))); GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); - execlists->queue_priority = INT_MIN; + execlists->queue_priority_hint = INT_MIN; execlists->queue = RB_ROOT_CACHED; } -/** - * intel_engines_setup_common - setup engine state not requiring hw access - * @engine: Engine to setup. - * - * Initializes @engine@ structure members shared between legacy and execlists - * submission modes which do not require hardware access. - * - * Typically done early in the submission mode specific engine setup stage. - */ -void intel_engine_setup_common(struct intel_engine_cs *engine) -{ - i915_timeline_init(engine->i915, &engine->timeline, engine->name); - i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); - - intel_engine_init_execlist(engine); - intel_engine_init_hangcheck(engine); - intel_engine_init_batch_pool(engine); - intel_engine_init_cmd_parser(engine); -} - static void cleanup_status_page(struct intel_engine_cs *engine) { + struct i915_vma *vma; + /* Prevent writes into HWSP after returning the page to the system */ intel_engine_set_hwsp_writemask(engine, ~0u); - if (HWS_NEEDS_PHYSICAL(engine->i915)) { - void *addr = fetch_and_zero(&engine->status_page.page_addr); + vma = fetch_and_zero(&engine->status_page.vma); + if (!vma) + return; - __free_page(virt_to_page(addr)); - } + if (!HWS_NEEDS_PHYSICAL(engine->i915)) + i915_vma_unpin(vma); + + i915_gem_object_unpin_map(vma->obj); + __i915_gem_object_release_unless_active(vma->obj); +} + +static int pin_ggtt_status_page(struct intel_engine_cs *engine, + struct i915_vma *vma) +{ + unsigned int flags; + + flags = PIN_GLOBAL; + if (!HAS_LLC(engine->i915)) + /* + * On g33, we cannot place HWS above 256MiB, so + * restrict its pinning to the low mappable arena. + * Though this restriction is not documented for + * gen4, gen5, or byt, they also behave similarly + * and hang if the HWS is placed at the top of the + * GTT. To generalise, it appears that all !llc + * platforms have issues with us placing the HWS + * above the mappable region (even though we never + * actually map it). + */ + flags |= PIN_MAPPABLE; + else + flags |= PIN_HIGH; - i915_vma_unpin_and_release(&engine->status_page.vma, - I915_VMA_RELEASE_MAP); + return i915_vma_pin(vma, 0, 0, flags); } static int init_status_page(struct intel_engine_cs *engine) { struct drm_i915_gem_object *obj; struct i915_vma *vma; - unsigned int flags; void *vaddr; int ret; + /* + * Though the HWS register does support 36bit addresses, historically + * we have had hangs and corruption reported due to wild writes if + * the HWS is placed above 4G. We only allow objects to be allocated + * in GFP_DMA32 for i965, and no earlier physical address users had + * access to more than 4G. + */ obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); if (IS_ERR(obj)) { DRM_ERROR("Failed to allocate status page\n"); @@ -542,59 +551,67 @@ static int init_status_page(struct intel_engine_cs *engine) goto err; } - flags = PIN_GLOBAL; - if (!HAS_LLC(engine->i915)) - /* On g33, we cannot place HWS above 256MiB, so - * restrict its pinning to the low mappable arena. - * Though this restriction is not documented for - * gen4, gen5, or byt, they also behave similarly - * and hang if the HWS is placed at the top of the - * GTT. To generalise, it appears that all !llc - * platforms have issues with us placing the HWS - * above the mappable region (even though we never - * actually map it). - */ - flags |= PIN_MAPPABLE; - else - flags |= PIN_HIGH; - ret = i915_vma_pin(vma, 0, 0, flags); - if (ret) - goto err; - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); - goto err_unpin; + goto err; } + engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE); engine->status_page.vma = vma; - engine->status_page.ggtt_offset = i915_ggtt_offset(vma); - engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE); + + if (!HWS_NEEDS_PHYSICAL(engine->i915)) { + ret = pin_ggtt_status_page(engine, vma); + if (ret) + goto err_unpin; + } + return 0; err_unpin: - i915_vma_unpin(vma); + i915_gem_object_unpin_map(obj); err: i915_gem_object_put(obj); return ret; } -static int init_phys_status_page(struct intel_engine_cs *engine) +/** + * intel_engines_setup_common - setup engine state not requiring hw access + * @engine: Engine to setup. + * + * Initializes @engine@ structure members shared between legacy and execlists + * submission modes which do not require hardware access. + * + * Typically done early in the submission mode specific engine setup stage. + */ +int intel_engine_setup_common(struct intel_engine_cs *engine) { - struct page *page; + int err; - /* - * Though the HWS register does support 36bit addresses, historically - * we have had hangs and corruption reported due to wild writes if - * the HWS is placed above 4G. - */ - page = alloc_page(GFP_KERNEL | __GFP_DMA32 | __GFP_ZERO); - if (!page) - return -ENOMEM; + err = init_status_page(engine); + if (err) + return err; + + err = i915_timeline_init(engine->i915, + &engine->timeline, + engine->name, + engine->status_page.vma); + if (err) + goto err_hwsp; + + i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); - engine->status_page.page_addr = page_address(page); + intel_engine_init_breadcrumbs(engine); + intel_engine_init_execlist(engine); + intel_engine_init_hangcheck(engine); + intel_engine_init_batch_pool(engine); + intel_engine_init_cmd_parser(engine); return 0; + +err_hwsp: + cleanup_status_page(engine); + return err; } static void __intel_context_unpin(struct i915_gem_context *ctx, @@ -603,6 +620,56 @@ static void __intel_context_unpin(struct i915_gem_context *ctx, intel_context_unpin(to_intel_context(ctx, engine)); } +struct measure_breadcrumb { + struct i915_request rq; + struct i915_timeline timeline; + struct intel_ring ring; + u32 cs[1024]; +}; + +static int measure_breadcrumb_dw(struct intel_engine_cs *engine) +{ + struct measure_breadcrumb *frame; + int dw = -ENOMEM; + + GEM_BUG_ON(!engine->i915->gt.scratch); + + frame = kzalloc(sizeof(*frame), GFP_KERNEL); + if (!frame) + return -ENOMEM; + + if (i915_timeline_init(engine->i915, + &frame->timeline, "measure", + engine->status_page.vma)) + goto out_frame; + + INIT_LIST_HEAD(&frame->ring.request_list); + frame->ring.timeline = &frame->timeline; + frame->ring.vaddr = frame->cs; + frame->ring.size = sizeof(frame->cs); + frame->ring.effective_size = frame->ring.size; + intel_ring_update_space(&frame->ring); + + frame->rq.i915 = engine->i915; + frame->rq.engine = engine; + frame->rq.ring = &frame->ring; + frame->rq.timeline = &frame->timeline; + + dw = i915_timeline_pin(&frame->timeline); + if (dw < 0) + goto out_timeline; + + dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs; + + i915_timeline_unpin(&frame->timeline); + +out_timeline: + i915_timeline_fini(&frame->timeline); +out_frame: + kfree(frame); + return dw; +} + /** * intel_engines_init_common - initialize cengine state which might require hw access * @engine: Engine to initialize. @@ -645,21 +712,14 @@ int intel_engine_init_common(struct intel_engine_cs *engine) } } - ret = intel_engine_init_breadcrumbs(engine); - if (ret) + ret = measure_breadcrumb_dw(engine); + if (ret < 0) goto err_unpin_preempt; - if (HWS_NEEDS_PHYSICAL(i915)) - ret = init_phys_status_page(engine); - else - ret = init_status_page(engine); - if (ret) - goto err_breadcrumbs; + engine->emit_fini_breadcrumb_dw = ret; return 0; -err_breadcrumbs: - intel_engine_fini_breadcrumbs(engine); err_unpin_preempt: if (i915->preempt_context) __intel_context_unpin(i915->preempt_context, engine); @@ -799,15 +859,15 @@ u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv) return mcr_s_ss_select; } -static inline uint32_t +static inline u32 read_subslice_reg(struct drm_i915_private *dev_priv, int slice, int subslice, i915_reg_t reg) { - uint32_t mcr_slice_subslice_mask; - uint32_t mcr_slice_subslice_select; - uint32_t default_mcr_s_ss_select; - uint32_t mcr; - uint32_t ret; + u32 mcr_slice_subslice_mask; + u32 mcr_slice_subslice_select; + u32 default_mcr_s_ss_select; + u32 mcr; + u32 ret; enum forcewake_domains fw_domains; if (INTEL_GEN(dev_priv) >= 11) { @@ -913,10 +973,15 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, static bool ring_is_idle(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; + intel_wakeref_t wakeref; bool idle = true; + if (I915_SELFTEST_ONLY(!engine->mmio_base)) + return true; + /* If the whole device is asleep, the engine must be idle */ - if (!intel_runtime_pm_get_if_in_use(dev_priv)) + wakeref = intel_runtime_pm_get_if_in_use(dev_priv); + if (!wakeref) return true; /* First check that no commands are left in the ring */ @@ -928,7 +993,7 @@ static bool ring_is_idle(struct intel_engine_cs *engine) if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE)) idle = false; - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return idle; } @@ -952,9 +1017,6 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) if (!intel_engine_signaled(engine, intel_engine_last_submit(engine))) return false; - if (I915_SELFTEST_ONLY(engine->breadcrumbs.mock)) - return true; - /* Waiting to drain ELSP? */ if (READ_ONCE(engine->execlists.active)) { struct tasklet_struct *t = &engine->execlists.tasklet; @@ -980,10 +1042,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) return false; /* Ring stopped? */ - if (!ring_is_idle(engine)) - return false; - - return true; + return ring_is_idle(engine); } bool intel_engines_are_idle(struct drm_i915_private *dev_priv) @@ -1027,7 +1086,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) * the last request that remains in the timeline. When idle, it is * the last executed context as tracked by retirement. */ - rq = __i915_gem_active_peek(&engine->timeline.last_request); + rq = __i915_active_request_peek(&engine->timeline.last_request); if (rq) return rq->hw_context == kernel_context; else @@ -1071,10 +1130,8 @@ void intel_engines_sanitize(struct drm_i915_private *i915, bool force) if (!reset_engines(i915) && !force) return; - for_each_engine(engine, i915, id) { - if (engine->reset.reset) - engine->reset.reset(engine, NULL); - } + for_each_engine(engine, i915, id) + intel_engine_reset(engine, false); } /** @@ -1110,7 +1167,7 @@ void intel_engines_park(struct drm_i915_private *i915) } /* Must be reset upon idling, or we may miss the busy wakeup. */ - GEM_BUG_ON(engine->execlists.queue_priority != INT_MIN); + GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); if (engine->park) engine->park(engine); @@ -1226,10 +1283,14 @@ static void print_request(struct drm_printer *m, x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); - drm_printf(m, "%s%x%s [%llx:%llx]%s @ %dms: %s\n", + drm_printf(m, "%s%x%s%s [%llx:%llx]%s @ %dms: %s\n", prefix, rq->global_seqno, - i915_request_completed(rq) ? "!" : "", + i915_request_completed(rq) ? "!" : + i915_request_started(rq) ? "*" : + "", + test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &rq->fence.flags) ? "+" : "", rq->fence.context, rq->fence.seqno, buf, jiffies_to_msecs(jiffies - rq->emitted_jiffies), @@ -1320,7 +1381,8 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, } if (HAS_EXECLISTS(dev_priv)) { - const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; + const u32 *hws = + &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; unsigned int idx; u8 read, write; @@ -1363,9 +1425,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, char hdr[80]; snprintf(hdr, sizeof(hdr), - "\t\tELSP[%d] count=%d, ring->start=%08x, rq: ", + "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x}, rq: ", idx, count, - i915_ggtt_offset(rq->ring->vma)); + i915_ggtt_offset(rq->ring->vma), + rq->timeline->hwsp_offset); print_request(m, rq, hdr); } else { drm_printf(m, "\t\tELSP[%d] idle\n", idx); @@ -1420,14 +1483,9 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...) { - const int MAX_REQUESTS_TO_SHOW = 8; - struct intel_breadcrumbs * const b = &engine->breadcrumbs; - const struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_gpu_error * const error = &engine->i915->gpu_error; - struct i915_request *rq, *last; - unsigned long flags; - struct rb_node *rb; - int count; + struct i915_request *rq; + intel_wakeref_t wakeref; if (header) { va_list ap; @@ -1477,80 +1535,30 @@ void intel_engine_dump(struct intel_engine_cs *engine, rq->ring->emit); drm_printf(m, "\t\tring->space: 0x%08x\n", rq->ring->space); + drm_printf(m, "\t\tring->hwsp: 0x%08x\n", + rq->timeline->hwsp_offset); print_request_ring(m, rq); } rcu_read_unlock(); - if (intel_runtime_pm_get_if_in_use(engine->i915)) { + wakeref = intel_runtime_pm_get_if_in_use(engine->i915); + if (wakeref) { intel_engine_print_registers(engine, m); - intel_runtime_pm_put(engine->i915); + intel_runtime_pm_put(engine->i915, wakeref); } else { drm_printf(m, "\tDevice is asleep; skipping register dump\n"); } - local_irq_save(flags); - spin_lock(&engine->timeline.lock); - - last = NULL; - count = 0; - list_for_each_entry(rq, &engine->timeline.requests, link) { - if (count++ < MAX_REQUESTS_TO_SHOW - 1) - print_request(m, rq, "\t\tE "); - else - last = rq; - } - if (last) { - if (count > MAX_REQUESTS_TO_SHOW) { - drm_printf(m, - "\t\t...skipping %d executing requests...\n", - count - MAX_REQUESTS_TO_SHOW); - } - print_request(m, last, "\t\tE "); - } - - last = NULL; - count = 0; - drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority); - for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { - struct i915_priolist *p = rb_entry(rb, typeof(*p), node); - int i; - - priolist_for_each_request(rq, p, i) { - if (count++ < MAX_REQUESTS_TO_SHOW - 1) - print_request(m, rq, "\t\tQ "); - else - last = rq; - } - } - if (last) { - if (count > MAX_REQUESTS_TO_SHOW) { - drm_printf(m, - "\t\t...skipping %d queued requests...\n", - count - MAX_REQUESTS_TO_SHOW); - } - print_request(m, last, "\t\tQ "); - } - - spin_unlock(&engine->timeline.lock); - - spin_lock(&b->rb_lock); - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = rb_entry(rb, typeof(*w), node); - - drm_printf(m, "\t%s [%d:%c] waiting for %x\n", - w->tsk->comm, w->tsk->pid, - task_state_to_char(w->tsk), - w->seqno); - } - spin_unlock(&b->rb_lock); - local_irq_restore(flags); + intel_execlists_show_requests(engine, m, print_request, 8); drm_printf(m, "HWSP:\n"); - hexdump(m, engine->status_page.page_addr, PAGE_SIZE); + hexdump(m, engine->status_page.addr, PAGE_SIZE); drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine))); + + intel_engine_print_breadcrumbs(engine, m); } static u8 user_class_map[] = { diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 3a93cdd1dd91..656e684e7c9a 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -596,7 +596,7 @@ static bool stride_is_valid(struct drm_i915_private *dev_priv, } static bool pixel_format_is_valid(struct drm_i915_private *dev_priv, - uint32_t pixel_format) + u32 pixel_format) { switch (pixel_format) { case DRM_FORMAT_XRGB8888: diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index d54e7c0ce93c..376ffe842e26 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -179,8 +179,9 @@ static int intelfb_create(struct drm_fb_helper *helper, const struct i915_ggtt_view view = { .type = I915_GGTT_VIEW_NORMAL, }; - struct fb_info *info; struct drm_framebuffer *fb; + intel_wakeref_t wakeref; + struct fb_info *info; struct i915_vma *vma; unsigned long flags = 0; bool prealloc = false; @@ -211,7 +212,7 @@ static int intelfb_create(struct drm_fb_helper *helper, } mutex_lock(&dev->struct_mutex); - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); /* Pin the GGTT vma for our access via info->screen_base. * This also validates that any existing fb inherited from the @@ -278,7 +279,7 @@ static int intelfb_create(struct drm_fb_helper *helper, ifbdev->vma = vma; ifbdev->vma_flags = flags; - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); mutex_unlock(&dev->struct_mutex); vga_switcheroo_client_fb_set(pdev, info); return 0; @@ -286,7 +287,7 @@ static int intelfb_create(struct drm_fb_helper *helper, out_unpin: intel_unpin_fb_vma(vma, flags); out_unlock: - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); mutex_unlock(&dev->struct_mutex); return ret; } @@ -680,6 +681,7 @@ int intel_fbdev_init(struct drm_device *dev) if (ifbdev == NULL) return -ENOMEM; + mutex_init(&ifbdev->hpd_lock); drm_fb_helper_prepare(dev, &ifbdev->helper, &intel_fb_helper_funcs); if (!intel_fbdev_init_bios(dev, ifbdev)) @@ -753,6 +755,26 @@ void intel_fbdev_fini(struct drm_i915_private *dev_priv) intel_fbdev_destroy(ifbdev); } +/* Suspends/resumes fbdev processing of incoming HPD events. When resuming HPD + * processing, fbdev will perform a full connector reprobe if a hotplug event + * was received while HPD was suspended. + */ +static void intel_fbdev_hpd_set_suspend(struct intel_fbdev *ifbdev, int state) +{ + bool send_hpd = false; + + mutex_lock(&ifbdev->hpd_lock); + ifbdev->hpd_suspended = state == FBINFO_STATE_SUSPENDED; + send_hpd = !ifbdev->hpd_suspended && ifbdev->hpd_waiting; + ifbdev->hpd_waiting = false; + mutex_unlock(&ifbdev->hpd_lock); + + if (send_hpd) { + DRM_DEBUG_KMS("Handling delayed fbcon HPD event\n"); + drm_fb_helper_hotplug_event(&ifbdev->helper); + } +} + void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -774,6 +796,7 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous */ if (state != FBINFO_STATE_RUNNING) flush_work(&dev_priv->fbdev_suspend_work); + console_lock(); } else { /* @@ -801,17 +824,26 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous drm_fb_helper_set_suspend(&ifbdev->helper, state); console_unlock(); + + intel_fbdev_hpd_set_suspend(ifbdev, state); } void intel_fbdev_output_poll_changed(struct drm_device *dev) { struct intel_fbdev *ifbdev = to_i915(dev)->fbdev; + bool send_hpd; if (!ifbdev) return; intel_fbdev_sync(ifbdev); - if (ifbdev->vma || ifbdev->helper.deferred_setup) + + mutex_lock(&ifbdev->hpd_lock); + send_hpd = !ifbdev->hpd_suspended; + ifbdev->hpd_waiting = true; + mutex_unlock(&ifbdev->hpd_lock); + + if (send_hpd && (ifbdev->vma || ifbdev->helper.deferred_setup)) drm_fb_helper_hotplug_event(&ifbdev->helper); } diff --git a/drivers/gpu/drm/i915/intel_fifo_underrun.c b/drivers/gpu/drm/i915/intel_fifo_underrun.c index 9b39975c8389..f33de4be4b89 100644 --- a/drivers/gpu/drm/i915/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/intel_fifo_underrun.c @@ -127,8 +127,8 @@ static void ironlake_set_fifo_underrun_reporting(struct drm_device *dev, enum pipe pipe, bool enable) { struct drm_i915_private *dev_priv = to_i915(dev); - uint32_t bit = (pipe == PIPE_A) ? DE_PIPEA_FIFO_UNDERRUN : - DE_PIPEB_FIFO_UNDERRUN; + u32 bit = (pipe == PIPE_A) ? + DE_PIPEA_FIFO_UNDERRUN : DE_PIPEB_FIFO_UNDERRUN; if (enable) ilk_enable_display_irq(dev_priv, bit); @@ -140,7 +140,7 @@ static void ivybridge_check_fifo_underruns(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; - uint32_t err_int = I915_READ(GEN7_ERR_INT); + u32 err_int = I915_READ(GEN7_ERR_INT); lockdep_assert_held(&dev_priv->irq_lock); @@ -193,8 +193,8 @@ static void ibx_set_fifo_underrun_reporting(struct drm_device *dev, bool enable) { struct drm_i915_private *dev_priv = to_i915(dev); - uint32_t bit = (pch_transcoder == PIPE_A) ? - SDE_TRANSA_FIFO_UNDER : SDE_TRANSB_FIFO_UNDER; + u32 bit = (pch_transcoder == PIPE_A) ? + SDE_TRANSA_FIFO_UNDER : SDE_TRANSB_FIFO_UNDER; if (enable) ibx_enable_display_interrupt(dev_priv, bit); @@ -206,7 +206,7 @@ static void cpt_check_pch_fifo_underruns(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pch_transcoder = crtc->pipe; - uint32_t serr_int = I915_READ(SERR_INT); + u32 serr_int = I915_READ(SERR_INT); lockdep_assert_held(&dev_priv->irq_lock); @@ -258,7 +258,7 @@ static bool __intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev, old = !crtc->cpu_fifo_underrun_disabled; crtc->cpu_fifo_underrun_disabled = !enable; - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) i9xx_set_fifo_underrun_reporting(dev, pipe, enable, old); else if (IS_GEN_RANGE(dev_priv, 5, 6)) ironlake_set_fifo_underrun_reporting(dev, pipe, enable); @@ -369,7 +369,7 @@ void intel_cpu_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv, return; /* GMCH can't disable fifo underruns, filter them. */ - if (HAS_GMCH_DISPLAY(dev_priv) && + if (HAS_GMCH(dev_priv) && crtc->cpu_fifo_underrun_disabled) return; @@ -421,7 +421,7 @@ void intel_check_cpu_fifo_underruns(struct drm_i915_private *dev_priv) if (crtc->cpu_fifo_underrun_disabled) continue; - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) i9xx_check_fifo_underruns(crtc); else if (IS_GEN(dev_priv, 7)) ivybridge_check_fifo_underruns(crtc); diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/intel_gpu_commands.h index 105e2a9e874a..b96a31bc1080 100644 --- a/drivers/gpu/drm/i915/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/intel_gpu_commands.h @@ -112,7 +112,6 @@ #define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */ #define MI_USE_GGTT (1 << 22) /* g4x+ */ #define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) -#define MI_STORE_DWORD_INDEX_SHIFT 2 /* * Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM: * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 0f1c4f9ebfd8..744220296653 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -192,4 +192,7 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask) spin_unlock_irq(&guc->irq_lock); } +int intel_guc_reset_engine(struct intel_guc *guc, + struct intel_engine_cs *engine); + #endif diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index d3ebdbc0182e..806fdfd7c78a 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -140,6 +140,9 @@ static struct dentry *create_buf_file_callback(const char *filename, buf_file = debugfs_create_file(filename, mode, parent, buf, &relay_file_operations); + if (IS_ERR(buf_file)) + return NULL; + return buf_file; } @@ -436,6 +439,7 @@ static void guc_log_capture_logs(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); struct drm_i915_private *dev_priv = guc_to_i915(guc); + intel_wakeref_t wakeref; guc_read_update_log_buffer(log); @@ -443,9 +447,8 @@ static void guc_log_capture_logs(struct intel_guc_log *log) * Generally device is expected to be active only at this * time, so get/put should be really quick. */ - intel_runtime_pm_get(dev_priv); - guc_action_flush_log_complete(guc); - intel_runtime_pm_put(dev_priv); + with_intel_runtime_pm(dev_priv, wakeref) + guc_action_flush_log_complete(guc); } int intel_guc_log_create(struct intel_guc_log *log) @@ -505,7 +508,8 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) { struct intel_guc *guc = log_to_guc(log); struct drm_i915_private *dev_priv = guc_to_i915(guc); - int ret; + intel_wakeref_t wakeref; + int ret = 0; BUILD_BUG_ON(GUC_LOG_VERBOSITY_MIN != 0); GEM_BUG_ON(!log->vma); @@ -519,16 +523,14 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) mutex_lock(&dev_priv->drm.struct_mutex); - if (log->level == level) { - ret = 0; + if (log->level == level) goto out_unlock; - } - intel_runtime_pm_get(dev_priv); - ret = guc_action_control_log(guc, GUC_LOG_LEVEL_IS_VERBOSE(level), - GUC_LOG_LEVEL_IS_ENABLED(level), - GUC_LOG_LEVEL_TO_VERBOSITY(level)); - intel_runtime_pm_put(dev_priv); + with_intel_runtime_pm(dev_priv, wakeref) + ret = guc_action_control_log(guc, + GUC_LOG_LEVEL_IS_VERBOSE(level), + GUC_LOG_LEVEL_IS_ENABLED(level), + GUC_LOG_LEVEL_TO_VERBOSITY(level)); if (ret) { DRM_DEBUG_DRIVER("guc_log_control action failed %d\n", ret); goto out_unlock; @@ -601,6 +603,7 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); struct drm_i915_private *i915 = guc_to_i915(guc); + intel_wakeref_t wakeref; /* * Before initiating the forceful flush, wait for any pending/ongoing @@ -608,9 +611,8 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log) */ flush_work(&log->relay.flush_work); - intel_runtime_pm_get(i915); - guc_action_flush_log(guc); - intel_runtime_pm_put(i915); + with_intel_runtime_pm(i915, wakeref) + guc_action_flush_log(guc); /* GuC would have updated log buffer by now, so capture it */ guc_log_capture_logs(log); diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index ab1c49b106f2..8bc8aa54aa35 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -81,6 +81,12 @@ * */ +static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine) +{ + return (i915_ggtt_offset(engine->status_page.vma) + + I915_GEM_HWS_PREEMPT_ADDR); +} + static inline struct i915_priolist *to_priolist(struct rb_node *rb) { return rb_entry(rb, struct i915_priolist, node); @@ -623,6 +629,8 @@ static void inject_preempt_context(struct work_struct *work) EXECLISTS_ACTIVE_PREEMPT); tasklet_schedule(&engine->execlists.tasklet); } + + (void)I915_SELFTEST_ONLY(engine->execlists.preempt_hang.count++); } /* @@ -666,7 +674,7 @@ static void complete_preempt_context(struct intel_engine_cs *engine) execlists_unwind_incomplete_requests(execlists); wait_for_guc_preempt_report(engine); - intel_write_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX, 0); + intel_write_status_page(engine, I915_GEM_HWS_PREEMPT, 0); } /** @@ -731,7 +739,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine) if (intel_engine_has_preemption(engine)) { struct guc_preempt_work *preempt_work = &engine->i915->guc.preempt_work[engine->id]; - int prio = execlists->queue_priority; + int prio = execlists->queue_priority_hint; if (__execlists_need_preempt(prio, port_prio(port))) { execlists_set_active(execlists, @@ -777,7 +785,8 @@ static bool __guc_dequeue(struct intel_engine_cs *engine) kmem_cache_free(engine->i915->priorities, p); } done: - execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN; + execlists->queue_priority_hint = + rb ? to_priolist(rb)->priority : INT_MIN; if (submit) port_assign(port, last); if (last) @@ -824,7 +833,7 @@ static void guc_submission_tasklet(unsigned long data) } if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) && - intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) == + intel_read_status_page(engine, I915_GEM_HWS_PREEMPT) == GUC_PREEMPT_FINISHED) complete_preempt_context(engine); @@ -834,8 +843,7 @@ static void guc_submission_tasklet(unsigned long data) spin_unlock_irqrestore(&engine->timeline.lock, flags); } -static struct i915_request * -guc_reset_prepare(struct intel_engine_cs *engine) +static void guc_reset_prepare(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -861,8 +869,6 @@ guc_reset_prepare(struct intel_engine_cs *engine) */ if (engine->i915->guc.preempt_wq) flush_workqueue(engine->i915->guc.preempt_wq); - - return i915_gem_find_active_request(engine); } /* diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c index c22b3e18a0f5..1d7d26e4cf14 100644 --- a/drivers/gpu/drm/i915/intel_gvt.c +++ b/drivers/gpu/drm/i915/intel_gvt.c @@ -49,6 +49,9 @@ static bool is_supported_device(struct drm_i915_private *dev_priv) return true; if (IS_BROXTON(dev_priv)) return true; + if (IS_COFFEELAKE(dev_priv)) + return true; + return false; } @@ -105,15 +108,6 @@ int intel_gvt_init(struct drm_i915_private *dev_priv) return -EIO; } - /* - * We're not in host or fail to find a MPT module, disable GVT-g - */ - ret = intel_gvt_init_host(); - if (ret) { - DRM_DEBUG_DRIVER("Not in host or MPT modules not found\n"); - goto bail; - } - ret = intel_gvt_init_device(dev_priv); if (ret) { DRM_DEBUG_DRIVER("Fail to init GVT device\n"); diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index 51e9efec5116..a219c796e56d 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -23,6 +23,18 @@ */ #include "i915_drv.h" +#include "i915_reset.h" + +struct hangcheck { + u64 acthd; + u32 seqno; + enum intel_engine_hangcheck_action action; + unsigned long action_timestamp; + int deadlock; + struct intel_instdone instdone; + bool wedged:1; + bool stalled:1; +}; static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone) { @@ -118,25 +130,22 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) } static void hangcheck_load_sample(struct intel_engine_cs *engine, - struct intel_engine_hangcheck *hc) + struct hangcheck *hc) { hc->acthd = intel_engine_get_active_head(engine); hc->seqno = intel_engine_get_seqno(engine); } static void hangcheck_store_sample(struct intel_engine_cs *engine, - const struct intel_engine_hangcheck *hc) + const struct hangcheck *hc) { engine->hangcheck.acthd = hc->acthd; engine->hangcheck.seqno = hc->seqno; - engine->hangcheck.action = hc->action; - engine->hangcheck.stalled = hc->stalled; - engine->hangcheck.wedged = hc->wedged; } static enum intel_engine_hangcheck_action hangcheck_get_action(struct intel_engine_cs *engine, - const struct intel_engine_hangcheck *hc) + const struct hangcheck *hc) { if (engine->hangcheck.seqno != hc->seqno) return ENGINE_ACTIVE_SEQNO; @@ -148,7 +157,7 @@ hangcheck_get_action(struct intel_engine_cs *engine, } static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, - struct intel_engine_hangcheck *hc) + struct hangcheck *hc) { unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT; @@ -194,10 +203,6 @@ static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, break; case ENGINE_DEAD: - if (GEM_SHOW_DEBUG()) { - struct drm_printer p = drm_debug_printer("hangcheck"); - intel_engine_dump(engine, &p, "%s\n", engine->name); - } break; default: @@ -268,22 +273,35 @@ static void i915_hangcheck_elapsed(struct work_struct *work) intel_uncore_arm_unclaimed_mmio_detection(dev_priv); for_each_engine(engine, dev_priv, id) { - struct intel_engine_hangcheck hc; + struct hangcheck hc; + + intel_engine_signal_breadcrumbs(engine); hangcheck_load_sample(engine, &hc); hangcheck_accumulate_sample(engine, &hc); hangcheck_store_sample(engine, &hc); - if (engine->hangcheck.stalled) { + if (hc.stalled) { hung |= intel_engine_flag(engine); if (hc.action != ENGINE_DEAD) stuck |= intel_engine_flag(engine); } - if (engine->hangcheck.wedged) + if (hc.wedged) wedged |= intel_engine_flag(engine); } + if (GEM_SHOW_DEBUG() && (hung | stuck)) { + struct drm_printer p = drm_debug_printer("hangcheck"); + + for_each_engine(engine, dev_priv, id) { + if (intel_engine_is_idle(engine)) + continue; + + intel_engine_dump(engine, &p, "%s\n", engine->name); + } + } + if (wedged) { dev_err(dev_priv->drm.dev, "GPU recovery timed out," diff --git a/drivers/gpu/drm/i915/intel_hdcp.c b/drivers/gpu/drm/i915/intel_hdcp.c index 3fcb3b775948..ce7ba3a9c000 100644 --- a/drivers/gpu/drm/i915/intel_hdcp.c +++ b/drivers/gpu/drm/i915/intel_hdcp.c @@ -838,8 +838,8 @@ void intel_hdcp_atomic_check(struct drm_connector *connector, struct drm_connector_state *old_state, struct drm_connector_state *new_state) { - uint64_t old_cp = old_state->content_protection; - uint64_t new_cp = new_state->content_protection; + u64 old_cp = old_state->content_protection; + u64 new_cp = new_state->content_protection; struct drm_crtc_state *crtc_state; if (!new_state->crtc) { diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index ca195e6203c5..f125a62eba8c 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1186,15 +1186,17 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + intel_wakeref_t wakeref; bool ret; - if (!intel_display_power_get_if_enabled(dev_priv, - encoder->power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain); + if (!wakeref) return false; ret = intel_sdvo_port_enabled(dev_priv, intel_hdmi->hdmi_reg, pipe); - intel_display_power_put(dev_priv, encoder->power_domain); + intel_display_power_put(dev_priv, encoder->power_domain, wakeref); return ret; } @@ -1586,7 +1588,7 @@ intel_hdmi_mode_valid(struct drm_connector *connector, if (hdmi->has_hdmi_sink && !force_dvi) { /* if we can't do 8bpc we may still be able to do 12bpc */ - if (status != MODE_OK && !HAS_GMCH_DISPLAY(dev_priv)) + if (status != MODE_OK && !HAS_GMCH(dev_priv)) status = hdmi_port_clock_valid(hdmi, clock * 3 / 2, true, force_dvi); @@ -1611,7 +1613,7 @@ static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, &crtc_state->base.adjusted_mode; int i; - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) return false; if (bpc == 10 && INTEL_GEN(dev_priv) < 11) @@ -1890,11 +1892,12 @@ intel_hdmi_set_edid(struct drm_connector *connector) { struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); + intel_wakeref_t wakeref; struct edid *edid; bool connected = false; struct i2c_adapter *i2c; - intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); + wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); i2c = intel_gmbus_get_adapter(dev_priv, intel_hdmi->ddc_bus); @@ -1909,7 +1912,7 @@ intel_hdmi_set_edid(struct drm_connector *connector) intel_hdmi_dp_dual_mode_detect(connector, edid != NULL); - intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS); + intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS, wakeref); to_intel_connector(connector)->detect_edid = edid; if (edid && edid->input & DRM_EDID_INPUT_DIGITAL) { @@ -1931,11 +1934,12 @@ intel_hdmi_detect(struct drm_connector *connector, bool force) struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); struct intel_encoder *encoder = &hdmi_to_dig_port(intel_hdmi)->base; + intel_wakeref_t wakeref; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); - intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); + wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); if (IS_ICELAKE(dev_priv) && !intel_digital_port_connected(encoder)) @@ -1947,7 +1951,7 @@ intel_hdmi_detect(struct drm_connector *connector, bool force) status = connector_status_connected; out: - intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS); + intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS, wakeref); if (status != connector_status_connected) cec_notifier_phys_addr_invalidate(intel_hdmi->cec_notifier); @@ -2146,7 +2150,7 @@ intel_hdmi_add_properties(struct intel_hdmi *intel_hdmi, struct drm_connector *c drm_connector_attach_content_type_property(connector); connector->state->picture_aspect_ratio = HDMI_PICTURE_ASPECT_NONE; - if (!HAS_GMCH_DISPLAY(dev_priv)) + if (!HAS_GMCH(dev_priv)) drm_connector_attach_max_bpc_property(connector, 8, 12); } diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index ae92d6560165..b8937c788f03 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -226,9 +226,10 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work) container_of(work, typeof(*dev_priv), hotplug.reenable_work.work); struct drm_device *dev = &dev_priv->drm; + intel_wakeref_t wakeref; enum hpd_pin pin; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); spin_lock_irq(&dev_priv->irq_lock); for_each_hpd_pin(pin) { @@ -261,7 +262,7 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work) dev_priv->display.hpd_irq_setup(dev_priv); spin_unlock_irq(&dev_priv->irq_lock); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); } bool intel_encoder_hotplug(struct intel_encoder *encoder, @@ -469,7 +470,7 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, * hotplug bits itself. So only WARN about unexpected * interrupts on saner platforms. */ - WARN_ONCE(!HAS_GMCH_DISPLAY(dev_priv), + WARN_ONCE(!HAS_GMCH(dev_priv), "Received HPD interrupt on pin %d although disabled\n", pin); continue; } diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index bc27b691d824..9bd1c9002c2a 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -115,14 +115,14 @@ fail: int intel_huc_check_status(struct intel_huc *huc) { struct drm_i915_private *dev_priv = huc_to_i915(huc); - bool status; + intel_wakeref_t wakeref; + bool status = false; if (!HAS_HUC(dev_priv)) return -ENODEV; - intel_runtime_pm_get(dev_priv); - status = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED; - intel_runtime_pm_put(dev_priv); + with_intel_runtime_pm(dev_priv, wakeref) + status = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED; return status; } diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index c6159aff9dc8..5a733e711355 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -697,12 +697,13 @@ out: static int gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) { - struct intel_gmbus *bus = container_of(adapter, struct intel_gmbus, - adapter); + struct intel_gmbus *bus = + container_of(adapter, struct intel_gmbus, adapter); struct drm_i915_private *dev_priv = bus->dev_priv; + intel_wakeref_t wakeref; int ret; - intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); + wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); if (bus->force_bit) { ret = i2c_bit_algo.master_xfer(adapter, msgs, num); @@ -714,17 +715,16 @@ gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) bus->force_bit |= GMBUS_FORCE_BIT_RETRY; } - intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS); + intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS, wakeref); return ret; } int intel_gmbus_output_aksv(struct i2c_adapter *adapter) { - struct intel_gmbus *bus = container_of(adapter, struct intel_gmbus, - adapter); + struct intel_gmbus *bus = + container_of(adapter, struct intel_gmbus, adapter); struct drm_i915_private *dev_priv = bus->dev_priv; - int ret; u8 cmd = DRM_HDCP_DDC_AKSV; u8 buf[DRM_HDCP_KSV_LEN] = { 0 }; struct i2c_msg msgs[] = { @@ -741,8 +741,10 @@ int intel_gmbus_output_aksv(struct i2c_adapter *adapter) .buf = buf, } }; + intel_wakeref_t wakeref; + int ret; - intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); + wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); mutex_lock(&dev_priv->gmbus_mutex); /* @@ -753,7 +755,7 @@ int intel_gmbus_output_aksv(struct i2c_adapter *adapter) ret = do_gmbus_xfer(adapter, msgs, ARRAY_SIZE(msgs), GMBUS_AKSV_SELECT); mutex_unlock(&dev_priv->gmbus_mutex); - intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS); + intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS, wakeref); return ret; } @@ -821,7 +823,7 @@ int intel_setup_gmbus(struct drm_i915_private *dev_priv) if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) dev_priv->gpio_mmio_base = VLV_DISPLAY_BASE; - else if (!HAS_GMCH_DISPLAY(dev_priv)) + else if (!HAS_GMCH(dev_priv)) /* * Broxton uses the same PCH offsets for South Display Engine, * even though it doesn't have a PCH. diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 1effbf49fa08..5e98fd79bd9d 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -136,6 +136,7 @@ #include <drm/i915_drm.h> #include "i915_drv.h" #include "i915_gem_render_state.h" +#include "i915_reset.h" #include "i915_vgpu.h" #include "intel_lrc_reg.h" #include "intel_mocs.h" @@ -171,6 +172,12 @@ static void execlists_init_reg_state(u32 *reg_state, struct intel_engine_cs *engine, struct intel_ring *ring); +static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) +{ + return (i915_ggtt_offset(engine->status_page.vma) + + I915_GEM_HWS_INDEX_ADDR); +} + static inline struct i915_priolist *to_priolist(struct rb_node *rb) { return rb_entry(rb, struct i915_priolist, node); @@ -181,13 +188,90 @@ static inline int rq_prio(const struct i915_request *rq) return rq->sched.attr.priority; } +static int queue_prio(const struct intel_engine_execlists *execlists) +{ + struct i915_priolist *p; + struct rb_node *rb; + + rb = rb_first_cached(&execlists->queue); + if (!rb) + return INT_MIN; + + /* + * As the priolist[] are inverted, with the highest priority in [0], + * we have to flip the index value to become priority. + */ + p = to_priolist(rb); + return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used); +} + static inline bool need_preempt(const struct intel_engine_cs *engine, - const struct i915_request *last, - int prio) + const struct i915_request *rq) +{ + const int last_prio = rq_prio(rq); + + if (!intel_engine_has_preemption(engine)) + return false; + + if (i915_request_completed(rq)) + return false; + + /* + * Check if the current priority hint merits a preemption attempt. + * + * We record the highest value priority we saw during rescheduling + * prior to this dequeue, therefore we know that if it is strictly + * less than the current tail of ESLP[0], we do not need to force + * a preempt-to-idle cycle. + * + * However, the priority hint is a mere hint that we may need to + * preempt. If that hint is stale or we may be trying to preempt + * ourselves, ignore the request. + */ + if (!__execlists_need_preempt(engine->execlists.queue_priority_hint, + last_prio)) + return false; + + /* + * Check against the first request in ELSP[1], it will, thanks to the + * power of PI, be the highest priority of that context. + */ + if (!list_is_last(&rq->link, &engine->timeline.requests) && + rq_prio(list_next_entry(rq, link)) > last_prio) + return true; + + /* + * If the inflight context did not trigger the preemption, then maybe + * it was the set of queued requests? Pick the highest priority in + * the queue (the first active priolist) and see if it deserves to be + * running instead of ELSP[0]. + * + * The highest priority request in the queue can not be either + * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same + * context, it's priority would not exceed ELSP[0] aka last_prio. + */ + return queue_prio(&engine->execlists) > last_prio; +} + +__maybe_unused static inline bool +assert_priority_queue(const struct intel_engine_execlists *execlists, + const struct i915_request *prev, + const struct i915_request *next) { - return (intel_engine_has_preemption(engine) && - __execlists_need_preempt(prio, rq_prio(last)) && - !i915_request_completed(last)); + if (!prev) + return true; + + /* + * Without preemption, the prev may refer to the still active element + * which we refuse to let go. + * + * Even with preemption, there are times when we think it is better not + * to preempt and leave an ostensibly lower priority request in flight. + */ + if (port_request(execlists->port) == prev) + return true; + + return rq_prio(prev) >= rq_prio(next); } /* @@ -264,7 +348,8 @@ static void unwind_wa_tail(struct i915_request *rq) assert_ring_tail_valid(rq->ring, rq->tail); } -static void __unwind_incomplete_requests(struct intel_engine_cs *engine) +static struct i915_request * +__unwind_incomplete_requests(struct intel_engine_cs *engine) { struct i915_request *rq, *rn, *active = NULL; struct list_head *uninitialized_var(pl); @@ -302,9 +387,12 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) */ if (!(prio & I915_PRIORITY_NEWCLIENT)) { prio |= I915_PRIORITY_NEWCLIENT; + active->sched.attr.priority = prio; list_move_tail(&active->sched.link, i915_sched_lookup_priolist(engine, prio)); } + + return active; } void @@ -435,11 +523,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = execlists_update_context(rq); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", + GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n", engine->name, n, port[n].context_id, count, rq->global_seqno, rq->fence.context, rq->fence.seqno, + hwsp_seqno(rq), intel_engine_get_seqno(engine), rq_prio(rq)); } else { @@ -511,6 +600,8 @@ static void inject_preempt_context(struct intel_engine_cs *engine) execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT); + + (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); } static void complete_preempt_context(struct intel_engine_execlists *execlists) @@ -579,7 +670,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) return; - if (need_preempt(engine, last, execlists->queue_priority)) { + if (need_preempt(engine, last)) { inject_preempt_context(engine); return; } @@ -612,7 +703,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * WaIdleLiteRestore:bdw,skl * Apply the wa NOOPs to prevent * ring:HEAD == rq:TAIL as we resubmit the - * request. See gen8_emit_breadcrumb() for + * request. See gen8_emit_fini_breadcrumb() for * where we prepare the padding after the * end of the request. */ @@ -625,6 +716,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) int i; priolist_for_each_request_consume(rq, rn, p, i) { + GEM_BUG_ON(!assert_priority_queue(execlists, last, rq)); + /* * Can we combine this request with the current port? * It has to be the same context/ringbuffer and not @@ -684,20 +777,20 @@ done: /* * Here be a bit of magic! Or sleight-of-hand, whichever you prefer. * - * We choose queue_priority such that if we add a request of greater + * We choose the priority hint such that if we add a request of greater * priority than this, we kick the submission tasklet to decide on * the right order of submitting the requests to hardware. We must * also be prepared to reorder requests as they are in-flight on the - * HW. We derive the queue_priority then as the first "hole" in + * HW. We derive the priority hint then as the first "hole" in * the HW submission ports and if there are no available slots, * the priority of the lowest executing request, i.e. last. * * When we do receive a higher priority request ready to run from the - * user, see queue_request(), the queue_priority is bumped to that + * user, see queue_request(), the priority hint is bumped to that * request triggering preemption on the next dequeue (or subsequent * interrupt for secondary ports). */ - execlists->queue_priority = + execlists->queue_priority_hint = port != execlists->port ? rq_prio(last) : INT_MIN; if (submit) { @@ -728,11 +821,12 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) while (num_ports-- && port_isset(port)) { struct i915_request *rq = port_request(port); - GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d)\n", + GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d:%d)\n", rq->engine->name, (unsigned int)(port - execlists->port), rq->global_seqno, rq->fence.context, rq->fence.seqno, + hwsp_seqno(rq), intel_engine_get_seqno(rq->engine)); GEM_BUG_ON(!execlists->active); @@ -816,10 +910,10 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) list_for_each_entry(rq, &engine->timeline.requests, link) { GEM_BUG_ON(!rq->global_seqno); - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) - continue; + if (!i915_request_signaled(rq)) + dma_fence_set_error(&rq->fence, -EIO); - dma_fence_set_error(&rq->fence, -EIO); + i915_request_mark_complete(rq); } /* Flush the queued requests to the timeline list (for retiring). */ @@ -829,9 +923,9 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) priolist_for_each_request_consume(rq, rn, p, i) { list_del_init(&rq->sched.link); - - dma_fence_set_error(&rq->fence, -EIO); __i915_request_submit(rq); + dma_fence_set_error(&rq->fence, -EIO); + i915_request_mark_complete(rq); } rb_erase_cached(&p->node, &execlists->queue); @@ -845,7 +939,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) /* Remaining _unready_ requests will be nop'ed when submitted */ - execlists->queue_priority = INT_MIN; + execlists->queue_priority_hint = INT_MIN; execlists->queue = RB_ROOT_CACHED; GEM_BUG_ON(port_isset(execlists->port)); @@ -868,6 +962,8 @@ static void process_csb(struct intel_engine_cs *engine) const u32 * const buf = execlists->csb_status; u8 head, tail; + lockdep_assert_held(&engine->timeline.lock); + /* * Note that csb_write, csb_status may be either in HWSP or mmio. * When reading from the csb_write mmio register, we have to be @@ -956,12 +1052,13 @@ static void process_csb(struct intel_engine_cs *engine) EXECLISTS_ACTIVE_USER)); rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", + GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n", engine->name, port->context_id, count, rq ? rq->global_seqno : 0, rq ? rq->fence.context : 0, rq ? rq->fence.seqno : 0, + rq ? hwsp_seqno(rq) : 0, intel_engine_get_seqno(engine), rq ? rq_prio(rq) : 0); @@ -1045,7 +1142,7 @@ static void execlists_submission_tasklet(unsigned long data) GEM_TRACE("%s awake?=%d, active=%x\n", engine->name, - engine->i915->gt.awake, + !!engine->i915->gt.awake, engine->execlists.active); spin_lock_irqsave(&engine->timeline.lock, flags); @@ -1075,8 +1172,8 @@ static void __submit_queue_imm(struct intel_engine_cs *engine) static void submit_queue(struct intel_engine_cs *engine, int prio) { - if (prio > engine->execlists.queue_priority) { - engine->execlists.queue_priority = prio; + if (prio > engine->execlists.queue_priority_hint) { + engine->execlists.queue_priority_hint = prio; __submit_queue_imm(engine); } } @@ -1169,6 +1266,23 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma) return i915_vma_pin(vma, 0, 0, flags); } +static void +__execlists_update_reg_state(struct intel_engine_cs *engine, + struct intel_context *ce) +{ + u32 *regs = ce->lrc_reg_state; + struct intel_ring *ring = ce->ring; + + regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma); + regs[CTX_RING_HEAD + 1] = ring->head; + regs[CTX_RING_TAIL + 1] = ring->tail; + + /* RPCS */ + if (engine->class == RENDER_CLASS) + regs[CTX_R_PWR_CLK_STATE + 1] = gen8_make_rpcs(engine->i915, + &ce->sseu); +} + static struct intel_context * __execlists_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx, @@ -1207,10 +1321,8 @@ __execlists_context_pin(struct intel_engine_cs *engine, GEM_BUG_ON(!intel_ring_offset_valid(ce->ring, ce->ring->head)); ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; - ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = - i915_ggtt_offset(ce->ring->vma); - ce->lrc_reg_state[CTX_RING_HEAD + 1] = ce->ring->head; - ce->lrc_reg_state[CTX_RING_TAIL + 1] = ce->ring->tail; + + __execlists_update_reg_state(engine, ce); ce->state->obj->pin_global++; i915_gem_context_get(ctx); @@ -1250,6 +1362,34 @@ execlists_context_pin(struct intel_engine_cs *engine, return __execlists_context_pin(engine, ctx, ce); } +static int gen8_emit_init_breadcrumb(struct i915_request *rq) +{ + u32 *cs; + + GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb); + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* + * Check if we have been preempted before we even get started. + * + * After this point i915_request_started() reports true, even if + * we get preempted and so are no longer running. + */ + *cs++ = MI_ARB_CHECK; + *cs++ = MI_NOOP; + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = rq->timeline->hwsp_offset; + *cs++ = 0; + *cs++ = rq->fence.seqno - 1; + + intel_ring_advance(rq, cs); + return 0; +} + static int emit_pdps(struct i915_request *rq) { const struct intel_engine_cs * const engine = rq->engine; @@ -1675,7 +1815,7 @@ static void enable_execlists(struct intel_engine_cs *engine) _MASKED_BIT_DISABLE(STOP_RING)); I915_WRITE(RING_HWS_PGA(engine->mmio_base), - engine->status_page.ggtt_offset); + i915_ggtt_offset(engine->status_page.vma)); POSTING_READ(RING_HWS_PGA(engine->mmio_base)); } @@ -1712,11 +1852,9 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) return 0; } -static struct i915_request * -execlists_reset_prepare(struct intel_engine_cs *engine) +static void execlists_reset_prepare(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - struct i915_request *request, *active; unsigned long flags; GEM_TRACE("%s: depth<-%d\n", engine->name, @@ -1732,59 +1870,21 @@ execlists_reset_prepare(struct intel_engine_cs *engine) * prevents the race. */ __tasklet_disable_sync_once(&execlists->tasklet); + GEM_BUG_ON(!reset_in_progress(execlists)); + /* And flush any current direct submission. */ spin_lock_irqsave(&engine->timeline.lock, flags); - - /* - * We want to flush the pending context switches, having disabled - * the tasklet above, we can assume exclusive access to the execlists. - * For this allows us to catch up with an inflight preemption event, - * and avoid blaming an innocent request if the stall was due to the - * preemption itself. - */ - process_csb(engine); - - /* - * The last active request can then be no later than the last request - * now in ELSP[0]. So search backwards from there, so that if the GPU - * has advanced beyond the last CSB update, it will be pardoned. - */ - active = NULL; - request = port_request(execlists->port); - if (request) { - /* - * Prevent the breadcrumb from advancing before we decide - * which request is currently active. - */ - intel_engine_stop_cs(engine); - - list_for_each_entry_from_reverse(request, - &engine->timeline.requests, - link) { - if (__i915_request_completed(request, - request->global_seqno)) - break; - - active = request; - } - } - + process_csb(engine); /* drain preemption events */ spin_unlock_irqrestore(&engine->timeline.lock, flags); - - return active; } -static void execlists_reset(struct intel_engine_cs *engine, - struct i915_request *request) +static void execlists_reset(struct intel_engine_cs *engine, bool stalled) { struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_request *rq; unsigned long flags; u32 *regs; - GEM_TRACE("%s request global=%d, current=%d\n", - engine->name, request ? request->global_seqno : 0, - intel_engine_get_seqno(engine)); - spin_lock_irqsave(&engine->timeline.lock, flags); /* @@ -1799,12 +1899,18 @@ static void execlists_reset(struct intel_engine_cs *engine, execlists_cancel_port_requests(execlists); /* Push back any incomplete requests for replay after the reset. */ - __unwind_incomplete_requests(engine); + rq = __unwind_incomplete_requests(engine); /* Following the reset, we need to reload the CSB read/write pointers */ reset_csb_pointers(&engine->execlists); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n", + engine->name, + rq ? rq->global_seqno : 0, + intel_engine_get_seqno(engine), + yesno(stalled)); + if (!rq) + goto out_unlock; /* * If the request was innocent, we leave the request in the ELSP @@ -1817,8 +1923,9 @@ static void execlists_reset(struct intel_engine_cs *engine, * and have to at least restore the RING register in the context * image back to the expected values to skip over the guilty request. */ - if (!request || request->fence.error != -EIO) - return; + i915_reset_request(rq, stalled); + if (!stalled) + goto out_unlock; /* * We want a simple context + ring to execute the breadcrumb update. @@ -1828,25 +1935,22 @@ static void execlists_reset(struct intel_engine_cs *engine, * future request will be after userspace has had the opportunity * to recreate its own state. */ - regs = request->hw_context->lrc_reg_state; + regs = rq->hw_context->lrc_reg_state; if (engine->pinned_default_state) { memcpy(regs, /* skip restoring the vanilla PPHWSP */ engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, engine->context_size - PAGE_SIZE); } - execlists_init_reg_state(regs, - request->gem_context, engine, request->ring); /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */ - regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma); - - request->ring->head = intel_ring_wrap(request->ring, request->postfix); - regs[CTX_RING_HEAD + 1] = request->ring->head; + rq->ring->head = intel_ring_wrap(rq->ring, rq->postfix); + intel_ring_update_space(rq->ring); - intel_ring_update_space(request->ring); + execlists_init_reg_state(regs, rq->gem_context, engine, rq->ring); + __execlists_update_reg_state(engine, rq->hw_context); - /* Reset WaIdleLiteRestore:bdw,skl as well */ - unwind_wa_tail(request); +out_unlock: + spin_unlock_irqrestore(&engine->timeline.lock, flags); } static void execlists_reset_finish(struct intel_engine_cs *engine) @@ -1859,6 +1963,7 @@ static void execlists_reset_finish(struct intel_engine_cs *engine) * to sleep before we restart and reload a context. * */ + GEM_BUG_ON(!reset_in_progress(execlists)); if (!RB_EMPTY_ROOT(&execlists->queue.rb_root)) execlists->tasklet.func(execlists->tasklet.data); @@ -2031,53 +2136,62 @@ static int gen8_emit_flush_render(struct i915_request *request, * used as a workaround for not being allowed to do lite * restore with HEAD==TAIL (WaIdleLiteRestore). */ -static void gen8_emit_wa_tail(struct i915_request *request, u32 *cs) +static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs) { /* Ensure there's always at least one preemption point per-request. */ *cs++ = MI_ARB_CHECK; *cs++ = MI_NOOP; request->wa_tail = intel_ring_offset(request, cs); + + return cs; } -static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) +static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) { /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); - cs = gen8_emit_ggtt_write(cs, request->global_seqno, + cs = gen8_emit_ggtt_write(cs, + request->fence.seqno, + request->timeline->hwsp_offset); + + cs = gen8_emit_ggtt_write(cs, + request->global_seqno, intel_hws_seqno_address(request->engine)); + *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); - gen8_emit_wa_tail(request, cs); + return gen8_emit_wa_tail(request, cs); } -static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; -static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) +static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) { - /* We're using qword write, seqno should be aligned to 8 bytes. */ - BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); - cs = gen8_emit_ggtt_write_rcs(cs, - request->global_seqno, - intel_hws_seqno_address(request->engine), + request->fence.seqno, + request->timeline->hwsp_offset, PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL); + cs = gen8_emit_ggtt_write_rcs(cs, + request->global_seqno, + intel_hws_seqno_address(request->engine), + PIPE_CONTROL_CS_STALL); + *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); - gen8_emit_wa_tail(request, cs); + return gen8_emit_wa_tail(request, cs); } -static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS; static int gen8_init_rcs_context(struct i915_request *rq) { @@ -2169,8 +2283,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->request_alloc = execlists_request_alloc; engine->emit_flush = gen8_emit_flush; - engine->emit_breadcrumb = gen8_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz; + engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; + engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb; engine->set_default_submission = intel_execlists_set_default_submission; @@ -2209,10 +2323,14 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; } -static void +static int logical_ring_setup(struct intel_engine_cs *engine) { - intel_engine_setup_common(engine); + int err; + + err = intel_engine_setup_common(engine); + if (err) + return err; /* Intentionally left blank. */ engine->buffer = NULL; @@ -2222,6 +2340,8 @@ logical_ring_setup(struct intel_engine_cs *engine) logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); + + return 0; } static int logical_ring_init(struct intel_engine_cs *engine) @@ -2256,10 +2376,10 @@ static int logical_ring_init(struct intel_engine_cs *engine) } execlists->csb_status = - &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; + &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; execlists->csb_write = - &engine->status_page.page_addr[intel_hws_csb_write_index(i915)]; + &engine->status_page.addr[intel_hws_csb_write_index(i915)]; reset_csb_pointers(execlists); @@ -2270,13 +2390,14 @@ int logical_render_ring_init(struct intel_engine_cs *engine) { int ret; - logical_ring_setup(engine); + ret = logical_ring_setup(engine); + if (ret) + return ret; /* Override some for render ring. */ engine->init_context = gen8_init_rcs_context; engine->emit_flush = gen8_emit_flush_render; - engine->emit_breadcrumb = gen8_emit_breadcrumb_rcs; - engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_rcs_sz; + engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; ret = logical_ring_init(engine); if (ret) @@ -2300,27 +2421,59 @@ int logical_render_ring_init(struct intel_engine_cs *engine) int logical_xcs_ring_init(struct intel_engine_cs *engine) { - logical_ring_setup(engine); + int err; + + err = logical_ring_setup(engine); + if (err) + return err; return logical_ring_init(engine); } -static u32 -make_rpcs(struct drm_i915_private *dev_priv) +u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu) { - bool subslice_pg = RUNTIME_INFO(dev_priv)->sseu.has_subslice_pg; - u8 slices = hweight8(RUNTIME_INFO(dev_priv)->sseu.slice_mask); - u8 subslices = hweight8(RUNTIME_INFO(dev_priv)->sseu.subslice_mask[0]); + const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; + bool subslice_pg = sseu->has_subslice_pg; + struct intel_sseu ctx_sseu; + u8 slices, subslices; u32 rpcs = 0; /* * No explicit RPCS request is needed to ensure full * slice/subslice/EU enablement prior to Gen9. */ - if (INTEL_GEN(dev_priv) < 9) + if (INTEL_GEN(i915) < 9) return 0; /* + * If i915/perf is active, we want a stable powergating configuration + * on the system. + * + * We could choose full enablement, but on ICL we know there are use + * cases which disable slices for functional, apart for performance + * reasons. So in this case we select a known stable subset. + */ + if (!i915->perf.oa.exclusive_stream) { + ctx_sseu = *req_sseu; + } else { + ctx_sseu = intel_device_default_sseu(i915); + + if (IS_GEN(i915, 11)) { + /* + * We only need subslice count so it doesn't matter + * which ones we select - just turn off low bits in the + * amount of half of all available subslices per slice. + */ + ctx_sseu.subslice_mask = + ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2)); + ctx_sseu.slice_mask = 0x1; + } + } + + slices = hweight8(ctx_sseu.slice_mask); + subslices = hweight8(ctx_sseu.subslice_mask); + + /* * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits * wide and Icelake has up to eight subslices, specfial programming is * needed in order to correctly enable all subslices. @@ -2345,7 +2498,9 @@ make_rpcs(struct drm_i915_private *dev_priv) * subslices are enabled, or a count between one and four on the first * slice. */ - if (IS_GEN(dev_priv, 11) && slices == 1 && subslices >= 4) { + if (IS_GEN(i915, 11) && + slices == 1 && + subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) { GEM_BUG_ON(subslices & 1); subslice_pg = false; @@ -2358,10 +2513,10 @@ make_rpcs(struct drm_i915_private *dev_priv) * must make an explicit request through RPCS for full * enablement. */ - if (RUNTIME_INFO(dev_priv)->sseu.has_slice_pg) { + if (sseu->has_slice_pg) { u32 mask, val = slices; - if (INTEL_GEN(dev_priv) >= 11) { + if (INTEL_GEN(i915) >= 11) { mask = GEN11_RPCS_S_CNT_MASK; val <<= GEN11_RPCS_S_CNT_SHIFT; } else { @@ -2386,18 +2541,16 @@ make_rpcs(struct drm_i915_private *dev_priv) rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val; } - if (RUNTIME_INFO(dev_priv)->sseu.has_eu_pg) { + if (sseu->has_eu_pg) { u32 val; - val = RUNTIME_INFO(dev_priv)->sseu.eu_per_subslice << - GEN8_RPCS_EU_MIN_SHIFT; + val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT; GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK); val &= GEN8_RPCS_EU_MIN_MASK; rpcs |= val; - val = RUNTIME_INFO(dev_priv)->sseu.eu_per_subslice << - GEN8_RPCS_EU_MAX_SHIFT; + val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT; GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK); val &= GEN8_RPCS_EU_MAX_MASK; @@ -2530,8 +2683,7 @@ static void execlists_init_reg_state(u32 *regs, if (rcs) { regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); - CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, - make_rpcs(dev_priv)); + CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0); i915_oa_init_reg_state(engine, ctx, regs); } @@ -2608,7 +2760,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, { struct drm_i915_gem_object *ctx_obj; struct i915_vma *vma; - uint32_t context_size; + u32 context_size; struct intel_ring *ring; struct i915_timeline *timeline; int ret; @@ -2634,7 +2786,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, goto error_deref_obj; } - timeline = i915_timeline_create(ctx->i915, ctx->name); + timeline = i915_timeline_create(ctx->i915, ctx->name, NULL); if (IS_ERR(timeline)) { ret = PTR_ERR(timeline); goto error_deref_obj; @@ -2692,14 +2844,70 @@ void intel_lr_context_resume(struct drm_i915_private *i915) intel_ring_reset(ce->ring, 0); - if (ce->pin_count) { /* otherwise done in context_pin */ - u32 *regs = ce->lrc_reg_state; + if (ce->pin_count) /* otherwise done in context_pin */ + __execlists_update_reg_state(engine, ce); + } + } +} - regs[CTX_RING_HEAD + 1] = ce->ring->head; - regs[CTX_RING_TAIL + 1] = ce->ring->tail; - } +void intel_execlists_show_requests(struct intel_engine_cs *engine, + struct drm_printer *m, + void (*show_request)(struct drm_printer *m, + struct i915_request *rq, + const char *prefix), + unsigned int max) +{ + const struct intel_engine_execlists *execlists = &engine->execlists; + struct i915_request *rq, *last; + unsigned long flags; + unsigned int count; + struct rb_node *rb; + + spin_lock_irqsave(&engine->timeline.lock, flags); + + last = NULL; + count = 0; + list_for_each_entry(rq, &engine->timeline.requests, link) { + if (count++ < max - 1) + show_request(m, rq, "\t\tE "); + else + last = rq; + } + if (last) { + if (count > max) { + drm_printf(m, + "\t\t...skipping %d executing requests...\n", + count - max); + } + show_request(m, last, "\t\tE "); + } + + last = NULL; + count = 0; + if (execlists->queue_priority_hint != INT_MIN) + drm_printf(m, "\t\tQueue priority hint: %d\n", + execlists->queue_priority_hint); + for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { + struct i915_priolist *p = rb_entry(rb, typeof(*p), node); + int i; + + priolist_for_each_request(rq, p, i) { + if (count++ < max - 1) + show_request(m, rq, "\t\tQ "); + else + last = rq; } } + if (last) { + if (count > max) { + drm_printf(m, + "\t\t...skipping %d queued requests...\n", + count - max); + } + show_request(m, last, "\t\tQ "); + } + + spin_unlock_irqrestore(&engine->timeline.lock, flags); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index f5a5502ecf70..f1aec8a6986f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -97,11 +97,21 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine); */ #define LRC_HEADER_PAGES LRC_PPHWSP_PN +struct drm_printer; + struct drm_i915_private; struct i915_gem_context; void intel_lr_context_resume(struct drm_i915_private *dev_priv); - void intel_execlists_set_default_submission(struct intel_engine_cs *engine); +void intel_execlists_show_requests(struct intel_engine_cs *engine, + struct drm_printer *m, + void (*show_request)(struct drm_printer *m, + struct i915_request *rq, + const char *prefix), + unsigned int max); + +u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu); + #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c index 7d15be5932e0..322bdddda164 100644 --- a/drivers/gpu/drm/i915/intel_lspcon.c +++ b/drivers/gpu/drm/i915/intel_lspcon.c @@ -288,12 +288,12 @@ static bool lspcon_parade_fw_ready(struct drm_dp_aux *aux) } static bool _lspcon_parade_write_infoframe_blocks(struct drm_dp_aux *aux, - uint8_t *avi_buf) + u8 *avi_buf) { u8 avi_if_ctrl; u8 block_count = 0; u8 *data; - uint16_t reg; + u16 reg; ssize_t ret; while (block_count < 4) { @@ -335,10 +335,10 @@ static bool _lspcon_parade_write_infoframe_blocks(struct drm_dp_aux *aux, } static bool _lspcon_write_avi_infoframe_parade(struct drm_dp_aux *aux, - const uint8_t *frame, + const u8 *frame, ssize_t len) { - uint8_t avi_if[LSPCON_PARADE_AVI_IF_DATA_SIZE] = {1, }; + u8 avi_if[LSPCON_PARADE_AVI_IF_DATA_SIZE] = {1, }; /* * Parade's frames contains 32 bytes of data, divided @@ -367,13 +367,13 @@ static bool _lspcon_write_avi_infoframe_parade(struct drm_dp_aux *aux, } static bool _lspcon_write_avi_infoframe_mca(struct drm_dp_aux *aux, - const uint8_t *buffer, ssize_t len) + const u8 *buffer, ssize_t len) { int ret; - uint32_t val = 0; - uint32_t retry; - uint16_t reg; - const uint8_t *data = buffer; + u32 val = 0; + u32 retry; + u16 reg; + const u8 *data = buffer; reg = LSPCON_MCA_AVI_IF_WRITE_OFFSET; while (val < len) { @@ -459,7 +459,7 @@ void lspcon_set_infoframes(struct intel_encoder *encoder, { ssize_t ret; union hdmi_infoframe frame; - uint8_t buf[VIDEO_DIP_DATA_SIZE]; + u8 buf[VIDEO_DIP_DATA_SIZE]; struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); struct intel_lspcon *lspcon = &dig_port->lspcon; const struct drm_display_mode *adjusted_mode = diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 904d16af89a8..b4aa49768e90 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -94,15 +94,17 @@ static bool intel_lvds_get_hw_state(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); + intel_wakeref_t wakeref; bool ret; - if (!intel_display_power_get_if_enabled(dev_priv, - encoder->power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain); + if (!wakeref) return false; ret = intel_lvds_port_enabled(dev_priv, lvds_encoder->reg, pipe); - intel_display_power_put(dev_priv, encoder->power_domain); + intel_display_power_put(dev_priv, encoder->power_domain, wakeref); return ret; } @@ -796,26 +798,6 @@ static bool compute_is_dual_link_lvds(struct intel_lvds_encoder *lvds_encoder) return (val & LVDS_CLKB_POWER_MASK) == LVDS_CLKB_POWER_UP; } -static bool intel_lvds_supported(struct drm_i915_private *dev_priv) -{ - /* - * With the introduction of the PCH we gained a dedicated - * LVDS presence pin, use it. - */ - if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv)) - return true; - - /* - * Otherwise LVDS was only attached to mobile products, - * except for the inglorious 830gm - */ - if (INTEL_GEN(dev_priv) <= 4 && - IS_MOBILE(dev_priv) && !IS_I830(dev_priv)) - return true; - - return false; -} - /** * intel_lvds_init - setup LVDS connectors on this device * @dev_priv: i915 device @@ -840,9 +822,6 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) u8 pin; u32 allowed_scalers; - if (!intel_lvds_supported(dev_priv)) - return; - /* Skip init on machines we know falsely report LVDS */ if (dmi_check_system(intel_no_lvds)) { WARN(!dev_priv->vbt.int_lvds_support, @@ -908,6 +887,7 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) } intel_encoder->get_hw_state = intel_lvds_get_hw_state; intel_encoder->get_config = intel_lvds_get_config; + intel_encoder->update_pipe = intel_panel_update_backlight; intel_connector->get_hw_state = intel_connector_get_hw_state; intel_connector_attach_encoder(intel_connector, intel_encoder); diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index e976c5ce5479..331e7a678fb7 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -28,48 +28,60 @@ struct drm_i915_mocs_entry { u32 control_value; u16 l3cc_value; + u16 used; }; struct drm_i915_mocs_table { - u32 size; + unsigned int size; + unsigned int n_entries; const struct drm_i915_mocs_entry *table; }; /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ -#define LE_CACHEABILITY(value) ((value) << 0) -#define LE_TGT_CACHE(value) ((value) << 2) +#define _LE_CACHEABILITY(value) ((value) << 0) +#define _LE_TGT_CACHE(value) ((value) << 2) #define LE_LRUM(value) ((value) << 4) #define LE_AOM(value) ((value) << 6) #define LE_RSC(value) ((value) << 7) #define LE_SCC(value) ((value) << 8) #define LE_PFM(value) ((value) << 11) #define LE_SCF(value) ((value) << 14) +#define LE_COS(value) ((value) << 15) +#define LE_SSE(value) ((value) << 17) /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */ #define L3_ESC(value) ((value) << 0) #define L3_SCC(value) ((value) << 1) -#define L3_CACHEABILITY(value) ((value) << 4) +#define _L3_CACHEABILITY(value) ((value) << 4) /* Helper defines */ #define GEN9_NUM_MOCS_ENTRIES 62 /* 62 out of 64 - 63 & 64 are reserved. */ +#define GEN11_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ /* (e)LLC caching options */ -#define LE_PAGETABLE 0 -#define LE_UC 1 -#define LE_WT 2 -#define LE_WB 3 - -/* L3 caching options */ -#define L3_DIRECT 0 -#define L3_UC 1 -#define L3_RESERVED 2 -#define L3_WB 3 +#define LE_0_PAGETABLE _LE_CACHEABILITY(0) +#define LE_1_UC _LE_CACHEABILITY(1) +#define LE_2_WT _LE_CACHEABILITY(2) +#define LE_3_WB _LE_CACHEABILITY(3) /* Target cache */ -#define LE_TC_PAGETABLE 0 -#define LE_TC_LLC 1 -#define LE_TC_LLC_ELLC 2 -#define LE_TC_LLC_ELLC_ALT 3 +#define LE_TC_0_PAGETABLE _LE_TGT_CACHE(0) +#define LE_TC_1_LLC _LE_TGT_CACHE(1) +#define LE_TC_2_LLC_ELLC _LE_TGT_CACHE(2) +#define LE_TC_3_LLC_ELLC_ALT _LE_TGT_CACHE(3) + +/* L3 caching options */ +#define L3_0_DIRECT _L3_CACHEABILITY(0) +#define L3_1_UC _L3_CACHEABILITY(1) +#define L3_2_RESERVED _L3_CACHEABILITY(2) +#define L3_3_WB _L3_CACHEABILITY(3) + +#define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \ + [__idx] = { \ + .control_value = __control_value, \ + .l3cc_value = __l3cc_value, \ + .used = 1, \ + } /* * MOCS tables @@ -80,85 +92,147 @@ struct drm_i915_mocs_table { * LNCFCMOCS0 - LNCFCMOCS32 registers. * * These tables are intended to be kept reasonably consistent across - * platforms. However some of the fields are not applicable to all of - * them. + * HW platforms, and for ICL+, be identical across OSes. To achieve + * that, for Icelake and above, list of entries is published as part + * of bspec. * * Entries not part of the following tables are undefined as far as * userspace is concerned and shouldn't be relied upon. For the time - * being they will be implicitly initialized to the strictest caching - * configuration (uncached) to guarantee forwards compatibility with - * userspace programs written against more recent kernels providing - * additional MOCS entries. + * being they will be initialized to PTE. * - * NOTE: These tables MUST start with being uncached and the length - * MUST be less than 63 as the last two registers are reserved - * by the hardware. These tables are part of the kernel ABI and - * may only be updated incrementally by adding entries at the - * end. + * The last two entries are reserved by the hardware. For ICL+ they + * should be initialized according to bspec and never used, for older + * platforms they should never be written to. + * + * NOTE: These tables are part of bspec and defined as part of hardware + * interface for ICL+. For older platforms, they are part of kernel + * ABI. It is expected that, for specific hardware platform, existing + * entries will remain constant and the table will only be updated by + * adding new entries, filling unused positions. */ +#define GEN9_MOCS_ENTRIES \ + MOCS_ENTRY(I915_MOCS_UNCACHED, \ + LE_1_UC | LE_TC_2_LLC_ELLC, \ + L3_1_UC), \ + MOCS_ENTRY(I915_MOCS_PTE, \ + LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \ + L3_3_WB) + static const struct drm_i915_mocs_entry skylake_mocs_table[] = { - [I915_MOCS_UNCACHED] = { - /* 0x00000009 */ - .control_value = LE_CACHEABILITY(LE_UC) | - LE_TGT_CACHE(LE_TC_LLC_ELLC) | - LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | - LE_PFM(0) | LE_SCF(0), - - /* 0x0010 */ - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), - }, - [I915_MOCS_PTE] = { - /* 0x00000038 */ - .control_value = LE_CACHEABILITY(LE_PAGETABLE) | - LE_TGT_CACHE(LE_TC_LLC_ELLC) | - LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | - LE_PFM(0) | LE_SCF(0), - /* 0x0030 */ - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), - }, - [I915_MOCS_CACHED] = { - /* 0x0000003b */ - .control_value = LE_CACHEABILITY(LE_WB) | - LE_TGT_CACHE(LE_TC_LLC_ELLC) | - LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | - LE_PFM(0) | LE_SCF(0), - /* 0x0030 */ - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), - }, + GEN9_MOCS_ENTRIES, + MOCS_ENTRY(I915_MOCS_CACHED, + LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3), + L3_3_WB) }; /* NOTE: the LE_TGT_CACHE is not used on Broxton */ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { - [I915_MOCS_UNCACHED] = { - /* 0x00000009 */ - .control_value = LE_CACHEABILITY(LE_UC) | - LE_TGT_CACHE(LE_TC_LLC_ELLC) | - LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | - LE_PFM(0) | LE_SCF(0), - - /* 0x0010 */ - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), - }, - [I915_MOCS_PTE] = { - /* 0x00000038 */ - .control_value = LE_CACHEABILITY(LE_PAGETABLE) | - LE_TGT_CACHE(LE_TC_LLC_ELLC) | - LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | - LE_PFM(0) | LE_SCF(0), - - /* 0x0030 */ - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), - }, - [I915_MOCS_CACHED] = { - /* 0x00000039 */ - .control_value = LE_CACHEABILITY(LE_UC) | - LE_TGT_CACHE(LE_TC_LLC_ELLC) | - LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | - LE_PFM(0) | LE_SCF(0), - - /* 0x0030 */ - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), - }, + GEN9_MOCS_ENTRIES, + MOCS_ENTRY(I915_MOCS_CACHED, + LE_1_UC | LE_TC_2_LLC_ELLC | LE_LRUM(3), + L3_3_WB) +}; + +#define GEN11_MOCS_ENTRIES \ + /* Base - Uncached (Deprecated) */ \ + MOCS_ENTRY(I915_MOCS_UNCACHED, \ + LE_1_UC | LE_TC_1_LLC, \ + L3_1_UC), \ + /* Base - L3 + LeCC:PAT (Deprecated) */ \ + MOCS_ENTRY(I915_MOCS_PTE, \ + LE_0_PAGETABLE | LE_TC_1_LLC, \ + L3_3_WB), \ + /* Base - L3 + LLC */ \ + MOCS_ENTRY(2, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_3_WB), \ + /* Base - Uncached */ \ + MOCS_ENTRY(3, \ + LE_1_UC | LE_TC_1_LLC, \ + L3_1_UC), \ + /* Base - L3 */ \ + MOCS_ENTRY(4, \ + LE_1_UC | LE_TC_1_LLC, \ + L3_3_WB), \ + /* Base - LLC */ \ + MOCS_ENTRY(5, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_1_UC), \ + /* Age 0 - LLC */ \ + MOCS_ENTRY(6, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \ + L3_1_UC), \ + /* Age 0 - L3 + LLC */ \ + MOCS_ENTRY(7, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \ + L3_3_WB), \ + /* Age: Don't Chg. - LLC */ \ + MOCS_ENTRY(8, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ + L3_1_UC), \ + /* Age: Don't Chg. - L3 + LLC */ \ + MOCS_ENTRY(9, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ + L3_3_WB), \ + /* No AOM - LLC */ \ + MOCS_ENTRY(10, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ + L3_1_UC), \ + /* No AOM - L3 + LLC */ \ + MOCS_ENTRY(11, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ + L3_3_WB), \ + /* No AOM; Age 0 - LLC */ \ + MOCS_ENTRY(12, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ + L3_1_UC), \ + /* No AOM; Age 0 - L3 + LLC */ \ + MOCS_ENTRY(13, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ + L3_3_WB), \ + /* No AOM; Age:DC - LLC */ \ + MOCS_ENTRY(14, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ + L3_1_UC), \ + /* No AOM; Age:DC - L3 + LLC */ \ + MOCS_ENTRY(15, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ + L3_3_WB), \ + /* Self-Snoop - L3 + LLC */ \ + MOCS_ENTRY(18, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(12.5%) */ \ + MOCS_ENTRY(19, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(25%) */ \ + MOCS_ENTRY(20, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(50%) */ \ + MOCS_ENTRY(21, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(75%) */ \ + MOCS_ENTRY(22, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(87.5%) */ \ + MOCS_ENTRY(23, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \ + L3_3_WB), \ + /* HW Reserved - SW program but never use */ \ + MOCS_ENTRY(62, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_1_UC), \ + /* HW Reserved - SW program but never use */ \ + MOCS_ENTRY(63, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_1_UC) + +static const struct drm_i915_mocs_entry icelake_mocs_table[] = { + GEN11_MOCS_ENTRIES }; /** @@ -178,13 +252,19 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, { bool result = false; - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) || - IS_ICELAKE(dev_priv)) { + if (IS_ICELAKE(dev_priv)) { + table->size = ARRAY_SIZE(icelake_mocs_table); + table->table = icelake_mocs_table; + table->n_entries = GEN11_NUM_MOCS_ENTRIES; + result = true; + } else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { table->size = ARRAY_SIZE(skylake_mocs_table); + table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->table = skylake_mocs_table; result = true; } else if (IS_GEN9_LP(dev_priv)) { table->size = ARRAY_SIZE(broxton_mocs_table); + table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->table = broxton_mocs_table; result = true; } else { @@ -226,6 +306,19 @@ static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index) } } +/* + * Get control_value from MOCS entry taking into account when it's not used: + * I915_MOCS_PTE's value is returned in this case. + */ +static u32 get_entry_control(const struct drm_i915_mocs_table *table, + unsigned int index) +{ + if (table->table[index].used) + return table->table[index].control_value; + + return table->table[I915_MOCS_PTE].control_value; +} + /** * intel_mocs_init_engine() - emit the mocs control table * @engine: The engine for whom to emit the registers. @@ -238,27 +331,23 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) struct drm_i915_private *dev_priv = engine->i915; struct drm_i915_mocs_table table; unsigned int index; + u32 unused_value; if (!get_mocs_settings(dev_priv, &table)) return; - GEM_BUG_ON(table.size > GEN9_NUM_MOCS_ENTRIES); - - for (index = 0; index < table.size; index++) - I915_WRITE(mocs_register(engine->id, index), - table.table[index].control_value); - - /* - * Ok, now set the unused entries to uncached. These entries - * are officially undefined and no contract for the contents - * and settings is given for these entries. - * - * Entry 0 in the table is uncached - so we are just writing - * that value to all the used entries. - */ - for (; index < GEN9_NUM_MOCS_ENTRIES; index++) - I915_WRITE(mocs_register(engine->id, index), - table.table[0].control_value); + /* Set unused values to PTE */ + unused_value = table.table[I915_MOCS_PTE].control_value; + + for (index = 0; index < table.size; index++) { + u32 value = get_entry_control(&table, index); + + I915_WRITE(mocs_register(engine->id, index), value); + } + + /* All remaining entries are also unused */ + for (; index < table.n_entries; index++) + I915_WRITE(mocs_register(engine->id, index), unused_value); } /** @@ -276,33 +365,32 @@ static int emit_mocs_control_table(struct i915_request *rq, { enum intel_engine_id engine = rq->engine->id; unsigned int index; + u32 unused_value; u32 *cs; - if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) + if (GEM_WARN_ON(table->size > table->n_entries)) return -ENODEV; - cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); + /* Set unused values to PTE */ + unused_value = table->table[I915_MOCS_PTE].control_value; + + cs = intel_ring_begin(rq, 2 + 2 * table->n_entries); if (IS_ERR(cs)) return PTR_ERR(cs); - *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES); + *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries); for (index = 0; index < table->size; index++) { + u32 value = get_entry_control(table, index); + *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); - *cs++ = table->table[index].control_value; + *cs++ = value; } - /* - * Ok, now set the unused entries to uncached. These entries - * are officially undefined and no contract for the contents - * and settings is given for these entries. - * - * Entry 0 in the table is uncached - so we are just writing - * that value to all the used entries. - */ - for (; index < GEN9_NUM_MOCS_ENTRIES; index++) { + /* All remaining entries are also unused */ + for (; index < table->n_entries; index++) { *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); - *cs++ = table->table[0].control_value; + *cs++ = unused_value; } *cs++ = MI_NOOP; @@ -311,12 +399,24 @@ static int emit_mocs_control_table(struct i915_request *rq, return 0; } +/* + * Get l3cc_value from MOCS entry taking into account when it's not used: + * I915_MOCS_PTE's value is returned in this case. + */ +static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table, + unsigned int index) +{ + if (table->table[index].used) + return table->table[index].l3cc_value; + + return table->table[I915_MOCS_PTE].l3cc_value; +} + static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, u16 low, u16 high) { - return table->table[low].l3cc_value | - table->table[high].l3cc_value << 16; + return low | high << 16; } /** @@ -333,38 +433,43 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, static int emit_mocs_l3cc_table(struct i915_request *rq, const struct drm_i915_mocs_table *table) { + u16 unused_value; unsigned int i; u32 *cs; - if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) + if (GEM_WARN_ON(table->size > table->n_entries)) return -ENODEV; - cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES); + /* Set unused values to PTE */ + unused_value = table->table[I915_MOCS_PTE].l3cc_value; + + cs = intel_ring_begin(rq, 2 + table->n_entries); if (IS_ERR(cs)) return PTR_ERR(cs); - *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2); + *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2); + + for (i = 0; i < table->size / 2; i++) { + u16 low = get_entry_l3cc(table, 2 * i); + u16 high = get_entry_l3cc(table, 2 * i + 1); - for (i = 0; i < table->size/2; i++) { *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, 2 * i, 2 * i + 1); + *cs++ = l3cc_combine(table, low, high); } + /* Odd table size - 1 left over */ if (table->size & 0x01) { - /* Odd table size - 1 left over */ + u16 low = get_entry_l3cc(table, 2 * i); + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, 2 * i, 0); + *cs++ = l3cc_combine(table, low, unused_value); i++; } - /* - * Now set the rest of the table to uncached - use entry 0 as - * this will be uncached. Leave the last pair uninitialised as - * they are reserved by the hardware. - */ - for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) { + /* All remaining entries are also unused */ + for (; i < table->n_entries / 2; i++) { *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, 0, 0); + *cs++ = l3cc_combine(table, unused_value, unused_value); } *cs++ = MI_NOOP; @@ -391,26 +496,35 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv) { struct drm_i915_mocs_table table; unsigned int i; + u16 unused_value; if (!get_mocs_settings(dev_priv, &table)) return; - for (i = 0; i < table.size/2; i++) - I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 2*i, 2*i+1)); + /* Set unused values to PTE */ + unused_value = table.table[I915_MOCS_PTE].l3cc_value; + + for (i = 0; i < table.size / 2; i++) { + u16 low = get_entry_l3cc(&table, 2 * i); + u16 high = get_entry_l3cc(&table, 2 * i + 1); + + I915_WRITE(GEN9_LNCFCMOCS(i), + l3cc_combine(&table, low, high)); + } /* Odd table size - 1 left over */ if (table.size & 0x01) { - I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 2*i, 0)); + u16 low = get_entry_l3cc(&table, 2 * i); + + I915_WRITE(GEN9_LNCFCMOCS(i), + l3cc_combine(&table, low, unused_value)); i++; } - /* - * Now set the rest of the table to uncached - use entry 0 as - * this will be uncached. Leave the last pair as initialised as - * they are reserved by the hardware. - */ - for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++) - I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0)); + /* All remaining entries are also unused */ + for (; i < table.n_entries / 2; i++) + I915_WRITE(GEN9_LNCFCMOCS(i), + l3cc_combine(&table, unused_value, unused_value)); } /** diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index c300e5787b3c..c0df1dbb0069 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -186,7 +186,7 @@ struct intel_overlay { struct overlay_registers __iomem *regs; u32 flip_addr; /* flip handling */ - struct i915_gem_active last_flip; + struct i915_active_request last_flip; }; static void i830_overlay_clock_gating(struct drm_i915_private *dev_priv, @@ -214,23 +214,23 @@ static void i830_overlay_clock_gating(struct drm_i915_private *dev_priv, static void intel_overlay_submit_request(struct intel_overlay *overlay, struct i915_request *rq, - i915_gem_retire_fn retire) + i915_active_retire_fn retire) { - GEM_BUG_ON(i915_gem_active_peek(&overlay->last_flip, - &overlay->i915->drm.struct_mutex)); - i915_gem_active_set_retire_fn(&overlay->last_flip, retire, - &overlay->i915->drm.struct_mutex); - i915_gem_active_set(&overlay->last_flip, rq); + GEM_BUG_ON(i915_active_request_peek(&overlay->last_flip, + &overlay->i915->drm.struct_mutex)); + i915_active_request_set_retire_fn(&overlay->last_flip, retire, + &overlay->i915->drm.struct_mutex); + __i915_active_request_set(&overlay->last_flip, rq); i915_request_add(rq); } static int intel_overlay_do_wait_request(struct intel_overlay *overlay, struct i915_request *rq, - i915_gem_retire_fn retire) + i915_active_retire_fn retire) { intel_overlay_submit_request(overlay, rq, retire); - return i915_gem_active_retire(&overlay->last_flip, - &overlay->i915->drm.struct_mutex); + return i915_active_request_retire(&overlay->last_flip, + &overlay->i915->drm.struct_mutex); } static struct i915_request *alloc_request(struct intel_overlay *overlay) @@ -351,8 +351,9 @@ static void intel_overlay_release_old_vma(struct intel_overlay *overlay) i915_vma_put(vma); } -static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active, - struct i915_request *rq) +static void +intel_overlay_release_old_vid_tail(struct i915_active_request *active, + struct i915_request *rq) { struct intel_overlay *overlay = container_of(active, typeof(*overlay), last_flip); @@ -360,7 +361,7 @@ static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active, intel_overlay_release_old_vma(overlay); } -static void intel_overlay_off_tail(struct i915_gem_active *active, +static void intel_overlay_off_tail(struct i915_active_request *active, struct i915_request *rq) { struct intel_overlay *overlay = @@ -423,8 +424,8 @@ static int intel_overlay_off(struct intel_overlay *overlay) * We have to be careful not to repeat work forever an make forward progess. */ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay) { - return i915_gem_active_retire(&overlay->last_flip, - &overlay->i915->drm.struct_mutex); + return i915_active_request_retire(&overlay->last_flip, + &overlay->i915->drm.struct_mutex); } /* Wait for pending overlay flip and release old frame. @@ -480,8 +481,6 @@ void intel_overlay_reset(struct drm_i915_private *dev_priv) if (!overlay) return; - intel_overlay_release_old_vid(overlay); - overlay->old_xscale = 0; overlay->old_yscale = 0; overlay->crtc = NULL; @@ -1359,7 +1358,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv) overlay->contrast = 75; overlay->saturation = 146; - init_request_active(&overlay->last_flip, NULL); + INIT_ACTIVE_REQUEST(&overlay->last_flip); mutex_lock(&dev_priv->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index ee3e0842d542..beca98d2b035 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -1087,20 +1087,11 @@ static void pwm_enable_backlight(const struct intel_crtc_state *crtc_state, intel_panel_actually_set_backlight(conn_state, panel->backlight.level); } -void intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state, - const struct drm_connector_state *conn_state) +static void __intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { struct intel_connector *connector = to_intel_connector(conn_state->connector); - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; - enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; - - if (!panel->backlight.present) - return; - - DRM_DEBUG_KMS("pipe %c\n", pipe_name(pipe)); - - mutex_lock(&dev_priv->backlight_lock); WARN_ON(panel->backlight.max == 0); @@ -1117,6 +1108,24 @@ void intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state, panel->backlight.enabled = true; if (panel->backlight.device) panel->backlight.device->props.power = FB_BLANK_UNBLANK; +} + +void intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct intel_connector *connector = to_intel_connector(conn_state->connector); + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_panel *panel = &connector->panel; + enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; + + if (!panel->backlight.present) + return; + + DRM_DEBUG_KMS("pipe %c\n", pipe_name(pipe)); + + mutex_lock(&dev_priv->backlight_lock); + + __intel_panel_enable_backlight(crtc_state, conn_state); mutex_unlock(&dev_priv->backlight_lock); } @@ -1203,17 +1212,20 @@ static int intel_backlight_device_get_brightness(struct backlight_device *bd) struct intel_connector *connector = bl_get_data(bd); struct drm_device *dev = connector->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - u32 hw_level; - int ret; + intel_wakeref_t wakeref; + int ret = 0; - intel_runtime_pm_get(dev_priv); - drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); + with_intel_runtime_pm(dev_priv, wakeref) { + u32 hw_level; - hw_level = intel_panel_get_backlight(connector); - ret = scale_hw_to_user(connector, hw_level, bd->props.max_brightness); + drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); - drm_modeset_unlock(&dev->mode_config.connection_mutex); - intel_runtime_pm_put(dev_priv); + hw_level = intel_panel_get_backlight(connector); + ret = scale_hw_to_user(connector, + hw_level, bd->props.max_brightness); + + drm_modeset_unlock(&dev->mode_config.connection_mutex); + } return ret; } @@ -1484,8 +1496,8 @@ static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unus { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; - u32 pch_ctl1, pch_ctl2, val; - bool alt; + u32 cpu_ctl2, pch_ctl1, pch_ctl2, val; + bool alt, cpu_mode; if (HAS_PCH_LPT(dev_priv)) alt = I915_READ(SOUTH_CHICKEN2) & LPT_PWM_GRANULARITY; @@ -1499,6 +1511,8 @@ static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unus pch_ctl2 = I915_READ(BLC_PWM_PCH_CTL2); panel->backlight.max = pch_ctl2 >> 16; + cpu_ctl2 = I915_READ(BLC_PWM_CPU_CTL2); + if (!panel->backlight.max) panel->backlight.max = get_backlight_max_vbt(connector); @@ -1507,12 +1521,28 @@ static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unus panel->backlight.min = get_backlight_min_vbt(connector); - val = lpt_get_backlight(connector); + panel->backlight.enabled = pch_ctl1 & BLM_PCH_PWM_ENABLE; + + cpu_mode = panel->backlight.enabled && HAS_PCH_LPT(dev_priv) && + !(pch_ctl1 & BLM_PCH_OVERRIDE_ENABLE) && + (cpu_ctl2 & BLM_PWM_ENABLE); + if (cpu_mode) + val = pch_get_backlight(connector); + else + val = lpt_get_backlight(connector); val = intel_panel_compute_brightness(connector, val); panel->backlight.level = clamp(val, panel->backlight.min, panel->backlight.max); - panel->backlight.enabled = pch_ctl1 & BLM_PCH_PWM_ENABLE; + if (cpu_mode) { + DRM_DEBUG_KMS("CPU backlight register was enabled, switching to PCH override\n"); + + /* Write converted CPU PWM value to PCH override register */ + lpt_set_backlight(connector->base.state, panel->backlight.level); + I915_WRITE(BLC_PWM_PCH_CTL1, pch_ctl1 | BLM_PCH_OVERRIDE_ENABLE); + + I915_WRITE(BLC_PWM_CPU_CTL2, cpu_ctl2 & ~BLM_PWM_ENABLE); + } return 0; } @@ -1773,6 +1803,24 @@ static int pwm_setup_backlight(struct intel_connector *connector, return 0; } +void intel_panel_update_backlight(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct intel_connector *connector = to_intel_connector(conn_state->connector); + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_panel *panel = &connector->panel; + + if (!panel->backlight.present) + return; + + mutex_lock(&dev_priv->backlight_lock); + if (!panel->backlight.enabled) + __intel_panel_enable_backlight(crtc_state, conn_state); + + mutex_unlock(&dev_priv->backlight_lock); +} + int intel_panel_setup_backlight(struct drm_connector *connector, enum pipe pipe) { struct drm_i915_private *dev_priv = to_i915(connector->dev); diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index bdabcfab8090..a8554dc4f196 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -44,7 +44,7 @@ static const char * const pipe_crc_sources[] = { }; static int i8xx_pipe_crc_ctl_reg(enum intel_pipe_crc_source *source, - uint32_t *val) + u32 *val) { if (*source == INTEL_PIPE_CRC_SOURCE_AUTO) *source = INTEL_PIPE_CRC_SOURCE_PIPE; @@ -120,7 +120,7 @@ static int i9xx_pipe_crc_auto_source(struct drm_i915_private *dev_priv, static int vlv_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, enum pipe pipe, enum intel_pipe_crc_source *source, - uint32_t *val) + u32 *val) { bool need_stable_symbols = false; @@ -165,7 +165,7 @@ static int vlv_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, * - DisplayPort scrambling: used for EMI reduction */ if (need_stable_symbols) { - uint32_t tmp = I915_READ(PORT_DFT2_G4X); + u32 tmp = I915_READ(PORT_DFT2_G4X); tmp |= DC_BALANCE_RESET_VLV; switch (pipe) { @@ -190,7 +190,7 @@ static int vlv_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, static int i9xx_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, enum pipe pipe, enum intel_pipe_crc_source *source, - uint32_t *val) + u32 *val) { bool need_stable_symbols = false; @@ -244,7 +244,7 @@ static int i9xx_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, * - DisplayPort scrambling: used for EMI reduction */ if (need_stable_symbols) { - uint32_t tmp = I915_READ(PORT_DFT2_G4X); + u32 tmp = I915_READ(PORT_DFT2_G4X); WARN_ON(!IS_G4X(dev_priv)); @@ -265,7 +265,7 @@ static int i9xx_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, static void vlv_undo_pipe_scramble_reset(struct drm_i915_private *dev_priv, enum pipe pipe) { - uint32_t tmp = I915_READ(PORT_DFT2_G4X); + u32 tmp = I915_READ(PORT_DFT2_G4X); switch (pipe) { case PIPE_A: @@ -289,7 +289,7 @@ static void vlv_undo_pipe_scramble_reset(struct drm_i915_private *dev_priv, static void g4x_undo_pipe_scramble_reset(struct drm_i915_private *dev_priv, enum pipe pipe) { - uint32_t tmp = I915_READ(PORT_DFT2_G4X); + u32 tmp = I915_READ(PORT_DFT2_G4X); if (pipe == PIPE_A) tmp &= ~PIPE_A_SCRAMBLE_RESET; @@ -304,7 +304,7 @@ static void g4x_undo_pipe_scramble_reset(struct drm_i915_private *dev_priv, } static int ilk_pipe_crc_ctl_reg(enum intel_pipe_crc_source *source, - uint32_t *val) + u32 *val) { if (*source == INTEL_PIPE_CRC_SOURCE_AUTO) *source = INTEL_PIPE_CRC_SOURCE_PIPE; @@ -392,7 +392,7 @@ unlock: static int ivb_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, enum pipe pipe, enum intel_pipe_crc_source *source, - uint32_t *val, + u32 *val, bool set_wa) { if (*source == INTEL_PIPE_CRC_SOURCE_AUTO) @@ -589,6 +589,7 @@ int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name) struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[crtc->index]; enum intel_display_power_domain power_domain; enum intel_pipe_crc_source source; + intel_wakeref_t wakeref; u32 val = 0; /* shut up gcc */ int ret = 0; @@ -598,7 +599,8 @@ int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name) } power_domain = POWER_DOMAIN_PIPE(crtc->index); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) { + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) { DRM_DEBUG_KMS("Trying to capture CRC while pipe is off\n"); return -EIO; } @@ -624,7 +626,7 @@ int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name) pipe_crc->skipped = 0; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); return ret; } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 48c755dc895b..54307f1df6cf 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -483,7 +483,7 @@ static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state) int sprite0_start, sprite1_start; switch (pipe) { - uint32_t dsparb, dsparb2, dsparb3; + u32 dsparb, dsparb2, dsparb3; case PIPE_A: dsparb = I915_READ(DSPARB); dsparb2 = I915_READ(DSPARB2); @@ -516,7 +516,7 @@ static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state) static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv, enum i9xx_plane_id i9xx_plane) { - uint32_t dsparb = I915_READ(DSPARB); + u32 dsparb = I915_READ(DSPARB); int size; size = dsparb & 0x7f; @@ -532,7 +532,7 @@ static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv, static int i830_get_fifo_size(struct drm_i915_private *dev_priv, enum i9xx_plane_id i9xx_plane) { - uint32_t dsparb = I915_READ(DSPARB); + u32 dsparb = I915_READ(DSPARB); int size; size = dsparb & 0x1ff; @@ -549,7 +549,7 @@ static int i830_get_fifo_size(struct drm_i915_private *dev_priv, static int i845_get_fifo_size(struct drm_i915_private *dev_priv, enum i9xx_plane_id i9xx_plane) { - uint32_t dsparb = I915_READ(DSPARB); + u32 dsparb = I915_READ(DSPARB); int size; size = dsparb & 0x7f; @@ -670,9 +670,9 @@ static unsigned int intel_wm_method1(unsigned int pixel_rate, unsigned int cpp, unsigned int latency) { - uint64_t ret; + u64 ret; - ret = (uint64_t) pixel_rate * cpp * latency; + ret = (u64)pixel_rate * cpp * latency; ret = DIV_ROUND_UP_ULL(ret, 10000); return ret; @@ -1092,9 +1092,9 @@ static int g4x_fbc_fifo_size(int level) } } -static uint16_t g4x_compute_wm(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state, - int level) +static u16 g4x_compute_wm(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, + int level) { struct intel_plane *plane = to_intel_plane(plane_state->base.plane); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); @@ -1191,9 +1191,9 @@ static bool g4x_raw_fbc_wm_set(struct intel_crtc_state *crtc_state, return dirty; } -static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate, - const struct intel_plane_state *pstate, - uint32_t pri_val); +static u32 ilk_compute_fbc_wm(const struct intel_crtc_state *cstate, + const struct intel_plane_state *pstate, + u32 pri_val); static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) @@ -1601,9 +1601,9 @@ static void vlv_setup_wm_latency(struct drm_i915_private *dev_priv) } } -static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state, - int level) +static u16 vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, + int level) { struct intel_plane *plane = to_intel_plane(plane_state->base.plane); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); @@ -1971,7 +1971,7 @@ static void vlv_atomic_update_fifo(struct intel_atomic_state *state, spin_lock(&dev_priv->uncore.lock); switch (crtc->pipe) { - uint32_t dsparb, dsparb2, dsparb3; + u32 dsparb, dsparb2, dsparb3; case PIPE_A: dsparb = I915_READ_FW(DSPARB); dsparb2 = I915_READ_FW(DSPARB2); @@ -2265,8 +2265,8 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) { struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev); const struct intel_watermark_params *wm_info; - uint32_t fwater_lo; - uint32_t fwater_hi; + u32 fwater_lo; + u32 fwater_hi; int cwm, srwm = 1; int fifo_size; int planea_wm, planeb_wm; @@ -2409,7 +2409,7 @@ static void i845_update_wm(struct intel_crtc *unused_crtc) struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev); struct intel_crtc *crtc; const struct drm_display_mode *adjusted_mode; - uint32_t fwater_lo; + u32 fwater_lo; int planea_wm; crtc = single_enabled_crtc(dev_priv); @@ -2458,8 +2458,7 @@ static unsigned int ilk_wm_method2(unsigned int pixel_rate, return ret; } -static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels, - uint8_t cpp) +static u32 ilk_wm_fbc(u32 pri_val, u32 horiz_pixels, u8 cpp) { /* * Neither of these should be possible since this function shouldn't be @@ -2476,22 +2475,21 @@ static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels, } struct ilk_wm_maximums { - uint16_t pri; - uint16_t spr; - uint16_t cur; - uint16_t fbc; + u16 pri; + u16 spr; + u16 cur; + u16 fbc; }; /* * For both WM_PIPE and WM_LP. * mem_value must be in 0.1us units. */ -static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate, - const struct intel_plane_state *pstate, - uint32_t mem_value, - bool is_lp) +static u32 ilk_compute_pri_wm(const struct intel_crtc_state *cstate, + const struct intel_plane_state *pstate, + u32 mem_value, bool is_lp) { - uint32_t method1, method2; + u32 method1, method2; int cpp; if (mem_value == 0) @@ -2519,11 +2517,11 @@ static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate, * For both WM_PIPE and WM_LP. * mem_value must be in 0.1us units. */ -static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate, - const struct intel_plane_state *pstate, - uint32_t mem_value) +static u32 ilk_compute_spr_wm(const struct intel_crtc_state *cstate, + const struct intel_plane_state *pstate, + u32 mem_value) { - uint32_t method1, method2; + u32 method1, method2; int cpp; if (mem_value == 0) @@ -2546,9 +2544,9 @@ static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate, * For both WM_PIPE and WM_LP. * mem_value must be in 0.1us units. */ -static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate, - const struct intel_plane_state *pstate, - uint32_t mem_value) +static u32 ilk_compute_cur_wm(const struct intel_crtc_state *cstate, + const struct intel_plane_state *pstate, + u32 mem_value) { int cpp; @@ -2566,9 +2564,9 @@ static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate, } /* Only for WM_LP. */ -static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate, - const struct intel_plane_state *pstate, - uint32_t pri_val) +static u32 ilk_compute_fbc_wm(const struct intel_crtc_state *cstate, + const struct intel_plane_state *pstate, + u32 pri_val) { int cpp; @@ -2734,9 +2732,9 @@ static bool ilk_validate_wm_level(int level, DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n", level, result->cur_val, max->cur); - result->pri_val = min_t(uint32_t, result->pri_val, max->pri); - result->spr_val = min_t(uint32_t, result->spr_val, max->spr); - result->cur_val = min_t(uint32_t, result->cur_val, max->cur); + result->pri_val = min_t(u32, result->pri_val, max->pri); + result->spr_val = min_t(u32, result->spr_val, max->spr); + result->cur_val = min_t(u32, result->cur_val, max->cur); result->enable = true; } @@ -2752,9 +2750,9 @@ static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv, const struct intel_plane_state *curstate, struct intel_wm_level *result) { - uint16_t pri_latency = dev_priv->wm.pri_latency[level]; - uint16_t spr_latency = dev_priv->wm.spr_latency[level]; - uint16_t cur_latency = dev_priv->wm.cur_latency[level]; + u16 pri_latency = dev_priv->wm.pri_latency[level]; + u16 spr_latency = dev_priv->wm.spr_latency[level]; + u16 cur_latency = dev_priv->wm.cur_latency[level]; /* WM1+ latency values stored in 0.5us units */ if (level > 0) { @@ -2778,7 +2776,7 @@ static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv, result->enable = true; } -static uint32_t +static u32 hsw_compute_linetime_wm(const struct intel_crtc_state *cstate) { const struct intel_atomic_state *intel_state = @@ -2807,10 +2805,10 @@ hsw_compute_linetime_wm(const struct intel_crtc_state *cstate) } static void intel_read_wm_latency(struct drm_i915_private *dev_priv, - uint16_t wm[8]) + u16 wm[8]) { if (INTEL_GEN(dev_priv) >= 9) { - uint32_t val; + u32 val; int ret, i; int level, max_level = ilk_wm_max_level(dev_priv); @@ -2894,7 +2892,7 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, wm[0] += 1; } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - uint64_t sskpd = I915_READ64(MCH_SSKPD); + u64 sskpd = I915_READ64(MCH_SSKPD); wm[0] = (sskpd >> 56) & 0xFF; if (wm[0] == 0) @@ -2904,14 +2902,14 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, wm[3] = (sskpd >> 20) & 0x1FF; wm[4] = (sskpd >> 32) & 0x1FF; } else if (INTEL_GEN(dev_priv) >= 6) { - uint32_t sskpd = I915_READ(MCH_SSKPD); + u32 sskpd = I915_READ(MCH_SSKPD); wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK; wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK; wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK; wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK; } else if (INTEL_GEN(dev_priv) >= 5) { - uint32_t mltr = I915_READ(MLTR_ILK); + u32 mltr = I915_READ(MLTR_ILK); /* ILK primary LP0 latency is 700 ns */ wm[0] = 7; @@ -2923,7 +2921,7 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, } static void intel_fixup_spr_wm_latency(struct drm_i915_private *dev_priv, - uint16_t wm[5]) + u16 wm[5]) { /* ILK sprite LP0 latency is 1300 ns */ if (IS_GEN(dev_priv, 5)) @@ -2931,7 +2929,7 @@ static void intel_fixup_spr_wm_latency(struct drm_i915_private *dev_priv, } static void intel_fixup_cur_wm_latency(struct drm_i915_private *dev_priv, - uint16_t wm[5]) + u16 wm[5]) { /* ILK cursor LP0 latency is 1300 ns */ if (IS_GEN(dev_priv, 5)) @@ -2953,7 +2951,7 @@ int ilk_wm_max_level(const struct drm_i915_private *dev_priv) static void intel_print_wm_latency(struct drm_i915_private *dev_priv, const char *name, - const uint16_t wm[8]) + const u16 wm[8]) { int level, max_level = ilk_wm_max_level(dev_priv); @@ -2982,7 +2980,7 @@ static void intel_print_wm_latency(struct drm_i915_private *dev_priv, } static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv, - uint16_t wm[5], uint16_t min) + u16 wm[5], u16 min) { int level, max_level = ilk_wm_max_level(dev_priv); @@ -2991,7 +2989,7 @@ static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv, wm[0] = max(wm[0], min); for (level = 1; level <= max_level; level++) - wm[level] = max_t(uint16_t, wm[level], DIV_ROUND_UP(min, 5)); + wm[level] = max_t(u16, wm[level], DIV_ROUND_UP(min, 5)); return true; } @@ -3538,7 +3536,7 @@ static void ilk_write_wm_values(struct drm_i915_private *dev_priv, { struct ilk_wm_values *previous = &dev_priv->wm.hw; unsigned int dirty; - uint32_t val; + u32 val; dirty = ilk_compute_wm_dirty(dev_priv, previous, results); if (!dirty) @@ -3636,14 +3634,9 @@ static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv) * FIXME: We still don't have the proper code detect if we need to apply the WA, * so assume we'll always need it in order to avoid underruns. */ -static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) +static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - - if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) - return true; - - return false; + return IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv); } static bool @@ -3675,25 +3668,25 @@ intel_enable_sagv(struct drm_i915_private *dev_priv) if (dev_priv->sagv_status == I915_SAGV_ENABLED) return 0; - DRM_DEBUG_KMS("Enabling the SAGV\n"); + DRM_DEBUG_KMS("Enabling SAGV\n"); mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL, GEN9_SAGV_ENABLE); - /* We don't need to wait for the SAGV when enabling */ + /* We don't need to wait for SAGV when enabling */ mutex_unlock(&dev_priv->pcu_lock); /* * Some skl systems, pre-release machines in particular, - * don't actually have an SAGV. + * don't actually have SAGV. */ if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) { DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; return 0; } else if (ret < 0) { - DRM_ERROR("Failed to enable the SAGV\n"); + DRM_ERROR("Failed to enable SAGV\n"); return ret; } @@ -3712,7 +3705,7 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) if (dev_priv->sagv_status == I915_SAGV_DISABLED) return 0; - DRM_DEBUG_KMS("Disabling the SAGV\n"); + DRM_DEBUG_KMS("Disabling SAGV\n"); mutex_lock(&dev_priv->pcu_lock); /* bspec says to keep retrying for at least 1 ms */ @@ -3724,14 +3717,14 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) /* * Some skl systems, pre-release machines in particular, - * don't actually have an SAGV. + * don't actually have SAGV. */ if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) { DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; return 0; } else if (ret < 0) { - DRM_ERROR("Failed to disable the SAGV (%d)\n", ret); + DRM_ERROR("Failed to disable SAGV (%d)\n", ret); return ret; } @@ -3762,7 +3755,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) sagv_block_time_us = 10; /* - * SKL+ workaround: bspec recommends we disable the SAGV when we have + * SKL+ workaround: bspec recommends we disable SAGV when we have * more then one pipe enabled * * If there are no active CRTCs, no additional checks need be performed @@ -3795,7 +3788,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) latency = dev_priv->wm.skl_latency[level]; - if (skl_needs_memory_bw_wa(intel_state) && + if (skl_needs_memory_bw_wa(dev_priv) && plane->base.state->fb->modifier == I915_FORMAT_MOD_X_TILED) latency += 15; @@ -3803,7 +3796,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) /* * If any of the planes on this pipe don't enable wm levels that * incur memory latencies higher than sagv_block_time_us we - * can't enable the SAGV. + * can't enable SAGV. */ if (latency < sagv_block_time_us) return false; @@ -3832,8 +3825,13 @@ static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv, /* * 12GB/s is maximum BW supported by single DBuf slice. + * + * FIXME dbuf slice code is broken: + * - must wait for planes to stop using the slice before powering it off + * - plane straddling both slices is illegal in multi-pipe scenarios + * - should validate we stay within the hw bandwidth limits */ - if (num_active > 1 || total_data_bw >= GBps(12)) { + if (0 && (num_active > 1 || total_data_bw >= GBps(12))) { ddb->enabled_slices = 2; } else { ddb->enabled_slices = 1; @@ -3932,14 +3930,9 @@ static unsigned int skl_cursor_allocation(int num_active) static void skl_ddb_entry_init_from_hw(struct drm_i915_private *dev_priv, struct skl_ddb_entry *entry, u32 reg) { - u16 mask; - if (INTEL_GEN(dev_priv) >= 11) - mask = ICL_DDB_ENTRY_MASK; - else - mask = SKL_DDB_ENTRY_MASK; - entry->start = reg & mask; - entry->end = (reg >> DDB_ENTRY_END_SHIFT) & mask; + entry->start = reg & DDB_ENTRY_MASK; + entry->end = (reg >> DDB_ENTRY_END_SHIFT) & DDB_ENTRY_MASK; if (entry->end) entry->end += 1; @@ -3992,10 +3985,12 @@ void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc, struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum intel_display_power_domain power_domain; enum pipe pipe = crtc->pipe; + intel_wakeref_t wakeref; enum plane_id plane_id; power_domain = POWER_DOMAIN_PIPE(pipe); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) return; for_each_plane_id_on_crtc(crtc, plane_id) @@ -4004,7 +3999,7 @@ void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc, &ddb_y[plane_id], &ddb_uv[plane_id]); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); } void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, @@ -4034,7 +4029,7 @@ skl_plane_downscale_amount(const struct intel_crtc_state *cstate, const struct intel_plane_state *pstate) { struct intel_plane *plane = to_intel_plane(pstate->base.plane); - uint32_t src_w, src_h, dst_w, dst_h; + u32 src_w, src_h, dst_w, dst_h; uint_fixed_16_16_t fp_w_ratio, fp_h_ratio; uint_fixed_16_16_t downscale_h, downscale_w; @@ -4080,8 +4075,8 @@ skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state) return pipe_downscale; if (crtc_state->pch_pfit.enabled) { - uint32_t src_w, src_h, dst_w, dst_h; - uint32_t pfit_size = crtc_state->pch_pfit.size; + u32 src_w, src_h, dst_w, dst_h; + u32 pfit_size = crtc_state->pch_pfit.size; uint_fixed_16_16_t fp_w_ratio, fp_h_ratio; uint_fixed_16_16_t downscale_h, downscale_w; @@ -4114,7 +4109,7 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, const struct drm_plane_state *pstate; struct intel_plane_state *intel_pstate; int crtc_clock, dotclk; - uint32_t pipe_max_pixel_rate; + u32 pipe_max_pixel_rate; uint_fixed_16_16_t pipe_downscale; uint_fixed_16_16_t max_downscale = u32_to_fixed16(1); @@ -4170,8 +4165,8 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, { struct intel_plane *intel_plane = to_intel_plane(intel_pstate->base.plane); - uint32_t data_rate; - uint32_t width = 0, height = 0; + u32 data_rate; + u32 width = 0, height = 0; struct drm_framebuffer *fb; u32 format; uint_fixed_16_16_t down_scale_amount; @@ -4314,15 +4309,15 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb; struct skl_plane_wm *wm; - uint16_t alloc_size, start = 0; - uint16_t total[I915_MAX_PLANES] = {}; - uint16_t uv_total[I915_MAX_PLANES] = {}; + u16 alloc_size, start = 0; + u16 total[I915_MAX_PLANES] = {}; + u16 uv_total[I915_MAX_PLANES] = {}; u64 total_data_rate; enum plane_id plane_id; int num_active; u64 plane_data_rate[I915_MAX_PLANES] = {}; u64 uv_plane_data_rate[I915_MAX_PLANES] = {}; - uint16_t blocks = 0; + u32 blocks; int level; /* Clear the partitioning for disabled planes. */ @@ -4374,8 +4369,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, continue; wm = &cstate->wm.skl.optimal.planes[plane_id]; - blocks += wm->wm[level].plane_res_b; - blocks += wm->uv_wm[level].plane_res_b; + blocks += wm->wm[level].min_ddb_alloc; + blocks += wm->uv_wm[level].min_ddb_alloc; } if (blocks < alloc_size) { @@ -4416,7 +4411,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, extra = min_t(u16, alloc_size, DIV64_U64_ROUND_UP(alloc_size * rate, total_data_rate)); - total[plane_id] = wm->wm[level].plane_res_b + extra; + total[plane_id] = wm->wm[level].min_ddb_alloc + extra; alloc_size -= extra; total_data_rate -= rate; @@ -4427,7 +4422,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, extra = min_t(u16, alloc_size, DIV64_U64_ROUND_UP(alloc_size * rate, total_data_rate)); - uv_total[plane_id] = wm->uv_wm[level].plane_res_b + extra; + uv_total[plane_id] = wm->uv_wm[level].min_ddb_alloc + extra; alloc_size -= extra; total_data_rate -= rate; } @@ -4480,7 +4475,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, */ for_each_plane_id_on_crtc(intel_crtc, plane_id) { wm = &cstate->wm.skl.optimal.planes[plane_id]; - if (wm->trans_wm.plane_res_b > total[plane_id]) + if (wm->trans_wm.plane_res_b >= total[plane_id]) memset(&wm->trans_wm, 0, sizeof(wm->trans_wm)); } @@ -4494,10 +4489,10 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, * 2xcdclk is 1350 MHz and the pixel rate should never exceed that. */ static uint_fixed_16_16_t -skl_wm_method1(const struct drm_i915_private *dev_priv, uint32_t pixel_rate, - uint8_t cpp, uint32_t latency, uint32_t dbuf_block_size) +skl_wm_method1(const struct drm_i915_private *dev_priv, u32 pixel_rate, + u8 cpp, u32 latency, u32 dbuf_block_size) { - uint32_t wm_intermediate_val; + u32 wm_intermediate_val; uint_fixed_16_16_t ret; if (latency == 0) @@ -4512,12 +4507,11 @@ skl_wm_method1(const struct drm_i915_private *dev_priv, uint32_t pixel_rate, return ret; } -static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate, - uint32_t pipe_htotal, - uint32_t latency, - uint_fixed_16_16_t plane_blocks_per_line) +static uint_fixed_16_16_t +skl_wm_method2(u32 pixel_rate, u32 pipe_htotal, u32 latency, + uint_fixed_16_16_t plane_blocks_per_line) { - uint32_t wm_intermediate_val; + u32 wm_intermediate_val; uint_fixed_16_16_t ret; if (latency == 0) @@ -4533,8 +4527,8 @@ static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate, static uint_fixed_16_16_t intel_get_linetime_us(const struct intel_crtc_state *cstate) { - uint32_t pixel_rate; - uint32_t crtc_htotal; + u32 pixel_rate; + u32 crtc_htotal; uint_fixed_16_16_t linetime_us; if (!cstate->base.active) @@ -4551,11 +4545,11 @@ intel_get_linetime_us(const struct intel_crtc_state *cstate) return linetime_us; } -static uint32_t +static u32 skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate, const struct intel_plane_state *pstate) { - uint64_t adjusted_pixel_rate; + u64 adjusted_pixel_rate; uint_fixed_16_16_t downscale_amount; /* Shouldn't reach here on disabled planes... */ @@ -4582,10 +4576,7 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate, struct drm_i915_private *dev_priv = to_i915(plane->base.dev); const struct drm_plane_state *pstate = &intel_pstate->base; const struct drm_framebuffer *fb = pstate->fb; - uint32_t interm_pbpl; - struct intel_atomic_state *state = - to_intel_atomic_state(cstate->base.state); - bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); + u32 interm_pbpl; /* only NV12 format has two planes */ if (color_plane == 1 && fb->format->format != DRM_FORMAT_NV12) { @@ -4621,7 +4612,7 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate, intel_pstate); if (INTEL_GEN(dev_priv) >= 11 && - fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 8) + fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 1) wp->dbuf_block_size = 256; else wp->dbuf_block_size = 512; @@ -4646,7 +4637,7 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate, wp->y_min_scanlines = 4; } - if (apply_memory_bw_wa) + if (skl_needs_memory_bw_wa(dev_priv)) wp->y_min_scanlines *= 2; wp->plane_bytes_per_line = wp->width * wp->cpp; @@ -4678,6 +4669,15 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate, return 0; } +static bool skl_wm_has_lines(struct drm_i915_private *dev_priv, int level) +{ + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + return true; + + /* The number of lines are ignored for the level 0 watermark. */ + return level > 0; +} + static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, const struct intel_plane_state *intel_pstate, int level, @@ -4687,13 +4687,16 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, { struct drm_i915_private *dev_priv = to_i915(intel_pstate->base.plane->dev); - uint32_t latency = dev_priv->wm.skl_latency[level]; + u32 latency = dev_priv->wm.skl_latency[level]; uint_fixed_16_16_t method1, method2; uint_fixed_16_16_t selected_result; - uint32_t res_blocks, res_lines; - struct intel_atomic_state *state = - to_intel_atomic_state(cstate->base.state); - bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); + u32 res_blocks, res_lines, min_ddb_alloc = 0; + + if (latency == 0) { + /* reject it */ + result->min_ddb_alloc = U16_MAX; + return; + } /* Display WA #1141: kbl,cfl */ if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) || @@ -4701,7 +4704,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, dev_priv->ipc_enabled) latency += 4; - if (apply_memory_bw_wa && wp->x_tiled) + if (skl_needs_memory_bw_wa(dev_priv) && wp->x_tiled) latency += 15; method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate, @@ -4760,9 +4763,32 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, } } - /* The number of lines are ignored for the level 0 watermark. */ - if (level > 0 && res_lines > 31) + if (INTEL_GEN(dev_priv) >= 11) { + if (wp->y_tiled) { + int extra_lines; + + if (res_lines % wp->y_min_scanlines == 0) + extra_lines = wp->y_min_scanlines; + else + extra_lines = wp->y_min_scanlines * 2 - + res_lines % wp->y_min_scanlines; + + min_ddb_alloc = mul_round_up_u32_fixed16(res_lines + extra_lines, + wp->plane_blocks_per_line); + } else { + min_ddb_alloc = res_blocks + + DIV_ROUND_UP(res_blocks, 10); + } + } + + if (!skl_wm_has_lines(dev_priv, level)) + res_lines = 0; + + if (res_lines > 31) { + /* reject it */ + result->min_ddb_alloc = U16_MAX; return; + } /* * If res_lines is valid, assume we can use this watermark level @@ -4772,6 +4798,8 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, */ result->plane_res_b = res_blocks; result->plane_res_l = res_lines; + /* Bspec says: value >= plane ddb allocation -> invalid, hence the +1 here */ + result->min_ddb_alloc = max(min_ddb_alloc, res_blocks) + 1; result->plane_en = true; } @@ -4796,24 +4824,19 @@ skl_compute_wm_levels(const struct intel_crtc_state *cstate, } } -static uint32_t +static u32 skl_compute_linetime_wm(const struct intel_crtc_state *cstate) { struct drm_atomic_state *state = cstate->base.state; struct drm_i915_private *dev_priv = to_i915(state->dev); uint_fixed_16_16_t linetime_us; - uint32_t linetime_wm; + u32 linetime_wm; linetime_us = intel_get_linetime_us(cstate); - - if (is_fixed16_zero(linetime_us)) - return 0; - linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us)); - /* Display WA #1135: bxt:ALL GLK:ALL */ - if ((IS_BROXTON(dev_priv) || IS_GEMINILAKE(dev_priv)) && - dev_priv->ipc_enabled) + /* Display WA #1135: BXT:ALL GLK:ALL */ + if (IS_GEN9_LP(dev_priv) && dev_priv->ipc_enabled) linetime_wm /= 2; return linetime_wm; @@ -4825,9 +4848,9 @@ static void skl_compute_transition_wm(const struct intel_crtc_state *cstate, { struct drm_device *dev = cstate->base.crtc->dev; const struct drm_i915_private *dev_priv = to_i915(dev); - uint16_t trans_min, trans_y_tile_min; - const uint16_t trans_amount = 10; /* This is configurable amount */ - uint16_t wm0_sel_res_b, trans_offset_b, res_blocks; + u16 trans_min, trans_y_tile_min; + const u16 trans_amount = 10; /* This is configurable amount */ + u16 wm0_sel_res_b, trans_offset_b, res_blocks; /* Transition WM are not recommended by HW team for GEN9 */ if (INTEL_GEN(dev_priv) <= 9) @@ -4856,8 +4879,8 @@ static void skl_compute_transition_wm(const struct intel_crtc_state *cstate, wm0_sel_res_b = wm->wm[0].plane_res_b - 1; if (wp->y_tiled) { - trans_y_tile_min = (uint16_t) mul_round_up_u32_fixed16(2, - wp->y_tile_minimum); + trans_y_tile_min = + (u16)mul_round_up_u32_fixed16(2, wp->y_tile_minimum); res_blocks = max(wm0_sel_res_b, trans_y_tile_min) + trans_offset_b; } else { @@ -5031,7 +5054,7 @@ static void skl_write_wm_level(struct drm_i915_private *dev_priv, i915_reg_t reg, const struct skl_wm_level *level) { - uint32_t val = 0; + u32 val = 0; if (level->plane_en) { val |= PLANE_WM_EN; @@ -5122,6 +5145,23 @@ static bool skl_plane_wm_equals(struct drm_i915_private *dev_priv, return skl_wm_level_equals(&wm1->trans_wm, &wm2->trans_wm); } +static bool skl_pipe_wm_equals(struct intel_crtc *crtc, + const struct skl_pipe_wm *wm1, + const struct skl_pipe_wm *wm2) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum plane_id plane_id; + + for_each_plane_id_on_crtc(crtc, plane_id) { + if (!skl_plane_wm_equals(dev_priv, + &wm1->planes[plane_id], + &wm2->planes[plane_id])) + return false; + } + + return wm1->linetime == wm2->linetime; +} + static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a, const struct skl_ddb_entry *b) { @@ -5148,26 +5188,24 @@ static int skl_update_pipe_wm(struct intel_crtc_state *cstate, struct skl_pipe_wm *pipe_wm, /* out */ bool *changed /* out */) { + struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc); int ret; ret = skl_build_pipe_wm(cstate, pipe_wm); if (ret) return ret; - if (!memcmp(old_pipe_wm, pipe_wm, sizeof(*pipe_wm))) - *changed = false; - else - *changed = true; + *changed = !skl_pipe_wm_equals(crtc, old_pipe_wm, pipe_wm); return 0; } -static uint32_t +static u32 pipes_modified(struct intel_atomic_state *state) { struct intel_crtc *crtc; struct intel_crtc_state *cstate; - uint32_t i, ret = 0; + u32 i, ret = 0; for_each_new_intel_crtc_in_state(state, crtc, cstate, i) ret |= drm_crtc_mask(&crtc->base); @@ -5268,7 +5306,7 @@ skl_ddb_add_affected_pipes(struct intel_atomic_state *state, bool *changed) const struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *crtc; struct intel_crtc_state *crtc_state; - uint32_t realloc_pipes = pipes_modified(state); + u32 realloc_pipes = pipes_modified(state); int ret, i; /* @@ -5567,7 +5605,7 @@ static void ilk_optimize_watermarks(struct intel_atomic_state *state, mutex_unlock(&dev_priv->wm.wm_mutex); } -static inline void skl_wm_level_from_reg_val(uint32_t val, +static inline void skl_wm_level_from_reg_val(u32 val, struct skl_wm_level *level) { level->plane_en = val & PLANE_WM_EN; @@ -5583,7 +5621,7 @@ void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc, enum pipe pipe = crtc->pipe; int level, max_level; enum plane_id plane_id; - uint32_t val; + u32 val; max_level = ilk_wm_max_level(dev_priv); @@ -5695,7 +5733,7 @@ static void ilk_pipe_wm_get_hw_state(struct intel_crtc *crtc) static void g4x_read_wm_values(struct drm_i915_private *dev_priv, struct g4x_wm_values *wm) { - uint32_t tmp; + u32 tmp; tmp = I915_READ(DSPFW1); wm->sr.plane = _FW_WM(tmp, SR); @@ -5722,7 +5760,7 @@ static void vlv_read_wm_values(struct drm_i915_private *dev_priv, struct vlv_wm_values *wm) { enum pipe pipe; - uint32_t tmp; + u32 tmp; for_each_pipe(dev_priv, pipe) { tmp = I915_READ(VLV_DDL(pipe)); @@ -6204,10 +6242,6 @@ void intel_init_ipc(struct drm_i915_private *dev_priv) */ DEFINE_SPINLOCK(mchdev_lock); -/* Global for IPS driver to get at the current i915 device. Protected by - * mchdev_lock. */ -static struct drm_i915_private *i915_mch_dev; - bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val) { u16 rgvswctl; @@ -6670,7 +6704,7 @@ void gen6_rps_boost(struct i915_request *rq, if (!rps->enabled) return; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) + if (i915_request_signaled(rq)) return; /* Serializes with i915_request_retire() */ @@ -7850,16 +7884,17 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv) unsigned long i915_chipset_val(struct drm_i915_private *dev_priv) { - unsigned long val; + intel_wakeref_t wakeref; + unsigned long val = 0; if (!IS_GEN(dev_priv, 5)) return 0; - spin_lock_irq(&mchdev_lock); - - val = __i915_chipset_val(dev_priv); - - spin_unlock_irq(&mchdev_lock); + with_intel_runtime_pm(dev_priv, wakeref) { + spin_lock_irq(&mchdev_lock); + val = __i915_chipset_val(dev_priv); + spin_unlock_irq(&mchdev_lock); + } return val; } @@ -7936,14 +7971,16 @@ static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) void i915_update_gfx_val(struct drm_i915_private *dev_priv) { + intel_wakeref_t wakeref; + if (!IS_GEN(dev_priv, 5)) return; - spin_lock_irq(&mchdev_lock); - - __i915_update_gfx_val(dev_priv); - - spin_unlock_irq(&mchdev_lock); + with_intel_runtime_pm(dev_priv, wakeref) { + spin_lock_irq(&mchdev_lock); + __i915_update_gfx_val(dev_priv); + spin_unlock_irq(&mchdev_lock); + } } static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) @@ -7985,18 +8022,34 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) { - unsigned long val; + intel_wakeref_t wakeref; + unsigned long val = 0; if (!IS_GEN(dev_priv, 5)) return 0; - spin_lock_irq(&mchdev_lock); + with_intel_runtime_pm(dev_priv, wakeref) { + spin_lock_irq(&mchdev_lock); + val = __i915_gfx_val(dev_priv); + spin_unlock_irq(&mchdev_lock); + } - val = __i915_gfx_val(dev_priv); + return val; +} - spin_unlock_irq(&mchdev_lock); +static struct drm_i915_private *i915_mch_dev; - return val; +static struct drm_i915_private *mchdev_get(void) +{ + struct drm_i915_private *i915; + + rcu_read_lock(); + i915 = i915_mch_dev; + if (!kref_get_unless_zero(&i915->drm.ref)) + i915 = NULL; + rcu_read_unlock(); + + return i915; } /** @@ -8007,23 +8060,24 @@ unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) */ unsigned long i915_read_mch_val(void) { - struct drm_i915_private *dev_priv; - unsigned long chipset_val, graphics_val, ret = 0; - - spin_lock_irq(&mchdev_lock); - if (!i915_mch_dev) - goto out_unlock; - dev_priv = i915_mch_dev; - - chipset_val = __i915_chipset_val(dev_priv); - graphics_val = __i915_gfx_val(dev_priv); + struct drm_i915_private *i915; + unsigned long chipset_val = 0; + unsigned long graphics_val = 0; + intel_wakeref_t wakeref; - ret = chipset_val + graphics_val; + i915 = mchdev_get(); + if (!i915) + return 0; -out_unlock: - spin_unlock_irq(&mchdev_lock); + with_intel_runtime_pm(i915, wakeref) { + spin_lock_irq(&mchdev_lock); + chipset_val = __i915_chipset_val(i915); + graphics_val = __i915_gfx_val(i915); + spin_unlock_irq(&mchdev_lock); + } - return ret; + drm_dev_put(&i915->drm); + return chipset_val + graphics_val; } EXPORT_SYMBOL_GPL(i915_read_mch_val); @@ -8034,23 +8088,19 @@ EXPORT_SYMBOL_GPL(i915_read_mch_val); */ bool i915_gpu_raise(void) { - struct drm_i915_private *dev_priv; - bool ret = true; - - spin_lock_irq(&mchdev_lock); - if (!i915_mch_dev) { - ret = false; - goto out_unlock; - } - dev_priv = i915_mch_dev; + struct drm_i915_private *i915; - if (dev_priv->ips.max_delay > dev_priv->ips.fmax) - dev_priv->ips.max_delay--; + i915 = mchdev_get(); + if (!i915) + return false; -out_unlock: + spin_lock_irq(&mchdev_lock); + if (i915->ips.max_delay > i915->ips.fmax) + i915->ips.max_delay--; spin_unlock_irq(&mchdev_lock); - return ret; + drm_dev_put(&i915->drm); + return true; } EXPORT_SYMBOL_GPL(i915_gpu_raise); @@ -8062,23 +8112,19 @@ EXPORT_SYMBOL_GPL(i915_gpu_raise); */ bool i915_gpu_lower(void) { - struct drm_i915_private *dev_priv; - bool ret = true; + struct drm_i915_private *i915; - spin_lock_irq(&mchdev_lock); - if (!i915_mch_dev) { - ret = false; - goto out_unlock; - } - dev_priv = i915_mch_dev; - - if (dev_priv->ips.max_delay < dev_priv->ips.min_delay) - dev_priv->ips.max_delay++; + i915 = mchdev_get(); + if (!i915) + return false; -out_unlock: + spin_lock_irq(&mchdev_lock); + if (i915->ips.max_delay < i915->ips.min_delay) + i915->ips.max_delay++; spin_unlock_irq(&mchdev_lock); - return ret; + drm_dev_put(&i915->drm); + return true; } EXPORT_SYMBOL_GPL(i915_gpu_lower); @@ -8089,13 +8135,16 @@ EXPORT_SYMBOL_GPL(i915_gpu_lower); */ bool i915_gpu_busy(void) { - bool ret = false; + struct drm_i915_private *i915; + bool ret; - spin_lock_irq(&mchdev_lock); - if (i915_mch_dev) - ret = i915_mch_dev->gt.awake; - spin_unlock_irq(&mchdev_lock); + i915 = mchdev_get(); + if (!i915) + return false; + ret = i915->gt.awake; + + drm_dev_put(&i915->drm); return ret; } EXPORT_SYMBOL_GPL(i915_gpu_busy); @@ -8108,24 +8157,19 @@ EXPORT_SYMBOL_GPL(i915_gpu_busy); */ bool i915_gpu_turbo_disable(void) { - struct drm_i915_private *dev_priv; - bool ret = true; - - spin_lock_irq(&mchdev_lock); - if (!i915_mch_dev) { - ret = false; - goto out_unlock; - } - dev_priv = i915_mch_dev; - - dev_priv->ips.max_delay = dev_priv->ips.fstart; + struct drm_i915_private *i915; + bool ret; - if (!ironlake_set_drps(dev_priv, dev_priv->ips.fstart)) - ret = false; + i915 = mchdev_get(); + if (!i915) + return false; -out_unlock: + spin_lock_irq(&mchdev_lock); + i915->ips.max_delay = i915->ips.fstart; + ret = ironlake_set_drps(i915, i915->ips.fstart); spin_unlock_irq(&mchdev_lock); + drm_dev_put(&i915->drm); return ret; } EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); @@ -8154,18 +8198,14 @@ void intel_gpu_ips_init(struct drm_i915_private *dev_priv) { /* We only register the i915 ips part with intel-ips once everything is * set up, to avoid intel-ips sneaking in and reading bogus values. */ - spin_lock_irq(&mchdev_lock); - i915_mch_dev = dev_priv; - spin_unlock_irq(&mchdev_lock); + rcu_assign_pointer(i915_mch_dev, dev_priv); ips_ping_for_i915_load(); } void intel_gpu_ips_teardown(void) { - spin_lock_irq(&mchdev_lock); - i915_mch_dev = NULL; - spin_unlock_irq(&mchdev_lock); + rcu_assign_pointer(i915_mch_dev, NULL); } static void intel_init_emon(struct drm_i915_private *dev_priv) @@ -8504,7 +8544,7 @@ static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv) static void ilk_init_clock_gating(struct drm_i915_private *dev_priv) { - uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; + u32 dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; /* * Required for FBC @@ -8576,7 +8616,7 @@ static void ilk_init_clock_gating(struct drm_i915_private *dev_priv) static void cpt_init_clock_gating(struct drm_i915_private *dev_priv) { int pipe; - uint32_t val; + u32 val; /* * On Ibex Peak and Cougar Point, we need to disable clock @@ -8611,7 +8651,7 @@ static void cpt_init_clock_gating(struct drm_i915_private *dev_priv) static void gen6_check_mch_setup(struct drm_i915_private *dev_priv) { - uint32_t tmp; + u32 tmp; tmp = I915_READ(MCH_SSKPD); if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL) @@ -8621,7 +8661,7 @@ static void gen6_check_mch_setup(struct drm_i915_private *dev_priv) static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) { - uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; + u32 dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); @@ -8715,7 +8755,7 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) { - uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE); + u32 reg = I915_READ(GEN7_FF_THREAD_MODE); /* * WaVSThreadDispatchOverride:ivb,vlv @@ -8751,7 +8791,7 @@ static void lpt_init_clock_gating(struct drm_i915_private *dev_priv) static void lpt_suspend_hw(struct drm_i915_private *dev_priv) { if (HAS_PCH_LPT_LP(dev_priv)) { - uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D); + u32 val = I915_READ(SOUTH_DSPCLK_GATE_D); val &= ~PCH_LP_PARTITION_LEVEL_DISABLE; I915_WRITE(SOUTH_DSPCLK_GATE_D, val); @@ -8989,7 +9029,7 @@ static void hsw_init_clock_gating(struct drm_i915_private *dev_priv) static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) { - uint32_t snpcr; + u32 snpcr; I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); @@ -9198,7 +9238,7 @@ static void chv_init_clock_gating(struct drm_i915_private *dev_priv) static void g4x_init_clock_gating(struct drm_i915_private *dev_priv) { - uint32_t dspclk_gate; + u32 dspclk_gate; I915_WRITE(RENCLK_GATE_D1, 0); I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE | @@ -9448,7 +9488,7 @@ void intel_init_pm(struct drm_i915_private *dev_priv) static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv) { - uint32_t flags = + u32 flags = I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK; switch (flags) { @@ -9471,7 +9511,7 @@ static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv) static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv) { - uint32_t flags = + u32 flags = I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK; switch (flags) { diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 0f6b2b4702e3..84a0fb981561 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -70,17 +70,17 @@ static bool psr_global_enabled(u32 debug) static bool intel_psr2_enabled(struct drm_i915_private *dev_priv, const struct intel_crtc_state *crtc_state) { - /* Disable PSR2 by default for all platforms */ - if (i915_modparams.enable_psr == -1) - return false; - /* Cannot enable DSC and PSR2 simultaneously */ WARN_ON(crtc_state->dsc_params.compression_enable && crtc_state->has_psr2); switch (dev_priv->psr.debug & I915_PSR_DEBUG_MODE_MASK) { + case I915_PSR_DEBUG_DISABLE: case I915_PSR_DEBUG_FORCE_PSR1: return false; + case I915_PSR_DEBUG_DEFAULT: + if (i915_modparams.enable_psr <= 0) + return false; default: return crtc_state->has_psr2; } @@ -230,7 +230,7 @@ void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir) static bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp) { - uint8_t dprx = 0; + u8 dprx = 0; if (drm_dp_dpcd_readb(&intel_dp->aux, DP_DPRX_FEATURE_ENUMERATION_LIST, &dprx) != 1) @@ -240,7 +240,7 @@ static bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp) static bool intel_dp_get_alpm_status(struct intel_dp *intel_dp) { - uint8_t alpm_caps = 0; + u8 alpm_caps = 0; if (drm_dp_dpcd_readb(&intel_dp->aux, DP_RECEIVER_ALPM_CAP, &alpm_caps) != 1) @@ -384,7 +384,7 @@ static void hsw_psr_setup_aux(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); u32 aux_clock_divider, aux_ctl; int i; - static const uint8_t aux_msg[] = { + static const u8 aux_msg[] = { [0] = DP_AUX_NATIVE_WRITE << 4, [1] = DP_SET_POWER >> 8, [2] = DP_SET_POWER & 0xff, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 26b7274a2d43..7f841dba87b3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -33,6 +33,7 @@ #include "i915_drv.h" #include "i915_gem_render_state.h" +#include "i915_reset.h" #include "i915_trace.h" #include "intel_drv.h" #include "intel_workarounds.h" @@ -42,17 +43,10 @@ */ #define LEGACY_REQUEST_SIZE 200 -static unsigned int __intel_ring_space(unsigned int head, - unsigned int tail, - unsigned int size) +static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) { - /* - * "If the Ring Buffer Head Pointer and the Tail Pointer are on the - * same cacheline, the Head Pointer must not be greater than the Tail - * Pointer." - */ - GEM_BUG_ON(!is_power_of_2(size)); - return (head - tail - CACHELINE_BYTES) & (size - 1); + return (i915_ggtt_offset(engine->status_page.vma) + + I915_GEM_HWS_INDEX_ADDR); } unsigned int intel_ring_update_space(struct intel_ring *ring) @@ -299,7 +293,7 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode) return 0; } -static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { /* First we do the gen6_emit_post_sync_nonzero_flush w/a */ *cs++ = GFX_OP_PIPE_CONTROL(4); @@ -319,6 +313,11 @@ static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL); + *cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = rq->fence.seqno; + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; *cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT; *cs++ = rq->global_seqno; @@ -327,8 +326,9 @@ static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; } -static const int gen6_rcs_emit_breadcrumb_sz = 14; static int gen7_render_ring_cs_stall_wa(struct i915_request *rq) @@ -409,7 +409,7 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode) return 0; } -static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { *cs++ = GFX_OP_PIPE_CONTROL(4); *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | @@ -419,6 +419,13 @@ static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL); + *cs++ = rq->timeline->hwsp_offset; + *cs++ = rq->fence.seqno; + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = (PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL); *cs++ = intel_hws_seqno_address(rq->engine); *cs++ = rq->global_seqno; @@ -427,34 +434,52 @@ static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; } -static const int gen7_rcs_emit_breadcrumb_sz = 6; -static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW; - *cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT; + GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = rq->fence.seqno; + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = rq->global_seqno; + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; } -static const int gen6_xcs_emit_breadcrumb_sz = 4; #define GEN7_XCS_WA 32 -static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW; - *cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT; + GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = rq->fence.seqno; + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = rq->global_seqno; for (i = 0; i < GEN7_XCS_WA; i++) { *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_INDEX_ADDR; - *cs++ = rq->global_seqno; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; } *cs++ = MI_FLUSH_DW; @@ -462,12 +487,12 @@ static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = 0; *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; } -static const int gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3; #undef GEN7_XCS_WA static void set_hwstam(struct intel_engine_cs *engine, u32 mask) @@ -498,12 +523,17 @@ static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys) I915_WRITE(HWS_PGA, addr); } -static void ring_setup_phys_status_page(struct intel_engine_cs *engine) +static struct page *status_page(struct intel_engine_cs *engine) { - struct page *page = virt_to_page(engine->status_page.page_addr); - phys_addr_t phys = PFN_PHYS(page_to_pfn(page)); + struct drm_i915_gem_object *obj = engine->status_page.vma->obj; - set_hws_pga(engine, phys); + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + return sg_page(obj->mm.pages->sgl); +} + +static void ring_setup_phys_status_page(struct intel_engine_cs *engine) +{ + set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine)))); set_hwstam(engine, ~0u); } @@ -570,7 +600,7 @@ static void flush_cs_tlb(struct intel_engine_cs *engine) static void ring_setup_status_page(struct intel_engine_cs *engine) { - set_hwsp(engine, engine->status_page.ggtt_offset); + set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma)); set_hwstam(engine, ~0u); flush_cs_tlb(engine); @@ -700,59 +730,87 @@ static int init_ring_common(struct intel_engine_cs *engine) } /* Papering over lost _interrupts_ immediately following the restart */ - intel_engine_wakeup(engine); + intel_engine_queue_breadcrumbs(engine); out: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); return ret; } -static struct i915_request *reset_prepare(struct intel_engine_cs *engine) +static void reset_prepare(struct intel_engine_cs *engine) { intel_engine_stop_cs(engine); - return i915_gem_find_active_request(engine); } -static void skip_request(struct i915_request *rq) +static void reset_ring(struct intel_engine_cs *engine, bool stalled) { - void *vaddr = rq->ring->vaddr; + struct i915_timeline *tl = &engine->timeline; + struct i915_request *pos, *rq; + unsigned long flags; u32 head; - head = rq->infix; - if (rq->postfix < head) { - memset32(vaddr + head, MI_NOOP, - (rq->ring->size - head) / sizeof(u32)); - head = 0; + rq = NULL; + spin_lock_irqsave(&tl->lock, flags); + list_for_each_entry(pos, &tl->requests, link) { + if (!i915_request_completed(pos)) { + rq = pos; + break; + } } - memset32(vaddr + head, MI_NOOP, (rq->postfix - head) / sizeof(u32)); -} - -static void reset_ring(struct intel_engine_cs *engine, struct i915_request *rq) -{ - GEM_TRACE("%s request global=%d, current=%d\n", - engine->name, rq ? rq->global_seqno : 0, - intel_engine_get_seqno(engine)); + GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n", + engine->name, + rq ? rq->global_seqno : 0, + intel_engine_get_seqno(engine), + yesno(stalled)); /* - * Try to restore the logical GPU state to match the continuation - * of the request queue. If we skip the context/PD restore, then - * the next request may try to execute assuming that its context - * is valid and loaded on the GPU and so may try to access invalid - * memory, prompting repeated GPU hangs. + * The guilty request will get skipped on a hung engine. * - * If the request was guilty, we still restore the logical state - * in case the next request requires it (e.g. the aliasing ppgtt), - * but skip over the hung batch. + * Users of client default contexts do not rely on logical + * state preserved between batches so it is safe to execute + * queued requests following the hang. Non default contexts + * rely on preserved state, so skipping a batch loses the + * evolution of the state and it needs to be considered corrupted. + * Executing more queued batches on top of corrupted state is + * risky. But we take the risk by trying to advance through + * the queued requests in order to make the client behaviour + * more predictable around resets, by not throwing away random + * amount of batches it has prepared for execution. Sophisticated + * clients can use gem_reset_stats_ioctl and dma fence status + * (exported via sync_file info ioctl on explicit fences) to observe + * when it loses the context state and should rebuild accordingly. * - * If the request was innocent, we try to replay the request with - * the restored context. + * The context ban, and ultimately the client ban, mechanism are safety + * valves if client submission ends up resulting in nothing more than + * subsequent hangs. */ + if (rq) { - /* If the rq hung, jump to its breadcrumb and skip the batch */ - rq->ring->head = intel_ring_wrap(rq->ring, rq->head); - if (rq->fence.error == -EIO) - skip_request(rq); + /* + * Try to restore the logical GPU state to match the + * continuation of the request queue. If we skip the + * context/PD restore, then the next request may try to execute + * assuming that its context is valid and loaded on the GPU and + * so may try to access invalid memory, prompting repeated GPU + * hangs. + * + * If the request was guilty, we still restore the logical + * state in case the next request requires it (e.g. the + * aliasing ppgtt), but skip over the hung batch. + * + * If the request was innocent, we try to replay the request + * with the restored context. + */ + i915_reset_request(rq, stalled); + + GEM_BUG_ON(rq->ring != engine->buffer); + head = rq->head; + } else { + head = engine->buffer->tail; } + engine->buffer->head = intel_ring_wrap(engine->buffer, head); + + spin_unlock_irqrestore(&tl->lock, flags); } static void reset_finish(struct intel_engine_cs *engine) @@ -836,11 +894,10 @@ static void cancel_requests(struct intel_engine_cs *engine) list_for_each_entry(request, &engine->timeline.requests, link) { GEM_BUG_ON(!request->global_seqno); - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &request->fence.flags)) - continue; + if (!i915_request_signaled(request)) + dma_fence_set_error(&request->fence, -EIO); - dma_fence_set_error(&request->fence, -EIO); + i915_request_mark_complete(request); } intel_write_status_page(engine, @@ -862,29 +919,43 @@ static void i9xx_submit_request(struct i915_request *request) intel_ring_set_tail(request->ring, request->tail)); } -static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) { + GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + *cs++ = MI_FLUSH; *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; + + *cs++ = MI_STORE_DWORD_INDEX; *cs++ = I915_GEM_HWS_INDEX_ADDR; *cs++ = rq->global_seqno; *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; } -static const int i9xx_emit_breadcrumb_sz = 6; #define GEN5_WA_STORES 8 /* must be at least 1! */ -static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; + GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + *cs++ = MI_FLUSH; + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; + BUILD_BUG_ON(GEN5_WA_STORES < 1); for (i = 0; i < GEN5_WA_STORES; i++) { *cs++ = MI_STORE_DWORD_INDEX; @@ -893,11 +964,13 @@ static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) } *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; } -static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 2; #undef GEN5_WA_STORES static void @@ -1124,6 +1197,10 @@ int intel_ring_pin(struct intel_ring *ring) GEM_BUG_ON(ring->vaddr); + ret = i915_timeline_pin(ring->timeline); + if (ret) + return ret; + flags = PIN_GLOBAL; /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ @@ -1140,28 +1217,32 @@ int intel_ring_pin(struct intel_ring *ring) else ret = i915_gem_object_set_to_cpu_domain(vma->obj, true); if (unlikely(ret)) - return ret; + goto unpin_timeline; } ret = i915_vma_pin(vma, 0, 0, flags); if (unlikely(ret)) - return ret; + goto unpin_timeline; if (i915_vma_is_map_and_fenceable(vma)) addr = (void __force *)i915_vma_pin_iomap(vma); else addr = i915_gem_object_pin_map(vma->obj, map); - if (IS_ERR(addr)) - goto err; + if (IS_ERR(addr)) { + ret = PTR_ERR(addr); + goto unpin_ring; + } vma->obj->pin_global++; ring->vaddr = addr; return 0; -err: +unpin_ring: i915_vma_unpin(vma); - return PTR_ERR(addr); +unpin_timeline: + i915_timeline_unpin(ring->timeline); + return ret; } void intel_ring_reset(struct intel_ring *ring, u32 tail) @@ -1190,6 +1271,8 @@ void intel_ring_unpin(struct intel_ring *ring) ring->vma->obj->pin_global--; i915_vma_unpin(ring->vma); + + i915_timeline_unpin(ring->timeline); } static struct i915_vma * @@ -1500,13 +1583,18 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) struct intel_ring *ring; int err; - intel_engine_setup_common(engine); + err = intel_engine_setup_common(engine); + if (err) + return err; - timeline = i915_timeline_create(engine->i915, engine->name); + timeline = i915_timeline_create(engine->i915, + engine->name, + engine->status_page.vma); if (IS_ERR(timeline)) { err = PTR_ERR(timeline); goto err; } + GEM_BUG_ON(timeline->has_initial_breadcrumb); ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE); i915_timeline_put(timeline); @@ -1526,6 +1614,8 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) if (err) goto err_unpin; + GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma); + return 0; err_unpin: @@ -1858,6 +1948,7 @@ static int ring_request_alloc(struct i915_request *request) int ret; GEM_BUG_ON(!request->hw_context->pin_count); + GEM_BUG_ON(request->timeline->has_initial_breadcrumb); /* * Flush enough space to reduce the likelihood of waiting after @@ -2194,12 +2285,14 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->context_pin = intel_ring_context_pin; engine->request_alloc = ring_request_alloc; - engine->emit_breadcrumb = i9xx_emit_breadcrumb; - engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz; - if (IS_GEN(dev_priv, 5)) { - engine->emit_breadcrumb = gen5_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen5_emit_breadcrumb_sz; - } + /* + * Using a global execution timeline; the previous final breadcrumb is + * equivalent to our next initial bread so we can elide + * engine->emit_init_breadcrumb(). + */ + engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb; + if (IS_GEN(dev_priv, 5)) + engine->emit_fini_breadcrumb = gen5_emit_breadcrumb; engine->set_default_submission = i9xx_set_default_submission; @@ -2228,13 +2321,11 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) if (INTEL_GEN(dev_priv) >= 7) { engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen7_render_ring_flush; - engine->emit_breadcrumb = gen7_rcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen7_rcs_emit_breadcrumb_sz; + engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb; } else if (IS_GEN(dev_priv, 6)) { engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen6_render_ring_flush; - engine->emit_breadcrumb = gen6_rcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen6_rcs_emit_breadcrumb_sz; + engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb; } else if (IS_GEN(dev_priv, 5)) { engine->emit_flush = gen4_render_ring_flush; } else { @@ -2270,13 +2361,10 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) engine->emit_flush = gen6_bsd_ring_flush; engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; - if (IS_GEN(dev_priv, 6)) { - engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz; - } else { - engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz; - } + if (IS_GEN(dev_priv, 6)) + engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb; + else + engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; } else { engine->emit_flush = bsd_ring_flush; if (IS_GEN(dev_priv, 5)) @@ -2299,13 +2387,10 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) engine->emit_flush = gen6_ring_flush; engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; - if (IS_GEN(dev_priv, 6)) { - engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz; - } else { - engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz; - } + if (IS_GEN(dev_priv, 6)) + engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb; + else + engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; return intel_init_ring_buffer(engine); } @@ -2323,8 +2408,7 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) engine->irq_enable = hsw_vebox_irq_enable; engine->irq_disable = hsw_vebox_irq_disable; - engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz; + engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; return intel_init_ring_buffer(engine); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 3c1366c58cf3..710ffb221775 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -5,6 +5,7 @@ #include <drm/drm_util.h> #include <linux/hashtable.h> +#include <linux/irq_work.h> #include <linux/seqlock.h> #include "i915_gem_batch_pool.h" @@ -28,12 +29,11 @@ struct i915_sched_attr; * workarounds! */ #define CACHELINE_BYTES 64 -#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t)) +#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32)) struct intel_hw_status_page { struct i915_vma *vma; - u32 *page_addr; - u32 ggtt_offset; + u32 *addr; }; #define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base)) @@ -120,13 +120,8 @@ struct intel_instdone { struct intel_engine_hangcheck { u64 acthd; u32 seqno; - enum intel_engine_hangcheck_action action; unsigned long action_timestamp; - int deadlock; struct intel_instdone instdone; - struct i915_request *active_request; - bool stalled:1; - bool wedged:1; }; struct intel_ring { @@ -209,6 +204,7 @@ struct i915_priolist { struct st_preempt_hang { struct completion completion; + unsigned int count; bool inject_hang; }; @@ -299,14 +295,18 @@ struct intel_engine_execlists { unsigned int port_mask; /** - * @queue_priority: Highest pending priority. + * @queue_priority_hint: Highest pending priority. * * When we add requests into the queue, or adjust the priority of * executing requests, we compute the maximum priority of those * pending requests. We can then use this value to determine if * we need to preempt the executing requests to service the queue. + * However, since the we may have recorded the priority of an inflight + * request we wanted to preempt but since completed, at the time of + * dequeuing the priority hint may no longer may match the highest + * available request priority. */ - int queue_priority; + int queue_priority_hint; /** * @queue: queue of requests, in priority lists @@ -382,23 +382,14 @@ struct intel_engine_cs { * the overhead of waking that client is much preferred. */ struct intel_breadcrumbs { - spinlock_t irq_lock; /* protects irq_*; irqsafe */ - struct intel_wait *irq_wait; /* oldest waiter by retirement */ + spinlock_t irq_lock; + struct list_head signalers; - spinlock_t rb_lock; /* protects the rb and wraps irq_lock */ - struct rb_root waiters; /* sorted by retirement, priority */ - struct list_head signals; /* sorted by retirement */ - struct task_struct *signaler; /* used for fence signalling */ + struct irq_work irq_work; /* for use from inside irq_lock */ - struct timer_list fake_irq; /* used after a missed interrupt */ - struct timer_list hangcheck; /* detect missed interrupts */ - - unsigned int hangcheck_interrupts; unsigned int irq_enabled; - unsigned int irq_count; - bool irq_armed : 1; - I915_SELFTEST_DECLARE(bool mock : 1); + bool irq_armed; } breadcrumbs; struct { @@ -412,16 +403,17 @@ struct intel_engine_cs { /** * @enable_count: Reference count for the enabled samplers. * - * Index number corresponds to the bit number from @enable. + * Index number corresponds to @enum drm_i915_pmu_engine_sample. */ - unsigned int enable_count[I915_PMU_SAMPLE_BITS]; + unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT]; /** * @sample: Counter values for sampling events. * * Our internal timer stores the current counters in this field. + * + * Index number corresponds to @enum drm_i915_pmu_engine_sample. */ -#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1) - struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX]; + struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT]; } pmu; /* @@ -445,9 +437,8 @@ struct intel_engine_cs { int (*init_hw)(struct intel_engine_cs *engine); struct { - struct i915_request *(*prepare)(struct intel_engine_cs *engine); - void (*reset)(struct intel_engine_cs *engine, - struct i915_request *rq); + void (*prepare)(struct intel_engine_cs *engine); + void (*reset)(struct intel_engine_cs *engine, bool stalled); void (*finish)(struct intel_engine_cs *engine); } reset; @@ -471,8 +462,10 @@ struct intel_engine_cs { unsigned int dispatch_flags); #define I915_DISPATCH_SECURE BIT(0) #define I915_DISPATCH_PINNED BIT(1) - void (*emit_breadcrumb)(struct i915_request *rq, u32 *cs); - int emit_breadcrumb_sz; + int (*emit_init_breadcrumb)(struct i915_request *rq); + u32 *(*emit_fini_breadcrumb)(struct i915_request *rq, + u32 *cs); + unsigned int emit_fini_breadcrumb_dw; /* Pass the request to the hardware queue (e.g. directly into * the legacy ringbuffer or to the end of an execlist). @@ -600,7 +593,20 @@ intel_engine_has_preemption(const struct intel_engine_cs *engine) static inline bool __execlists_need_preempt(int prio, int last) { - return prio > max(0, last); + /* + * Allow preemption of low -> normal -> high, but we do + * not allow low priority tasks to preempt other low priority + * tasks under the impression that latency for low priority + * tasks does not matter (as much as background throughput), + * so kiss. + * + * More naturally we would write + * prio >= max(0, last); + * except that we wish to prevent triggering preemption at the same + * priority level: the task that is running should remain running + * to preserve FIFO ordering of dependencies. + */ + return prio > max(I915_PRIORITY_NORMAL - 1, last); } static inline void @@ -678,7 +684,7 @@ static inline u32 intel_read_status_page(const struct intel_engine_cs *engine, int reg) { /* Ensure that the compiler doesn't optimize away the load. */ - return READ_ONCE(engine->status_page.page_addr[reg]); + return READ_ONCE(engine->status_page.addr[reg]); } static inline void @@ -691,12 +697,12 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) */ if (static_cpu_has(X86_FEATURE_CLFLUSH)) { mb(); - clflush(&engine->status_page.page_addr[reg]); - engine->status_page.page_addr[reg] = value; - clflush(&engine->status_page.page_addr[reg]); + clflush(&engine->status_page.addr[reg]); + engine->status_page.addr[reg] = value; + clflush(&engine->status_page.addr[reg]); mb(); } else { - WRITE_ONCE(engine->status_page.page_addr[reg], value); + WRITE_ONCE(engine->status_page.addr[reg], value); } } @@ -717,11 +723,13 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) * The area from dword 0x30 to 0x3ff is available for driver usage. */ #define I915_GEM_HWS_INDEX 0x30 -#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT) -#define I915_GEM_HWS_PREEMPT_INDEX 0x32 -#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT) -#define I915_GEM_HWS_SCRATCH_INDEX 0x40 -#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) +#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX * sizeof(u32)) +#define I915_GEM_HWS_PREEMPT 0x32 +#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32)) +#define I915_GEM_HWS_SEQNO 0x40 +#define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32)) +#define I915_GEM_HWS_SCRATCH 0x80 +#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH * sizeof(u32)) #define I915_HWS_CSB_BUF0_INDEX 0x10 #define I915_HWS_CSB_WRITE_INDEX 0x1f @@ -824,9 +832,21 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) return tail; } +static inline unsigned int +__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) +{ + /* + * "If the Ring Buffer Head Pointer and the Tail Pointer are on the + * same cacheline, the Head Pointer must not be greater than the Tail + * Pointer." + */ + GEM_BUG_ON(!is_power_of_2(size)); + return (head - tail - CACHELINE_BYTES) & (size - 1); +} + void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno); -void intel_engine_setup_common(struct intel_engine_cs *engine); +int intel_engine_setup_common(struct intel_engine_cs *engine); int intel_engine_init_common(struct intel_engine_cs *engine); void intel_engine_cleanup_common(struct intel_engine_cs *engine); @@ -884,93 +904,29 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine, void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone); -static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) -{ - return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR; -} - -static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine) -{ - return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR; -} - -/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ -int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); - -static inline void intel_wait_init(struct intel_wait *wait) -{ - wait->tsk = current; - wait->request = NULL; -} - -static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno) -{ - wait->tsk = current; - wait->seqno = seqno; -} - -static inline bool intel_wait_has_seqno(const struct intel_wait *wait) -{ - return wait->seqno; -} - -static inline bool -intel_wait_update_seqno(struct intel_wait *wait, u32 seqno) -{ - wait->seqno = seqno; - return intel_wait_has_seqno(wait); -} - -static inline bool -intel_wait_update_request(struct intel_wait *wait, - const struct i915_request *rq) -{ - return intel_wait_update_seqno(wait, i915_request_global_seqno(rq)); -} - -static inline bool -intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno) -{ - return wait->seqno == seqno; -} - -static inline bool -intel_wait_check_request(const struct intel_wait *wait, - const struct i915_request *rq) -{ - return intel_wait_check_seqno(wait, i915_request_global_seqno(rq)); -} +void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); -static inline bool intel_wait_complete(const struct intel_wait *wait) -{ - return RB_EMPTY_NODE(&wait->node); -} +void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine); +void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine); -bool intel_engine_add_wait(struct intel_engine_cs *engine, - struct intel_wait *wait); -void intel_engine_remove_wait(struct intel_engine_cs *engine, - struct intel_wait *wait); -bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup); -void intel_engine_cancel_signaling(struct i915_request *request); +bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); -static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) +static inline void +intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine) { - return READ_ONCE(engine->breadcrumbs.irq_wait); + irq_work_queue(&engine->breadcrumbs.irq_work); } -unsigned int intel_engine_wakeup(struct intel_engine_cs *engine); -#define ENGINE_WAKEUP_WAITER BIT(0) -#define ENGINE_WAKEUP_ASLEEP BIT(1) - -void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine); -void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine); - -void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); -void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); +bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine); void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, + struct drm_printer *p); + static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) { memset(batch, 0, 6 * sizeof(u32)); @@ -1019,6 +975,13 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset) return cs; } +static inline void intel_engine_reset(struct intel_engine_cs *engine, + bool stalled) +{ + if (engine->reset.reset) + engine->reset.reset(engine, stalled); +} + void intel_engines_sanitize(struct drm_i915_private *i915, bool force); bool intel_engine_is_idle(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 9e9501f82f06..a017a4232c0f 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -29,6 +29,8 @@ #include <linux/pm_runtime.h> #include <linux/vgaarb.h> +#include <drm/drm_print.h> + #include "i915_drv.h" #include "intel_drv.h" @@ -49,6 +51,268 @@ * present for a given platform. */ +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) + +#include <linux/sort.h> + +#define STACKDEPTH 8 + +static noinline depot_stack_handle_t __save_depot_stack(void) +{ + unsigned long entries[STACKDEPTH]; + struct stack_trace trace = { + .entries = entries, + .max_entries = ARRAY_SIZE(entries), + .skip = 1, + }; + + save_stack_trace(&trace); + if (trace.nr_entries && + trace.entries[trace.nr_entries - 1] == ULONG_MAX) + trace.nr_entries--; + + return depot_save_stack(&trace, GFP_NOWAIT | __GFP_NOWARN); +} + +static void __print_depot_stack(depot_stack_handle_t stack, + char *buf, int sz, int indent) +{ + unsigned long entries[STACKDEPTH]; + struct stack_trace trace = { + .entries = entries, + .max_entries = ARRAY_SIZE(entries), + }; + + depot_fetch_stack(stack, &trace); + snprint_stack_trace(buf, sz, &trace, indent); +} + +static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915) +{ + struct i915_runtime_pm *rpm = &i915->runtime_pm; + + spin_lock_init(&rpm->debug.lock); +} + +static noinline depot_stack_handle_t +track_intel_runtime_pm_wakeref(struct drm_i915_private *i915) +{ + struct i915_runtime_pm *rpm = &i915->runtime_pm; + depot_stack_handle_t stack, *stacks; + unsigned long flags; + + atomic_inc(&rpm->wakeref_count); + assert_rpm_wakelock_held(i915); + + if (!HAS_RUNTIME_PM(i915)) + return -1; + + stack = __save_depot_stack(); + if (!stack) + return -1; + + spin_lock_irqsave(&rpm->debug.lock, flags); + + if (!rpm->debug.count) + rpm->debug.last_acquire = stack; + + stacks = krealloc(rpm->debug.owners, + (rpm->debug.count + 1) * sizeof(*stacks), + GFP_NOWAIT | __GFP_NOWARN); + if (stacks) { + stacks[rpm->debug.count++] = stack; + rpm->debug.owners = stacks; + } else { + stack = -1; + } + + spin_unlock_irqrestore(&rpm->debug.lock, flags); + + return stack; +} + +static void cancel_intel_runtime_pm_wakeref(struct drm_i915_private *i915, + depot_stack_handle_t stack) +{ + struct i915_runtime_pm *rpm = &i915->runtime_pm; + unsigned long flags, n; + bool found = false; + + if (unlikely(stack == -1)) + return; + + spin_lock_irqsave(&rpm->debug.lock, flags); + for (n = rpm->debug.count; n--; ) { + if (rpm->debug.owners[n] == stack) { + memmove(rpm->debug.owners + n, + rpm->debug.owners + n + 1, + (--rpm->debug.count - n) * sizeof(stack)); + found = true; + break; + } + } + spin_unlock_irqrestore(&rpm->debug.lock, flags); + + if (WARN(!found, + "Unmatched wakeref (tracking %lu), count %u\n", + rpm->debug.count, atomic_read(&rpm->wakeref_count))) { + char *buf; + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + return; + + __print_depot_stack(stack, buf, PAGE_SIZE, 2); + DRM_DEBUG_DRIVER("wakeref %x from\n%s", stack, buf); + + stack = READ_ONCE(rpm->debug.last_release); + if (stack) { + __print_depot_stack(stack, buf, PAGE_SIZE, 2); + DRM_DEBUG_DRIVER("wakeref last released at\n%s", buf); + } + + kfree(buf); + } +} + +static int cmphandle(const void *_a, const void *_b) +{ + const depot_stack_handle_t * const a = _a, * const b = _b; + + if (*a < *b) + return -1; + else if (*a > *b) + return 1; + else + return 0; +} + +static void +__print_intel_runtime_pm_wakeref(struct drm_printer *p, + const struct intel_runtime_pm_debug *dbg) +{ + unsigned long i; + char *buf; + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + return; + + if (dbg->last_acquire) { + __print_depot_stack(dbg->last_acquire, buf, PAGE_SIZE, 2); + drm_printf(p, "Wakeref last acquired:\n%s", buf); + } + + if (dbg->last_release) { + __print_depot_stack(dbg->last_release, buf, PAGE_SIZE, 2); + drm_printf(p, "Wakeref last released:\n%s", buf); + } + + drm_printf(p, "Wakeref count: %lu\n", dbg->count); + + sort(dbg->owners, dbg->count, sizeof(*dbg->owners), cmphandle, NULL); + + for (i = 0; i < dbg->count; i++) { + depot_stack_handle_t stack = dbg->owners[i]; + unsigned long rep; + + rep = 1; + while (i + 1 < dbg->count && dbg->owners[i + 1] == stack) + rep++, i++; + __print_depot_stack(stack, buf, PAGE_SIZE, 2); + drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf); + } + + kfree(buf); +} + +static noinline void +untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915) +{ + struct i915_runtime_pm *rpm = &i915->runtime_pm; + struct intel_runtime_pm_debug dbg = {}; + struct drm_printer p; + unsigned long flags; + + assert_rpm_wakelock_held(i915); + if (atomic_dec_and_lock_irqsave(&rpm->wakeref_count, + &rpm->debug.lock, + flags)) { + dbg = rpm->debug; + + rpm->debug.owners = NULL; + rpm->debug.count = 0; + rpm->debug.last_release = __save_depot_stack(); + + spin_unlock_irqrestore(&rpm->debug.lock, flags); + } + if (!dbg.count) + return; + + p = drm_debug_printer("i915"); + __print_intel_runtime_pm_wakeref(&p, &dbg); + + kfree(dbg.owners); +} + +void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915, + struct drm_printer *p) +{ + struct intel_runtime_pm_debug dbg = {}; + + do { + struct i915_runtime_pm *rpm = &i915->runtime_pm; + unsigned long alloc = dbg.count; + depot_stack_handle_t *s; + + spin_lock_irq(&rpm->debug.lock); + dbg.count = rpm->debug.count; + if (dbg.count <= alloc) { + memcpy(dbg.owners, + rpm->debug.owners, + dbg.count * sizeof(*s)); + } + dbg.last_acquire = rpm->debug.last_acquire; + dbg.last_release = rpm->debug.last_release; + spin_unlock_irq(&rpm->debug.lock); + if (dbg.count <= alloc) + break; + + s = krealloc(dbg.owners, dbg.count * sizeof(*s), GFP_KERNEL); + if (!s) + goto out; + + dbg.owners = s; + } while (1); + + __print_intel_runtime_pm_wakeref(p, &dbg); + +out: + kfree(dbg.owners); +} + +#else + +static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915) +{ +} + +static depot_stack_handle_t +track_intel_runtime_pm_wakeref(struct drm_i915_private *i915) +{ + atomic_inc(&i915->runtime_pm.wakeref_count); + assert_rpm_wakelock_held(i915); + return -1; +} + +static void untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915) +{ + assert_rpm_wakelock_held(i915); + atomic_dec(&i915->runtime_pm.wakeref_count); +} + +#endif + bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv, enum i915_power_well_id power_well_id); @@ -639,10 +903,10 @@ void gen9_sanitize_dc_state(struct drm_i915_private *dev_priv) * back on and register state is restored. This is guaranteed by the MMIO write * to DC_STATE_EN blocking until the state is restored. */ -static void gen9_set_dc_state(struct drm_i915_private *dev_priv, uint32_t state) +static void gen9_set_dc_state(struct drm_i915_private *dev_priv, u32 state) { - uint32_t val; - uint32_t mask; + u32 val; + u32 mask; if (WARN_ON_ONCE(state & ~dev_priv->csr.allowed_dc_mask)) state &= dev_priv->csr.allowed_dc_mask; @@ -1274,7 +1538,7 @@ static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, { enum dpio_phy phy; enum pipe pipe; - uint32_t tmp; + u32 tmp; WARN_ON_ONCE(power_well->desc->id != VLV_DISP_PW_DPIO_CMN_BC && power_well->desc->id != CHV_DISP_PW_DPIO_CMN_D); @@ -1591,18 +1855,19 @@ __intel_display_power_get_domain(struct drm_i915_private *dev_priv, * Any power domain reference obtained by this function must have a symmetric * call to intel_display_power_put() to release the reference again. */ -void intel_display_power_get(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain) +intel_wakeref_t intel_display_power_get(struct drm_i915_private *dev_priv, + enum intel_display_power_domain domain) { struct i915_power_domains *power_domains = &dev_priv->power_domains; - - intel_runtime_pm_get(dev_priv); + intel_wakeref_t wakeref = intel_runtime_pm_get(dev_priv); mutex_lock(&power_domains->lock); __intel_display_power_get_domain(dev_priv, domain); mutex_unlock(&power_domains->lock); + + return wakeref; } /** @@ -1617,13 +1882,16 @@ void intel_display_power_get(struct drm_i915_private *dev_priv, * Any power domain reference obtained by this function must have a symmetric * call to intel_display_power_put() to release the reference again. */ -bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain) +intel_wakeref_t +intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv, + enum intel_display_power_domain domain) { struct i915_power_domains *power_domains = &dev_priv->power_domains; + intel_wakeref_t wakeref; bool is_enabled; - if (!intel_runtime_pm_get_if_in_use(dev_priv)) + wakeref = intel_runtime_pm_get_if_in_use(dev_priv); + if (!wakeref) return false; mutex_lock(&power_domains->lock); @@ -1637,23 +1905,16 @@ bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv, mutex_unlock(&power_domains->lock); - if (!is_enabled) - intel_runtime_pm_put(dev_priv); + if (!is_enabled) { + intel_runtime_pm_put(dev_priv, wakeref); + wakeref = 0; + } - return is_enabled; + return wakeref; } -/** - * intel_display_power_put - release a power domain reference - * @dev_priv: i915 device instance - * @domain: power domain to reference - * - * This function drops the power domain reference obtained by - * intel_display_power_get() and might power down the corresponding hardware - * block right away if this is the last reference. - */ -void intel_display_power_put(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain) +static void __intel_display_power_put(struct drm_i915_private *dev_priv, + enum intel_display_power_domain domain) { struct i915_power_domains *power_domains; struct i915_power_well *power_well; @@ -1671,9 +1932,33 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, intel_power_well_put(dev_priv, power_well); mutex_unlock(&power_domains->lock); +} + +/** + * intel_display_power_put - release a power domain reference + * @dev_priv: i915 device instance + * @domain: power domain to reference + * + * This function drops the power domain reference obtained by + * intel_display_power_get() and might power down the corresponding hardware + * block right away if this is the last reference. + */ +void intel_display_power_put_unchecked(struct drm_i915_private *dev_priv, + enum intel_display_power_domain domain) +{ + __intel_display_power_put(dev_priv, domain); + intel_runtime_pm_put_unchecked(dev_priv); +} - intel_runtime_pm_put(dev_priv); +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) +void intel_display_power_put(struct drm_i915_private *dev_priv, + enum intel_display_power_domain domain, + intel_wakeref_t wakeref) +{ + __intel_display_power_put(dev_priv, domain); + intel_runtime_pm_put(dev_priv, wakeref); } +#endif #define I830_PIPES_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_PIPE_A) | \ @@ -3043,10 +3328,10 @@ sanitize_disable_power_well_option(const struct drm_i915_private *dev_priv, return 1; } -static uint32_t get_allowed_dc_mask(const struct drm_i915_private *dev_priv, - int enable_dc) +static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv, + int enable_dc) { - uint32_t mask; + u32 mask; int requested_dc; int max_dc; @@ -3311,7 +3596,7 @@ static void icl_dbuf_disable(struct drm_i915_private *dev_priv) static void icl_mbus_init(struct drm_i915_private *dev_priv) { - uint32_t val; + u32 val; val = MBUS_ABOX_BT_CREDIT_POOL1(16) | MBUS_ABOX_BT_CREDIT_POOL2(16) | @@ -3622,7 +3907,7 @@ static void chv_phy_control_init(struct drm_i915_private *dev_priv) * current lane status. */ if (cmn_bc->desc->ops->is_enabled(dev_priv, cmn_bc)) { - uint32_t status = I915_READ(DPLL(PIPE_A)); + u32 status = I915_READ(DPLL(PIPE_A)); unsigned int mask; mask = status & DPLL_PORTB_READY_MASK; @@ -3653,7 +3938,7 @@ static void chv_phy_control_init(struct drm_i915_private *dev_priv) } if (cmn_d->desc->ops->is_enabled(dev_priv, cmn_d)) { - uint32_t status = I915_READ(DPIO_PHY_STATUS); + u32 status = I915_READ(DPIO_PHY_STATUS); unsigned int mask; mask = status & DPLL_PORTD_READY_MASK; @@ -3712,7 +3997,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv); /** * intel_power_domains_init_hw - initialize hardware power domain state - * @dev_priv: i915 device instance + * @i915: i915 device instance * @resume: Called from resume code paths or not * * This function initializes the hardware power domain state and enables all @@ -3726,30 +4011,31 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv); * intel_power_domains_enable()) and must be paired with * intel_power_domains_fini_hw(). */ -void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume) +void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume) { - struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_domains *power_domains = &i915->power_domains; power_domains->initializing = true; - if (IS_ICELAKE(dev_priv)) { - icl_display_core_init(dev_priv, resume); - } else if (IS_CANNONLAKE(dev_priv)) { - cnl_display_core_init(dev_priv, resume); - } else if (IS_GEN9_BC(dev_priv)) { - skl_display_core_init(dev_priv, resume); - } else if (IS_GEN9_LP(dev_priv)) { - bxt_display_core_init(dev_priv, resume); - } else if (IS_CHERRYVIEW(dev_priv)) { + if (IS_ICELAKE(i915)) { + icl_display_core_init(i915, resume); + } else if (IS_CANNONLAKE(i915)) { + cnl_display_core_init(i915, resume); + } else if (IS_GEN9_BC(i915)) { + skl_display_core_init(i915, resume); + } else if (IS_GEN9_LP(i915)) { + bxt_display_core_init(i915, resume); + } else if (IS_CHERRYVIEW(i915)) { mutex_lock(&power_domains->lock); - chv_phy_control_init(dev_priv); + chv_phy_control_init(i915); mutex_unlock(&power_domains->lock); - } else if (IS_VALLEYVIEW(dev_priv)) { + } else if (IS_VALLEYVIEW(i915)) { mutex_lock(&power_domains->lock); - vlv_cmnlane_wa(dev_priv); + vlv_cmnlane_wa(i915); mutex_unlock(&power_domains->lock); - } else if (IS_IVYBRIDGE(dev_priv) || INTEL_GEN(dev_priv) >= 7) - intel_pch_reset_handshake(dev_priv, !HAS_PCH_NOP(dev_priv)); + } else if (IS_IVYBRIDGE(i915) || INTEL_GEN(i915) >= 7) { + intel_pch_reset_handshake(i915, !HAS_PCH_NOP(i915)); + } /* * Keep all power wells enabled for any dependent HW access during @@ -3757,18 +4043,20 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume) * resources powered until display HW readout is complete. We drop * this reference in intel_power_domains_enable(). */ - intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); + power_domains->wakeref = + intel_display_power_get(i915, POWER_DOMAIN_INIT); + /* Disable power support if the user asked so. */ if (!i915_modparams.disable_power_well) - intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); - intel_power_domains_sync_hw(dev_priv); + intel_display_power_get(i915, POWER_DOMAIN_INIT); + intel_power_domains_sync_hw(i915); power_domains->initializing = false; } /** * intel_power_domains_fini_hw - deinitialize hw power domain state - * @dev_priv: i915 device instance + * @i915: i915 device instance * * De-initializes the display power domain HW state. It also ensures that the * device stays powered up so that the driver can be reloaded. @@ -3777,21 +4065,24 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume) * intel_power_domains_disable()) and must be paired with * intel_power_domains_init_hw(). */ -void intel_power_domains_fini_hw(struct drm_i915_private *dev_priv) +void intel_power_domains_fini_hw(struct drm_i915_private *i915) { - /* Keep the power well enabled, but cancel its rpm wakeref. */ - intel_runtime_pm_put(dev_priv); + intel_wakeref_t wakeref __maybe_unused = + fetch_and_zero(&i915->power_domains.wakeref); /* Remove the refcount we took to keep power well support disabled. */ if (!i915_modparams.disable_power_well) - intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); + intel_display_power_put_unchecked(i915, POWER_DOMAIN_INIT); + + intel_power_domains_verify_state(i915); - intel_power_domains_verify_state(dev_priv); + /* Keep the power well enabled, but cancel its rpm wakeref. */ + intel_runtime_pm_put(i915, wakeref); } /** * intel_power_domains_enable - enable toggling of display power wells - * @dev_priv: i915 device instance + * @i915: i915 device instance * * Enable the ondemand enabling/disabling of the display power wells. Note that * power wells not belonging to POWER_DOMAIN_INIT are allowed to be toggled @@ -3801,30 +4092,36 @@ void intel_power_domains_fini_hw(struct drm_i915_private *dev_priv) * of display HW readout (which will acquire the power references reflecting * the current HW state). */ -void intel_power_domains_enable(struct drm_i915_private *dev_priv) +void intel_power_domains_enable(struct drm_i915_private *i915) { - intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); + intel_wakeref_t wakeref __maybe_unused = + fetch_and_zero(&i915->power_domains.wakeref); - intel_power_domains_verify_state(dev_priv); + intel_display_power_put(i915, POWER_DOMAIN_INIT, wakeref); + intel_power_domains_verify_state(i915); } /** * intel_power_domains_disable - disable toggling of display power wells - * @dev_priv: i915 device instance + * @i915: i915 device instance * * Disable the ondemand enabling/disabling of the display power wells. See * intel_power_domains_enable() for which power wells this call controls. */ -void intel_power_domains_disable(struct drm_i915_private *dev_priv) +void intel_power_domains_disable(struct drm_i915_private *i915) { - intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); + struct i915_power_domains *power_domains = &i915->power_domains; - intel_power_domains_verify_state(dev_priv); + WARN_ON(power_domains->wakeref); + power_domains->wakeref = + intel_display_power_get(i915, POWER_DOMAIN_INIT); + + intel_power_domains_verify_state(i915); } /** * intel_power_domains_suspend - suspend power domain state - * @dev_priv: i915 device instance + * @i915: i915 device instance * @suspend_mode: specifies the target suspend state (idle, mem, hibernation) * * This function prepares the hardware power domain state before entering @@ -3833,12 +4130,14 @@ void intel_power_domains_disable(struct drm_i915_private *dev_priv) * It must be called with power domains already disabled (after a call to * intel_power_domains_disable()) and paired with intel_power_domains_resume(). */ -void intel_power_domains_suspend(struct drm_i915_private *dev_priv, +void intel_power_domains_suspend(struct drm_i915_private *i915, enum i915_drm_suspend_mode suspend_mode) { - struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_domains *power_domains = &i915->power_domains; + intel_wakeref_t wakeref __maybe_unused = + fetch_and_zero(&power_domains->wakeref); - intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); + intel_display_power_put(i915, POWER_DOMAIN_INIT, wakeref); /* * In case of suspend-to-idle (aka S0ix) on a DMC platform without DC9 @@ -3847,10 +4146,10 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv, * resources as required and also enable deeper system power states * that would be blocked if the firmware was inactive. */ - if (!(dev_priv->csr.allowed_dc_mask & DC_STATE_EN_DC9) && + if (!(i915->csr.allowed_dc_mask & DC_STATE_EN_DC9) && suspend_mode == I915_DRM_SUSPEND_IDLE && - dev_priv->csr.dmc_payload != NULL) { - intel_power_domains_verify_state(dev_priv); + i915->csr.dmc_payload) { + intel_power_domains_verify_state(i915); return; } @@ -3859,25 +4158,25 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv, * power wells if power domains must be deinitialized for suspend. */ if (!i915_modparams.disable_power_well) { - intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); - intel_power_domains_verify_state(dev_priv); + intel_display_power_put_unchecked(i915, POWER_DOMAIN_INIT); + intel_power_domains_verify_state(i915); } - if (IS_ICELAKE(dev_priv)) - icl_display_core_uninit(dev_priv); - else if (IS_CANNONLAKE(dev_priv)) - cnl_display_core_uninit(dev_priv); - else if (IS_GEN9_BC(dev_priv)) - skl_display_core_uninit(dev_priv); - else if (IS_GEN9_LP(dev_priv)) - bxt_display_core_uninit(dev_priv); + if (IS_ICELAKE(i915)) + icl_display_core_uninit(i915); + else if (IS_CANNONLAKE(i915)) + cnl_display_core_uninit(i915); + else if (IS_GEN9_BC(i915)) + skl_display_core_uninit(i915); + else if (IS_GEN9_LP(i915)) + bxt_display_core_uninit(i915); power_domains->display_core_suspended = true; } /** * intel_power_domains_resume - resume power domain state - * @dev_priv: i915 device instance + * @i915: i915 device instance * * This function resume the hardware power domain state during system resume. * @@ -3885,28 +4184,30 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv, * intel_power_domains_enable()) and must be paired with * intel_power_domains_suspend(). */ -void intel_power_domains_resume(struct drm_i915_private *dev_priv) +void intel_power_domains_resume(struct drm_i915_private *i915) { - struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_domains *power_domains = &i915->power_domains; if (power_domains->display_core_suspended) { - intel_power_domains_init_hw(dev_priv, true); + intel_power_domains_init_hw(i915, true); power_domains->display_core_suspended = false; } else { - intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); + WARN_ON(power_domains->wakeref); + power_domains->wakeref = + intel_display_power_get(i915, POWER_DOMAIN_INIT); } - intel_power_domains_verify_state(dev_priv); + intel_power_domains_verify_state(i915); } #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) -static void intel_power_domains_dump_info(struct drm_i915_private *dev_priv) +static void intel_power_domains_dump_info(struct drm_i915_private *i915) { - struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_domains *power_domains = &i915->power_domains; struct i915_power_well *power_well; - for_each_power_well(dev_priv, power_well) { + for_each_power_well(i915, power_well) { enum intel_display_power_domain domain; DRM_DEBUG_DRIVER("%-25s %d\n", @@ -3921,7 +4222,7 @@ static void intel_power_domains_dump_info(struct drm_i915_private *dev_priv) /** * intel_power_domains_verify_state - verify the HW/SW state for all power wells - * @dev_priv: i915 device instance + * @i915: i915 device instance * * Verify if the reference count of each power well matches its HW enabled * state and the total refcount of the domains it belongs to. This must be @@ -3929,22 +4230,21 @@ static void intel_power_domains_dump_info(struct drm_i915_private *dev_priv) * acquiring reference counts for any power wells in use and disabling the * ones left on by BIOS but not required by any active output. */ -static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv) +static void intel_power_domains_verify_state(struct drm_i915_private *i915) { - struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_domains *power_domains = &i915->power_domains; struct i915_power_well *power_well; bool dump_domain_info; mutex_lock(&power_domains->lock); dump_domain_info = false; - for_each_power_well(dev_priv, power_well) { + for_each_power_well(i915, power_well) { enum intel_display_power_domain domain; int domains_count; bool enabled; - enabled = power_well->desc->ops->is_enabled(dev_priv, - power_well); + enabled = power_well->desc->ops->is_enabled(i915, power_well); if ((power_well->count || power_well->desc->always_on) != enabled) DRM_ERROR("power well %s state mismatch (refcount %d/enabled %d)", @@ -3968,7 +4268,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv) static bool dumped; if (!dumped) { - intel_power_domains_dump_info(dev_priv); + intel_power_domains_dump_info(i915); dumped = true; } } @@ -3978,7 +4278,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv) #else -static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv) +static void intel_power_domains_verify_state(struct drm_i915_private *i915) { } @@ -3986,30 +4286,31 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv) /** * intel_runtime_pm_get - grab a runtime pm reference - * @dev_priv: i915 device instance + * @i915: i915 device instance * * This function grabs a device-level runtime pm reference (mostly used for GEM * code to ensure the GTT or GT is on) and ensures that it is powered up. * * Any runtime pm reference obtained by this function must have a symmetric * call to intel_runtime_pm_put() to release the reference again. + * + * Returns: the wakeref cookie to pass to intel_runtime_pm_put() */ -void intel_runtime_pm_get(struct drm_i915_private *dev_priv) +intel_wakeref_t intel_runtime_pm_get(struct drm_i915_private *i915) { - struct pci_dev *pdev = dev_priv->drm.pdev; + struct pci_dev *pdev = i915->drm.pdev; struct device *kdev = &pdev->dev; int ret; ret = pm_runtime_get_sync(kdev); WARN_ONCE(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret); - atomic_inc(&dev_priv->runtime_pm.wakeref_count); - assert_rpm_wakelock_held(dev_priv); + return track_intel_runtime_pm_wakeref(i915); } /** * intel_runtime_pm_get_if_in_use - grab a runtime pm reference if device in use - * @dev_priv: i915 device instance + * @i915: i915 device instance * * This function grabs a device-level runtime pm reference if the device is * already in use and ensures that it is powered up. It is illegal to try @@ -4018,12 +4319,13 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv) * Any runtime pm reference obtained by this function must have a symmetric * call to intel_runtime_pm_put() to release the reference again. * - * Returns: True if the wakeref was acquired, or False otherwise. + * Returns: the wakeref cookie to pass to intel_runtime_pm_put(), evaluates + * as True if the wakeref was acquired, or False otherwise. */ -bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv) +intel_wakeref_t intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915) { if (IS_ENABLED(CONFIG_PM)) { - struct pci_dev *pdev = dev_priv->drm.pdev; + struct pci_dev *pdev = i915->drm.pdev; struct device *kdev = &pdev->dev; /* @@ -4033,18 +4335,15 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv) * atm to the late/early system suspend/resume handlers. */ if (pm_runtime_get_if_in_use(kdev) <= 0) - return false; + return 0; } - atomic_inc(&dev_priv->runtime_pm.wakeref_count); - assert_rpm_wakelock_held(dev_priv); - - return true; + return track_intel_runtime_pm_wakeref(i915); } /** * intel_runtime_pm_get_noresume - grab a runtime pm reference - * @dev_priv: i915 device instance + * @i915: i915 device instance * * This function grabs a device-level runtime pm reference (mostly used for GEM * code to ensure the GTT or GT is on). @@ -4058,41 +4357,50 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv) * * Any runtime pm reference obtained by this function must have a symmetric * call to intel_runtime_pm_put() to release the reference again. + * + * Returns: the wakeref cookie to pass to intel_runtime_pm_put() */ -void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv) +intel_wakeref_t intel_runtime_pm_get_noresume(struct drm_i915_private *i915) { - struct pci_dev *pdev = dev_priv->drm.pdev; + struct pci_dev *pdev = i915->drm.pdev; struct device *kdev = &pdev->dev; - assert_rpm_wakelock_held(dev_priv); + assert_rpm_wakelock_held(i915); pm_runtime_get_noresume(kdev); - atomic_inc(&dev_priv->runtime_pm.wakeref_count); + return track_intel_runtime_pm_wakeref(i915); } /** * intel_runtime_pm_put - release a runtime pm reference - * @dev_priv: i915 device instance + * @i915: i915 device instance * * This function drops the device-level runtime pm reference obtained by * intel_runtime_pm_get() and might power down the corresponding * hardware block right away if this is the last reference. */ -void intel_runtime_pm_put(struct drm_i915_private *dev_priv) +void intel_runtime_pm_put_unchecked(struct drm_i915_private *i915) { - struct pci_dev *pdev = dev_priv->drm.pdev; + struct pci_dev *pdev = i915->drm.pdev; struct device *kdev = &pdev->dev; - assert_rpm_wakelock_held(dev_priv); - atomic_dec(&dev_priv->runtime_pm.wakeref_count); + untrack_intel_runtime_pm_wakeref(i915); pm_runtime_mark_last_busy(kdev); pm_runtime_put_autosuspend(kdev); } +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) +void intel_runtime_pm_put(struct drm_i915_private *i915, intel_wakeref_t wref) +{ + cancel_intel_runtime_pm_wakeref(i915, wref); + intel_runtime_pm_put_unchecked(i915); +} +#endif + /** * intel_runtime_pm_enable - enable runtime pm - * @dev_priv: i915 device instance + * @i915: i915 device instance * * This function enables runtime pm at the end of the driver load sequence. * @@ -4100,9 +4408,9 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv) * subordinate display power domains. That is done by * intel_power_domains_enable(). */ -void intel_runtime_pm_enable(struct drm_i915_private *dev_priv) +void intel_runtime_pm_enable(struct drm_i915_private *i915) { - struct pci_dev *pdev = dev_priv->drm.pdev; + struct pci_dev *pdev = i915->drm.pdev; struct device *kdev = &pdev->dev; /* @@ -4124,7 +4432,7 @@ void intel_runtime_pm_enable(struct drm_i915_private *dev_priv) * so the driver's own RPM reference tracking asserts also work on * platforms without RPM support. */ - if (!HAS_RUNTIME_PM(dev_priv)) { + if (!HAS_RUNTIME_PM(i915)) { int ret; pm_runtime_dont_use_autosuspend(kdev); @@ -4142,17 +4450,35 @@ void intel_runtime_pm_enable(struct drm_i915_private *dev_priv) pm_runtime_put_autosuspend(kdev); } -void intel_runtime_pm_disable(struct drm_i915_private *dev_priv) +void intel_runtime_pm_disable(struct drm_i915_private *i915) { - struct pci_dev *pdev = dev_priv->drm.pdev; + struct pci_dev *pdev = i915->drm.pdev; struct device *kdev = &pdev->dev; /* Transfer rpm ownership back to core */ - WARN(pm_runtime_get_sync(&dev_priv->drm.pdev->dev) < 0, + WARN(pm_runtime_get_sync(kdev) < 0, "Failed to pass rpm ownership back to core\n"); pm_runtime_dont_use_autosuspend(kdev); - if (!HAS_RUNTIME_PM(dev_priv)) + if (!HAS_RUNTIME_PM(i915)) pm_runtime_put(kdev); } + +void intel_runtime_pm_cleanup(struct drm_i915_private *i915) +{ + struct i915_runtime_pm *rpm = &i915->runtime_pm; + int count; + + count = atomic_fetch_inc(&rpm->wakeref_count); /* balance untrack */ + WARN(count, + "i915->runtime_pm.wakeref_count=%d on cleanup\n", + count); + + untrack_intel_runtime_pm_wakeref(i915); +} + +void intel_runtime_pm_init_early(struct drm_i915_private *i915) +{ + init_intel_runtime_pm_wakeref(i915); +} diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index df2d830a7405..e7b0884ba5a5 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -76,7 +76,7 @@ struct intel_sdvo { i915_reg_t sdvo_reg; /* Active outputs controlled by this SDVO output */ - uint16_t controlled_output; + u16 controlled_output; /* * Capabilities of the SDVO device returned by @@ -91,12 +91,12 @@ struct intel_sdvo { * For multiple function SDVO device, * this is for current attached outputs. */ - uint16_t attached_output; + u16 attached_output; /* * Hotplug activation bits for this device */ - uint16_t hotplug_active; + u16 hotplug_active; enum port port; @@ -104,19 +104,19 @@ struct intel_sdvo { bool has_hdmi_audio; /* DDC bus used by this SDVO encoder */ - uint8_t ddc_bus; + u8 ddc_bus; /* * the sdvo flag gets lost in round trip: dtd->adjusted_mode->dtd */ - uint8_t dtd_sdvo_flags; + u8 dtd_sdvo_flags; }; struct intel_sdvo_connector { struct intel_connector base; /* Mark the type of connector */ - uint16_t output_flag; + u16 output_flag; /* This contains all current supported TV format */ u8 tv_format_supported[TV_FORMAT_NUM]; @@ -184,7 +184,7 @@ to_intel_sdvo_connector(struct drm_connector *connector) container_of((conn_state), struct intel_sdvo_connector_state, base.base) static bool -intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, uint16_t flags); +intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags); static bool intel_sdvo_tv_create_property(struct intel_sdvo *intel_sdvo, struct intel_sdvo_connector *intel_sdvo_connector, @@ -746,9 +746,9 @@ static bool intel_sdvo_get_input_timing(struct intel_sdvo *intel_sdvo, static bool intel_sdvo_create_preferred_input_timing(struct intel_sdvo *intel_sdvo, struct intel_sdvo_connector *intel_sdvo_connector, - uint16_t clock, - uint16_t width, - uint16_t height) + u16 clock, + u16 width, + u16 height) { struct intel_sdvo_preferred_input_timing_args args; @@ -791,9 +791,9 @@ static bool intel_sdvo_set_clock_rate_mult(struct intel_sdvo *intel_sdvo, u8 val static void intel_sdvo_get_dtd_from_mode(struct intel_sdvo_dtd *dtd, const struct drm_display_mode *mode) { - uint16_t width, height; - uint16_t h_blank_len, h_sync_len, v_blank_len, v_sync_len; - uint16_t h_sync_offset, v_sync_offset; + u16 width, height; + u16 h_blank_len, h_sync_len, v_blank_len, v_sync_len; + u16 h_sync_offset, v_sync_offset; int mode_clock; memset(dtd, 0, sizeof(*dtd)); @@ -898,13 +898,13 @@ static bool intel_sdvo_check_supp_encode(struct intel_sdvo *intel_sdvo) } static bool intel_sdvo_set_encode(struct intel_sdvo *intel_sdvo, - uint8_t mode) + u8 mode) { return intel_sdvo_set_value(intel_sdvo, SDVO_CMD_SET_ENCODE, &mode, 1); } static bool intel_sdvo_set_colorimetry(struct intel_sdvo *intel_sdvo, - uint8_t mode) + u8 mode) { return intel_sdvo_set_value(intel_sdvo, SDVO_CMD_SET_COLORIMETRY, &mode, 1); } @@ -913,11 +913,11 @@ static bool intel_sdvo_set_colorimetry(struct intel_sdvo *intel_sdvo, static void intel_sdvo_dump_hdmi_buf(struct intel_sdvo *intel_sdvo) { int i, j; - uint8_t set_buf_index[2]; - uint8_t av_split; - uint8_t buf_size; - uint8_t buf[48]; - uint8_t *pos; + u8 set_buf_index[2]; + u8 av_split; + u8 buf_size; + u8 buf[48]; + u8 *pos; intel_sdvo_get_value(encoder, SDVO_CMD_GET_HBUF_AV_SPLIT, &av_split, 1); @@ -940,11 +940,11 @@ static void intel_sdvo_dump_hdmi_buf(struct intel_sdvo *intel_sdvo) #endif static bool intel_sdvo_write_infoframe(struct intel_sdvo *intel_sdvo, - unsigned if_index, uint8_t tx_rate, - const uint8_t *data, unsigned length) + unsigned int if_index, u8 tx_rate, + const u8 *data, unsigned int length) { - uint8_t set_buf_index[2] = { if_index, 0 }; - uint8_t hbuf_size, tmp[8]; + u8 set_buf_index[2] = { if_index, 0 }; + u8 hbuf_size, tmp[8]; int i; if (!intel_sdvo_set_value(intel_sdvo, @@ -984,7 +984,7 @@ static bool intel_sdvo_set_avi_infoframe(struct intel_sdvo *intel_sdvo, { const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; - uint8_t sdvo_data[HDMI_INFOFRAME_SIZE(AVI)]; + u8 sdvo_data[HDMI_INFOFRAME_SIZE(AVI)]; union hdmi_infoframe frame; int ret; ssize_t len; @@ -1017,7 +1017,7 @@ static bool intel_sdvo_set_tv_format(struct intel_sdvo *intel_sdvo, const struct drm_connector_state *conn_state) { struct intel_sdvo_tv_format format; - uint32_t format_map; + u32 format_map; format_map = 1 << conn_state->tv.mode; memset(&format, 0, sizeof(format)); @@ -1208,7 +1208,7 @@ static void intel_sdvo_update_props(struct intel_sdvo *intel_sdvo, const struct drm_connector_state *conn_state = &sdvo_state->base.base; struct intel_sdvo_connector *intel_sdvo_conn = to_intel_sdvo_connector(conn_state->connector); - uint16_t val; + u16 val; if (intel_sdvo_conn->left) UPDATE_PROPERTY(sdvo_state->tv.overscan_h, OVERSCAN_H); @@ -1692,10 +1692,10 @@ static bool intel_sdvo_get_capabilities(struct intel_sdvo *intel_sdvo, struct in return true; } -static uint16_t intel_sdvo_get_hotplug_support(struct intel_sdvo *intel_sdvo) +static u16 intel_sdvo_get_hotplug_support(struct intel_sdvo *intel_sdvo) { struct drm_i915_private *dev_priv = to_i915(intel_sdvo->base.base.dev); - uint16_t hotplug; + u16 hotplug; if (!I915_HAS_HOTPLUG(dev_priv)) return 0; @@ -1826,7 +1826,7 @@ intel_sdvo_connector_matches_edid(struct intel_sdvo_connector *sdvo, static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connector, bool force) { - uint16_t response; + u16 response; struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector); struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector); enum drm_connector_status ret; @@ -1977,7 +1977,7 @@ static void intel_sdvo_get_tv_modes(struct drm_connector *connector) struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector); const struct drm_connector_state *conn_state = connector->state; struct intel_sdvo_sdtv_resolution_request tv_res; - uint32_t reply = 0, format_map = 0; + u32 reply = 0, format_map = 0; int i; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", @@ -2062,7 +2062,7 @@ static int intel_sdvo_connector_atomic_get_property(struct drm_connector *connector, const struct drm_connector_state *state, struct drm_property *property, - uint64_t *val) + u64 *val) { struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector); const struct intel_sdvo_connector_state *sdvo_state = to_intel_sdvo_connector_state((void *)state); @@ -2121,7 +2121,7 @@ static int intel_sdvo_connector_atomic_set_property(struct drm_connector *connector, struct drm_connector_state *state, struct drm_property *property, - uint64_t val) + u64 val) { struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector); struct intel_sdvo_connector_state *sdvo_state = to_intel_sdvo_connector_state(state); @@ -2270,7 +2270,7 @@ static const struct drm_encoder_funcs intel_sdvo_enc_funcs = { static void intel_sdvo_guess_ddc_bus(struct intel_sdvo *sdvo) { - uint16_t mask = 0; + u16 mask = 0; unsigned int num_bits; /* @@ -2671,7 +2671,7 @@ err: } static bool -intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, uint16_t flags) +intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) { /* SDVO requires XXX1 function may not exist unless it has XXX0 function.*/ @@ -2747,7 +2747,7 @@ static bool intel_sdvo_tv_create_property(struct intel_sdvo *intel_sdvo, { struct drm_device *dev = intel_sdvo->base.base.dev; struct intel_sdvo_tv_format format; - uint32_t format_map, i; + u32 format_map, i; if (!intel_sdvo_set_target_output(intel_sdvo, type)) return false; @@ -2814,7 +2814,7 @@ intel_sdvo_create_enhance_property_tv(struct intel_sdvo *intel_sdvo, struct drm_connector_state *conn_state = connector->state; struct intel_sdvo_connector_state *sdvo_state = to_intel_sdvo_connector_state(conn_state); - uint16_t response, data_value[2]; + u16 response, data_value[2]; /* when horizontal overscan is supported, Add the left/right property */ if (enhancements.overscan_h) { @@ -2925,7 +2925,7 @@ intel_sdvo_create_enhance_property_lvds(struct intel_sdvo *intel_sdvo, { struct drm_device *dev = intel_sdvo->base.base.dev; struct drm_connector *connector = &intel_sdvo_connector->base.base; - uint16_t response, data_value[2]; + u16 response, data_value[2]; ENHANCEMENT(&connector->state->tv, brightness, BRIGHTNESS); @@ -2939,7 +2939,7 @@ static bool intel_sdvo_create_enhance_property(struct intel_sdvo *intel_sdvo, { union { struct intel_sdvo_enhancements_reply reply; - uint16_t response; + u16 response; } enhancements; BUILD_BUG_ON(sizeof(enhancements) != 2); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 8f3982c03925..b56a1a9ad01d 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -321,8 +321,8 @@ skl_program_scaler(struct intel_plane *plane, &crtc_state->scaler_state.scalers[scaler_id]; int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); - uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); + u32 crtc_w = drm_rect_width(&plane_state->base.dst); + u32 crtc_h = drm_rect_height(&plane_state->base.dst); u16 y_hphase, uv_rgb_hphase; u16 y_vphase, uv_rgb_vphase; int hscale, vscale; @@ -477,23 +477,30 @@ skl_program_plane(struct intel_plane *plane, u32 aux_stride = skl_plane_stride(plane_state, 1); int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; - uint32_t x = plane_state->color_plane[color_plane].x; - uint32_t y = plane_state->color_plane[color_plane].y; - uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; - uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; + u32 x = plane_state->color_plane[color_plane].x; + u32 y = plane_state->color_plane[color_plane].y; + u32 src_w = drm_rect_width(&plane_state->base.src) >> 16; + u32 src_h = drm_rect_height(&plane_state->base.src) >> 16; struct intel_plane *linked = plane_state->linked_plane; const struct drm_framebuffer *fb = plane_state->base.fb; u8 alpha = plane_state->base.alpha >> 8; + u32 plane_color_ctl = 0; unsigned long irqflags; u32 keymsk, keymax; + plane_ctl |= skl_plane_ctl_crtc(crtc_state); + + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + plane_color_ctl = plane_state->color_ctl | + glk_plane_color_ctl_crtc(crtc_state); + /* Sizes are 0 based */ src_w--; src_h--; keymax = (key->max_value & 0xffffff) | PLANE_KEYMAX_ALPHA(alpha); - keymsk = key->channel_mask & 0x3ffffff; + keymsk = key->channel_mask & 0x7ffffff; if (alpha < 0xff) keymsk |= PLANE_KEYMSK_ALPHA_ENABLE; @@ -533,8 +540,7 @@ skl_program_plane(struct intel_plane *plane, } if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) - I915_WRITE_FW(PLANE_COLOR_CTL(pipe, plane_id), - plane_state->color_ctl); + I915_WRITE_FW(PLANE_COLOR_CTL(pipe, plane_id), plane_color_ctl); if (fb->format->is_yuv && icl_is_hdr_plane(plane)) icl_program_input_csc(plane, crtc_state, plane_state); @@ -618,17 +624,19 @@ skl_plane_get_hw_state(struct intel_plane *plane, struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum intel_display_power_domain power_domain; enum plane_id plane_id = plane->id; + intel_wakeref_t wakeref; bool ret; power_domain = POWER_DOMAIN_PIPE(plane->pipe); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) return false; ret = I915_READ(PLANE_CTL(plane->pipe, plane_id)) & PLANE_CTL_ENABLE; *pipe = plane->pipe; - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); return ret; } @@ -731,6 +739,11 @@ vlv_update_clrc(const struct intel_plane_state *plane_state) SP_SH_SIN(sh_sin) | SP_SH_COS(sh_cos)); } +static u32 vlv_sprite_ctl_crtc(const struct intel_crtc_state *crtc_state) +{ + return SP_GAMMA_ENABLE; +} + static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -739,7 +752,7 @@ static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; u32 sprctl; - sprctl = SP_ENABLE | SP_GAMMA_ENABLE; + sprctl = SP_ENABLE; switch (fb->format->format) { case DRM_FORMAT_YUYV: @@ -806,17 +819,19 @@ vlv_update_plane(struct intel_plane *plane, struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; enum plane_id plane_id = plane->id; - u32 sprctl = plane_state->ctl; u32 sprsurf_offset = plane_state->color_plane[0].offset; u32 linear_offset; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); - uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); - uint32_t x = plane_state->color_plane[0].x; - uint32_t y = plane_state->color_plane[0].y; + u32 crtc_w = drm_rect_width(&plane_state->base.dst); + u32 crtc_h = drm_rect_height(&plane_state->base.dst); + u32 x = plane_state->color_plane[0].x; + u32 y = plane_state->color_plane[0].y; unsigned long irqflags; + u32 sprctl; + + sprctl = plane_state->ctl | vlv_sprite_ctl_crtc(crtc_state); /* Sizes are 0 based */ crtc_w--; @@ -882,21 +897,36 @@ vlv_plane_get_hw_state(struct intel_plane *plane, struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum intel_display_power_domain power_domain; enum plane_id plane_id = plane->id; + intel_wakeref_t wakeref; bool ret; power_domain = POWER_DOMAIN_PIPE(plane->pipe); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) return false; ret = I915_READ(SPCNTR(plane->pipe, plane_id)) & SP_ENABLE; *pipe = plane->pipe; - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); return ret; } +static u32 ivb_sprite_ctl_crtc(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + u32 sprctl = 0; + + sprctl |= SPRITE_GAMMA_ENABLE; + + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + sprctl |= SPRITE_PIPE_CSC_ENABLE; + + return sprctl; +} + static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -907,14 +937,11 @@ static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; u32 sprctl; - sprctl = SPRITE_ENABLE | SPRITE_GAMMA_ENABLE; + sprctl = SPRITE_ENABLE; if (IS_IVYBRIDGE(dev_priv)) sprctl |= SPRITE_TRICKLE_FEED_DISABLE; - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - sprctl |= SPRITE_PIPE_CSC_ENABLE; - switch (fb->format->format) { case DRM_FORMAT_XBGR8888: sprctl |= SPRITE_FORMAT_RGBX888 | SPRITE_RGB_ORDER_RGBX; @@ -966,20 +993,22 @@ ivb_update_plane(struct intel_plane *plane, { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; - u32 sprctl = plane_state->ctl, sprscale = 0; u32 sprsurf_offset = plane_state->color_plane[0].offset; u32 linear_offset; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); - uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); - uint32_t x = plane_state->color_plane[0].x; - uint32_t y = plane_state->color_plane[0].y; - uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; - uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; + u32 crtc_w = drm_rect_width(&plane_state->base.dst); + u32 crtc_h = drm_rect_height(&plane_state->base.dst); + u32 x = plane_state->color_plane[0].x; + u32 y = plane_state->color_plane[0].y; + u32 src_w = drm_rect_width(&plane_state->base.src) >> 16; + u32 src_h = drm_rect_height(&plane_state->base.src) >> 16; + u32 sprctl, sprscale = 0; unsigned long irqflags; + sprctl = plane_state->ctl | ivb_sprite_ctl_crtc(crtc_state); + /* Sizes are 0 based */ src_w--; src_h--; @@ -1051,17 +1080,19 @@ ivb_plane_get_hw_state(struct intel_plane *plane, { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum intel_display_power_domain power_domain; + intel_wakeref_t wakeref; bool ret; power_domain = POWER_DOMAIN_PIPE(plane->pipe); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) return false; ret = I915_READ(SPRCTL(plane->pipe)) & SPRITE_ENABLE; *pipe = plane->pipe; - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); return ret; } @@ -1074,6 +1105,11 @@ g4x_sprite_max_stride(struct intel_plane *plane, return 16384; } +static u32 g4x_sprite_ctl_crtc(const struct intel_crtc_state *crtc_state) +{ + return DVS_GAMMA_ENABLE; +} + static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -1084,7 +1120,7 @@ static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; u32 dvscntr; - dvscntr = DVS_ENABLE | DVS_GAMMA_ENABLE; + dvscntr = DVS_ENABLE; if (IS_GEN(dev_priv, 6)) dvscntr |= DVS_TRICKLE_FEED_DISABLE; @@ -1140,20 +1176,22 @@ g4x_update_plane(struct intel_plane *plane, { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; - u32 dvscntr = plane_state->ctl, dvsscale = 0; u32 dvssurf_offset = plane_state->color_plane[0].offset; u32 linear_offset; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); - uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); - uint32_t x = plane_state->color_plane[0].x; - uint32_t y = plane_state->color_plane[0].y; - uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; - uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; + u32 crtc_w = drm_rect_width(&plane_state->base.dst); + u32 crtc_h = drm_rect_height(&plane_state->base.dst); + u32 x = plane_state->color_plane[0].x; + u32 y = plane_state->color_plane[0].y; + u32 src_w = drm_rect_width(&plane_state->base.src) >> 16; + u32 src_h = drm_rect_height(&plane_state->base.src) >> 16; + u32 dvscntr, dvsscale = 0; unsigned long irqflags; + dvscntr = plane_state->ctl | g4x_sprite_ctl_crtc(crtc_state); + /* Sizes are 0 based */ src_w--; src_h--; @@ -1217,17 +1255,19 @@ g4x_plane_get_hw_state(struct intel_plane *plane, { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum intel_display_power_domain power_domain; + intel_wakeref_t wakeref; bool ret; power_domain = POWER_DOMAIN_PIPE(plane->pipe); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) return false; ret = I915_READ(DVSCNTR(plane->pipe)) & DVS_ENABLE; *pipe = plane->pipe; - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); return ret; } @@ -1698,7 +1738,7 @@ out: return ret; } -static const uint32_t g4x_plane_formats[] = { +static const u32 g4x_plane_formats[] = { DRM_FORMAT_XRGB8888, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, @@ -1706,13 +1746,13 @@ static const uint32_t g4x_plane_formats[] = { DRM_FORMAT_VYUY, }; -static const uint64_t i9xx_plane_format_modifiers[] = { +static const u64 i9xx_plane_format_modifiers[] = { I915_FORMAT_MOD_X_TILED, DRM_FORMAT_MOD_LINEAR, DRM_FORMAT_MOD_INVALID }; -static const uint32_t snb_plane_formats[] = { +static const u32 snb_plane_formats[] = { DRM_FORMAT_XBGR8888, DRM_FORMAT_XRGB8888, DRM_FORMAT_YUYV, @@ -1721,7 +1761,7 @@ static const uint32_t snb_plane_formats[] = { DRM_FORMAT_VYUY, }; -static const uint32_t vlv_plane_formats[] = { +static const u32 vlv_plane_formats[] = { DRM_FORMAT_RGB565, DRM_FORMAT_ABGR8888, DRM_FORMAT_ARGB8888, @@ -1735,7 +1775,7 @@ static const uint32_t vlv_plane_formats[] = { DRM_FORMAT_VYUY, }; -static const uint32_t skl_plane_formats[] = { +static const u32 skl_plane_formats[] = { DRM_FORMAT_C8, DRM_FORMAT_RGB565, DRM_FORMAT_XRGB8888, @@ -1750,7 +1790,7 @@ static const uint32_t skl_plane_formats[] = { DRM_FORMAT_VYUY, }; -static const uint32_t skl_planar_formats[] = { +static const u32 skl_planar_formats[] = { DRM_FORMAT_C8, DRM_FORMAT_RGB565, DRM_FORMAT_XRGB8888, @@ -1766,7 +1806,7 @@ static const uint32_t skl_planar_formats[] = { DRM_FORMAT_NV12, }; -static const uint64_t skl_plane_format_modifiers_noccs[] = { +static const u64 skl_plane_format_modifiers_noccs[] = { I915_FORMAT_MOD_Yf_TILED, I915_FORMAT_MOD_Y_TILED, I915_FORMAT_MOD_X_TILED, @@ -1774,7 +1814,7 @@ static const uint64_t skl_plane_format_modifiers_noccs[] = { DRM_FORMAT_MOD_INVALID }; -static const uint64_t skl_plane_format_modifiers_ccs[] = { +static const u64 skl_plane_format_modifiers_ccs[] = { I915_FORMAT_MOD_Yf_TILED_CCS, I915_FORMAT_MOD_Y_TILED_CCS, I915_FORMAT_MOD_Yf_TILED, diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index bd5536f0ec92..3924c4944e1f 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -306,7 +306,7 @@ struct tv_mode { u32 clock; u16 refresh; /* in millihertz (for precision) */ - u32 oversample; + u8 oversample; u8 hsync_end; u16 hblank_start, hblank_end, htotal; bool progressive : 1, trilevel_sync : 1, component_only : 1; @@ -339,7 +339,6 @@ struct tv_mode { const struct video_levels *composite_levels, *svideo_levels; const struct color_conversion *composite_color, *svideo_color; const u32 *filter_table; - u16 max_srcw; }; @@ -378,8 +377,8 @@ static const struct tv_mode tv_modes[] = { .name = "NTSC-M", .clock = 108000, .refresh = 59940, - .oversample = TV_OVERSAMPLE_8X, - .component_only = 0, + .oversample = 8, + .component_only = false, /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */ .hsync_end = 64, .hblank_end = 124, @@ -421,8 +420,8 @@ static const struct tv_mode tv_modes[] = { .name = "NTSC-443", .clock = 108000, .refresh = 59940, - .oversample = TV_OVERSAMPLE_8X, - .component_only = 0, + .oversample = 8, + .component_only = false, /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 4.43MHz */ .hsync_end = 64, .hblank_end = 124, .hblank_start = 836, .htotal = 857, @@ -463,8 +462,8 @@ static const struct tv_mode tv_modes[] = { .name = "NTSC-J", .clock = 108000, .refresh = 59940, - .oversample = TV_OVERSAMPLE_8X, - .component_only = 0, + .oversample = 8, + .component_only = false, /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */ .hsync_end = 64, .hblank_end = 124, @@ -506,8 +505,8 @@ static const struct tv_mode tv_modes[] = { .name = "PAL-M", .clock = 108000, .refresh = 59940, - .oversample = TV_OVERSAMPLE_8X, - .component_only = 0, + .oversample = 8, + .component_only = false, /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */ .hsync_end = 64, .hblank_end = 124, @@ -550,8 +549,8 @@ static const struct tv_mode tv_modes[] = { .name = "PAL-N", .clock = 108000, .refresh = 50000, - .oversample = TV_OVERSAMPLE_8X, - .component_only = 0, + .oversample = 8, + .component_only = false, .hsync_end = 64, .hblank_end = 128, .hblank_start = 844, .htotal = 863, @@ -595,8 +594,8 @@ static const struct tv_mode tv_modes[] = { .name = "PAL", .clock = 108000, .refresh = 50000, - .oversample = TV_OVERSAMPLE_8X, - .component_only = 0, + .oversample = 8, + .component_only = false, .hsync_end = 64, .hblank_end = 142, .hblank_start = 844, .htotal = 863, @@ -635,10 +634,10 @@ static const struct tv_mode tv_modes[] = { }, { .name = "480p", - .clock = 107520, + .clock = 108000, .refresh = 59940, - .oversample = TV_OVERSAMPLE_4X, - .component_only = 1, + .oversample = 4, + .component_only = true, .hsync_end = 64, .hblank_end = 122, .hblank_start = 842, .htotal = 857, @@ -659,10 +658,10 @@ static const struct tv_mode tv_modes[] = { }, { .name = "576p", - .clock = 107520, + .clock = 108000, .refresh = 50000, - .oversample = TV_OVERSAMPLE_4X, - .component_only = 1, + .oversample = 4, + .component_only = true, .hsync_end = 64, .hblank_end = 139, .hblank_start = 859, .htotal = 863, @@ -683,10 +682,10 @@ static const struct tv_mode tv_modes[] = { }, { .name = "720p@60Hz", - .clock = 148800, + .clock = 148500, .refresh = 60000, - .oversample = TV_OVERSAMPLE_2X, - .component_only = 1, + .oversample = 2, + .component_only = true, .hsync_end = 80, .hblank_end = 300, .hblank_start = 1580, .htotal = 1649, @@ -707,10 +706,10 @@ static const struct tv_mode tv_modes[] = { }, { .name = "720p@50Hz", - .clock = 148800, + .clock = 148500, .refresh = 50000, - .oversample = TV_OVERSAMPLE_2X, - .component_only = 1, + .oversample = 2, + .component_only = true, .hsync_end = 80, .hblank_end = 300, .hblank_start = 1580, .htotal = 1979, @@ -728,14 +727,13 @@ static const struct tv_mode tv_modes[] = { .burst_ena = false, .filter_table = filter_table, - .max_srcw = 800 }, { .name = "1080i@50Hz", - .clock = 148800, + .clock = 148500, .refresh = 50000, - .oversample = TV_OVERSAMPLE_2X, - .component_only = 1, + .oversample = 2, + .component_only = true, .hsync_end = 88, .hblank_end = 235, .hblank_start = 2155, .htotal = 2639, @@ -758,10 +756,10 @@ static const struct tv_mode tv_modes[] = { }, { .name = "1080i@60Hz", - .clock = 148800, + .clock = 148500, .refresh = 60000, - .oversample = TV_OVERSAMPLE_2X, - .component_only = 1, + .oversample = 2, + .component_only = true, .hsync_end = 88, .hblank_end = 235, .hblank_start = 2155, .htotal = 2199, @@ -782,8 +780,115 @@ static const struct tv_mode tv_modes[] = { .filter_table = filter_table, }, + + { + .name = "1080p@30Hz", + .clock = 148500, + .refresh = 30000, + .oversample = 2, + .component_only = true, + + .hsync_end = 88, .hblank_end = 235, + .hblank_start = 2155, .htotal = 2199, + + .progressive = true, .trilevel_sync = true, + + .vsync_start_f1 = 8, .vsync_start_f2 = 8, + .vsync_len = 10, + + .veq_ena = false, .veq_start_f1 = 0, + .veq_start_f2 = 0, .veq_len = 0, + + .vi_end_f1 = 44, .vi_end_f2 = 44, + .nbr_end = 1079, + + .burst_ena = false, + + .filter_table = filter_table, + }, + + { + .name = "1080p@50Hz", + .clock = 148500, + .refresh = 50000, + .oversample = 1, + .component_only = true, + + .hsync_end = 88, .hblank_end = 235, + .hblank_start = 2155, .htotal = 2639, + + .progressive = true, .trilevel_sync = true, + + .vsync_start_f1 = 8, .vsync_start_f2 = 8, + .vsync_len = 10, + + .veq_ena = false, .veq_start_f1 = 0, + .veq_start_f2 = 0, .veq_len = 0, + + .vi_end_f1 = 44, .vi_end_f2 = 44, + .nbr_end = 1079, + + .burst_ena = false, + + .filter_table = filter_table, + }, + + { + .name = "1080p@60Hz", + .clock = 148500, + .refresh = 60000, + .oversample = 1, + .component_only = true, + + .hsync_end = 88, .hblank_end = 235, + .hblank_start = 2155, .htotal = 2199, + + .progressive = true, .trilevel_sync = true, + + .vsync_start_f1 = 8, .vsync_start_f2 = 8, + .vsync_len = 10, + + .veq_ena = false, .veq_start_f1 = 0, + .veq_start_f2 = 0, .veq_len = 0, + + .vi_end_f1 = 44, .vi_end_f2 = 44, + .nbr_end = 1079, + + .burst_ena = false, + + .filter_table = filter_table, + }, +}; + +struct intel_tv_connector_state { + struct drm_connector_state base; + + /* + * May need to override the user margins for + * gen3 >1024 wide source vertical centering. + */ + struct { + u16 top, bottom; + } margins; + + bool bypass_vfilter; }; +#define to_intel_tv_connector_state(x) container_of(x, struct intel_tv_connector_state, base) + +static struct drm_connector_state * +intel_tv_connector_duplicate_state(struct drm_connector *connector) +{ + struct intel_tv_connector_state *state; + + state = kmemdup(connector->state, sizeof(*state), GFP_KERNEL); + if (!state) + return NULL; + + __drm_atomic_helper_connector_duplicate_state(connector, &state->base); + return &state->base; +} + static struct intel_tv *enc_to_tv(struct intel_encoder *encoder) { return container_of(encoder, struct intel_tv, base); @@ -859,14 +964,215 @@ intel_tv_mode_valid(struct drm_connector *connector, return MODE_CLOCK_RANGE; } +static int +intel_tv_mode_vdisplay(const struct tv_mode *tv_mode) +{ + if (tv_mode->progressive) + return tv_mode->nbr_end + 1; + else + return 2 * (tv_mode->nbr_end + 1); +} + +static void +intel_tv_mode_to_mode(struct drm_display_mode *mode, + const struct tv_mode *tv_mode) +{ + mode->clock = tv_mode->clock / + (tv_mode->oversample >> !tv_mode->progressive); + + /* + * tv_mode horizontal timings: + * + * hsync_end + * | hblank_end + * | | hblank_start + * | | | htotal + * | _______ | + * ____/ \___ + * \__/ \ + */ + mode->hdisplay = + tv_mode->hblank_start - tv_mode->hblank_end; + mode->hsync_start = mode->hdisplay + + tv_mode->htotal - tv_mode->hblank_start; + mode->hsync_end = mode->hsync_start + + tv_mode->hsync_end; + mode->htotal = tv_mode->htotal + 1; + + /* + * tv_mode vertical timings: + * + * vsync_start + * | vsync_end + * | | vi_end nbr_end + * | | | | + * | | _______ + * \__ ____/ \ + * \__/ + */ + mode->vdisplay = intel_tv_mode_vdisplay(tv_mode); + if (tv_mode->progressive) { + mode->vsync_start = mode->vdisplay + + tv_mode->vsync_start_f1 + 1; + mode->vsync_end = mode->vsync_start + + tv_mode->vsync_len; + mode->vtotal = mode->vdisplay + + tv_mode->vi_end_f1 + 1; + } else { + mode->vsync_start = mode->vdisplay + + tv_mode->vsync_start_f1 + 1 + + tv_mode->vsync_start_f2 + 1; + mode->vsync_end = mode->vsync_start + + 2 * tv_mode->vsync_len; + mode->vtotal = mode->vdisplay + + tv_mode->vi_end_f1 + 1 + + tv_mode->vi_end_f2 + 1; + } + + /* TV has it's own notion of sync and other mode flags, so clear them. */ + mode->flags = 0; + + mode->vrefresh = 0; + mode->vrefresh = drm_mode_vrefresh(mode); + + snprintf(mode->name, sizeof(mode->name), + "%dx%d%c (%s)", + mode->hdisplay, mode->vdisplay, + tv_mode->progressive ? 'p' : 'i', + tv_mode->name); +} + +static void intel_tv_scale_mode_horiz(struct drm_display_mode *mode, + int hdisplay, int left_margin, + int right_margin) +{ + int hsync_start = mode->hsync_start - mode->hdisplay + right_margin; + int hsync_end = mode->hsync_end - mode->hdisplay + right_margin; + int new_htotal = mode->htotal * hdisplay / + (mode->hdisplay - left_margin - right_margin); + + mode->clock = mode->clock * new_htotal / mode->htotal; + + mode->hdisplay = hdisplay; + mode->hsync_start = hdisplay + hsync_start * new_htotal / mode->htotal; + mode->hsync_end = hdisplay + hsync_end * new_htotal / mode->htotal; + mode->htotal = new_htotal; +} + +static void intel_tv_scale_mode_vert(struct drm_display_mode *mode, + int vdisplay, int top_margin, + int bottom_margin) +{ + int vsync_start = mode->vsync_start - mode->vdisplay + bottom_margin; + int vsync_end = mode->vsync_end - mode->vdisplay + bottom_margin; + int new_vtotal = mode->vtotal * vdisplay / + (mode->vdisplay - top_margin - bottom_margin); + + mode->clock = mode->clock * new_vtotal / mode->vtotal; + + mode->vdisplay = vdisplay; + mode->vsync_start = vdisplay + vsync_start * new_vtotal / mode->vtotal; + mode->vsync_end = vdisplay + vsync_end * new_vtotal / mode->vtotal; + mode->vtotal = new_vtotal; +} static void intel_tv_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct drm_display_mode *adjusted_mode = + &pipe_config->base.adjusted_mode; + struct drm_display_mode mode = {}; + u32 tv_ctl, hctl1, hctl3, vctl1, vctl2, tmp; + struct tv_mode tv_mode = {}; + int hdisplay = adjusted_mode->crtc_hdisplay; + int vdisplay = adjusted_mode->crtc_vdisplay; + int xsize, ysize, xpos, ypos; + pipe_config->output_types |= BIT(INTEL_OUTPUT_TVOUT); - pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock; + tv_ctl = I915_READ(TV_CTL); + hctl1 = I915_READ(TV_H_CTL_1); + hctl3 = I915_READ(TV_H_CTL_3); + vctl1 = I915_READ(TV_V_CTL_1); + vctl2 = I915_READ(TV_V_CTL_2); + + tv_mode.htotal = (hctl1 & TV_HTOTAL_MASK) >> TV_HTOTAL_SHIFT; + tv_mode.hsync_end = (hctl1 & TV_HSYNC_END_MASK) >> TV_HSYNC_END_SHIFT; + + tv_mode.hblank_start = (hctl3 & TV_HBLANK_START_MASK) >> TV_HBLANK_START_SHIFT; + tv_mode.hblank_end = (hctl3 & TV_HSYNC_END_MASK) >> TV_HBLANK_END_SHIFT; + + tv_mode.nbr_end = (vctl1 & TV_NBR_END_MASK) >> TV_NBR_END_SHIFT; + tv_mode.vi_end_f1 = (vctl1 & TV_VI_END_F1_MASK) >> TV_VI_END_F1_SHIFT; + tv_mode.vi_end_f2 = (vctl1 & TV_VI_END_F2_MASK) >> TV_VI_END_F2_SHIFT; + + tv_mode.vsync_len = (vctl2 & TV_VSYNC_LEN_MASK) >> TV_VSYNC_LEN_SHIFT; + tv_mode.vsync_start_f1 = (vctl2 & TV_VSYNC_START_F1_MASK) >> TV_VSYNC_START_F1_SHIFT; + tv_mode.vsync_start_f2 = (vctl2 & TV_VSYNC_START_F2_MASK) >> TV_VSYNC_START_F2_SHIFT; + + tv_mode.clock = pipe_config->port_clock; + + tv_mode.progressive = tv_ctl & TV_PROGRESSIVE; + + switch (tv_ctl & TV_OVERSAMPLE_MASK) { + case TV_OVERSAMPLE_8X: + tv_mode.oversample = 8; + break; + case TV_OVERSAMPLE_4X: + tv_mode.oversample = 4; + break; + case TV_OVERSAMPLE_2X: + tv_mode.oversample = 2; + break; + default: + tv_mode.oversample = 1; + break; + } + + tmp = I915_READ(TV_WIN_POS); + xpos = tmp >> 16; + ypos = tmp & 0xffff; + + tmp = I915_READ(TV_WIN_SIZE); + xsize = tmp >> 16; + ysize = tmp & 0xffff; + + intel_tv_mode_to_mode(&mode, &tv_mode); + + DRM_DEBUG_KMS("TV mode:\n"); + drm_mode_debug_printmodeline(&mode); + + intel_tv_scale_mode_horiz(&mode, hdisplay, + xpos, mode.hdisplay - xsize - xpos); + intel_tv_scale_mode_vert(&mode, vdisplay, + ypos, mode.vdisplay - ysize - ypos); + + adjusted_mode->crtc_clock = mode.clock; + if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) + adjusted_mode->crtc_clock /= 2; + + /* pixel counter doesn't work on i965gm TV output */ + if (IS_I965GM(dev_priv)) + adjusted_mode->private_flags |= + I915_MODE_FLAG_USE_SCANLINE_COUNTER; +} + +static bool intel_tv_source_too_wide(struct drm_i915_private *dev_priv, + int hdisplay) +{ + return IS_GEN(dev_priv, 3) && hdisplay > 1024; +} + +static bool intel_tv_vert_scaling(const struct drm_display_mode *tv_mode, + const struct drm_connector_state *conn_state, + int vdisplay) +{ + return tv_mode->crtc_vdisplay - + conn_state->tv.margins.top - + conn_state->tv.margins.bottom != + vdisplay; } static int @@ -874,9 +1180,14 @@ intel_tv_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_tv_connector_state *tv_conn_state = + to_intel_tv_connector_state(conn_state); const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state); struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + int hdisplay = adjusted_mode->crtc_hdisplay; + int vdisplay = adjusted_mode->crtc_vdisplay; if (!tv_mode) return -EINVAL; @@ -885,17 +1196,136 @@ intel_tv_compute_config(struct intel_encoder *encoder, return -EINVAL; pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; - adjusted_mode->crtc_clock = tv_mode->clock; + DRM_DEBUG_KMS("forcing bpc to 8 for TV\n"); pipe_config->pipe_bpp = 8*3; - /* TV has it's own notion of sync and other mode flags, so clear them. */ - adjusted_mode->flags = 0; + pipe_config->port_clock = tv_mode->clock; + + intel_tv_mode_to_mode(adjusted_mode, tv_mode); + drm_mode_set_crtcinfo(adjusted_mode, 0); + + if (intel_tv_source_too_wide(dev_priv, hdisplay) || + !intel_tv_vert_scaling(adjusted_mode, conn_state, vdisplay)) { + int extra, top, bottom; + + extra = adjusted_mode->crtc_vdisplay - vdisplay; + + if (extra < 0) { + DRM_DEBUG_KMS("No vertical scaling for >1024 pixel wide modes\n"); + return -EINVAL; + } + + /* Need to turn off the vertical filter and center the image */ + + /* Attempt to maintain the relative sizes of the margins */ + top = conn_state->tv.margins.top; + bottom = conn_state->tv.margins.bottom; + + if (top + bottom) + top = extra * top / (top + bottom); + else + top = extra / 2; + bottom = extra - top; + + tv_conn_state->margins.top = top; + tv_conn_state->margins.bottom = bottom; + + tv_conn_state->bypass_vfilter = true; + + if (!tv_mode->progressive) { + adjusted_mode->clock /= 2; + adjusted_mode->crtc_clock /= 2; + adjusted_mode->flags |= DRM_MODE_FLAG_INTERLACE; + } + } else { + tv_conn_state->margins.top = conn_state->tv.margins.top; + tv_conn_state->margins.bottom = conn_state->tv.margins.bottom; + + tv_conn_state->bypass_vfilter = false; + } + + DRM_DEBUG_KMS("TV mode:\n"); + drm_mode_debug_printmodeline(adjusted_mode); /* - * FIXME: We don't check whether the input mode is actually what we want - * or whether userspace is doing something stupid. + * The pipe scanline counter behaviour looks as follows when + * using the TV encoder: + * + * time -> + * + * dsl=vtotal-1 | | + * || || + * ___| | ___| | + * / | / | + * / | / | + * dsl=0 ___/ |_____/ | + * | | | | | | + * ^ ^ ^ ^ ^ + * | | | | pipe vblank/first part of tv vblank + * | | | bottom margin + * | | active + * | top margin + * remainder of tv vblank + * + * When the TV encoder is used the pipe wants to run faster + * than expected rate. During the active portion the TV + * encoder stalls the pipe every few lines to keep it in + * check. When the TV encoder reaches the bottom margin the + * pipe simply stops. Once we reach the TV vblank the pipe is + * no longer stalled and it runs at the max rate (apparently + * oversample clock on gen3, cdclk on gen4). Once the pipe + * reaches the pipe vtotal the pipe stops for the remainder + * of the TV vblank/top margin. The pipe starts up again when + * the TV encoder exits the top margin. + * + * To avoid huge hassles for vblank timestamping we scale + * the pipe timings as if the pipe always runs at the average + * rate it maintains during the active period. This also + * gives us a reasonable guesstimate as to the pixel rate. + * Due to the variation in the actual pipe speed the scanline + * counter will give us slightly erroneous results during the + * TV vblank/margins. But since vtotal was selected such that + * it matches the average rate of the pipe during the active + * portion the error shouldn't cause any serious grief to + * vblank timestamps. + * + * For posterity here is the empirically derived formula + * that gives us the maximum length of the pipe vblank + * we can use without causing display corruption. Following + * this would allow us to have a ticking scanline counter + * everywhere except during the bottom margin (there the + * pipe always stops). Ie. this would eliminate the second + * flat portion of the above graph. However this would also + * complicate vblank timestamping as the pipe vtotal would + * no longer match the average rate the pipe runs at during + * the active portion. Hence following this formula seems + * more trouble that it's worth. + * + * if (IS_GEN(dev_priv, 4)) { + * num = cdclk * (tv_mode->oversample >> !tv_mode->progressive); + * den = tv_mode->clock; + * } else { + * num = tv_mode->oversample >> !tv_mode->progressive; + * den = 1; + * } + * max_pipe_vblank_len ~= + * (num * tv_htotal * (tv_vblank_len + top_margin)) / + * (den * pipe_htotal); */ + intel_tv_scale_mode_horiz(adjusted_mode, hdisplay, + conn_state->tv.margins.left, + conn_state->tv.margins.right); + intel_tv_scale_mode_vert(adjusted_mode, vdisplay, + tv_conn_state->margins.top, + tv_conn_state->margins.bottom); + drm_mode_set_crtcinfo(adjusted_mode, 0); + adjusted_mode->name[0] = '\0'; + + /* pixel counter doesn't work on i965gm TV output */ + if (IS_I965GM(dev_priv)) + adjusted_mode->private_flags |= + I915_MODE_FLAG_USE_SCANLINE_COUNTER; return 0; } @@ -986,14 +1416,16 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc); struct intel_tv *intel_tv = enc_to_tv(encoder); + const struct intel_tv_connector_state *tv_conn_state = + to_intel_tv_connector_state(conn_state); const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state); - u32 tv_ctl; + u32 tv_ctl, tv_filter_ctl; u32 scctl1, scctl2, scctl3; int i, j; const struct video_levels *video_levels; const struct color_conversion *color_conversion; bool burst_ena; - int xpos = 0x0, ypos = 0x0; + int xpos, ypos; unsigned int xsize, ysize; if (!tv_mode) @@ -1029,7 +1461,21 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, } tv_ctl |= TV_ENC_PIPE_SEL(intel_crtc->pipe); - tv_ctl |= tv_mode->oversample; + + switch (tv_mode->oversample) { + case 8: + tv_ctl |= TV_OVERSAMPLE_8X; + break; + case 4: + tv_ctl |= TV_OVERSAMPLE_4X; + break; + case 2: + tv_ctl |= TV_OVERSAMPLE_2X; + break; + default: + tv_ctl |= TV_OVERSAMPLE_NONE; + break; + } if (tv_mode->progressive) tv_ctl |= TV_PROGRESSIVE; @@ -1081,19 +1527,20 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, assert_pipe_disabled(dev_priv, intel_crtc->pipe); /* Filter ctl must be set before TV_WIN_SIZE */ - I915_WRITE(TV_FILTER_CTL_1, TV_AUTO_SCALE); + tv_filter_ctl = TV_AUTO_SCALE; + if (tv_conn_state->bypass_vfilter) + tv_filter_ctl |= TV_V_FILTER_BYPASS; + I915_WRITE(TV_FILTER_CTL_1, tv_filter_ctl); + xsize = tv_mode->hblank_start - tv_mode->hblank_end; - if (tv_mode->progressive) - ysize = tv_mode->nbr_end + 1; - else - ysize = 2*tv_mode->nbr_end + 1; + ysize = intel_tv_mode_vdisplay(tv_mode); - xpos += conn_state->tv.margins.left; - ypos += conn_state->tv.margins.top; + xpos = conn_state->tv.margins.left; + ypos = tv_conn_state->margins.top; xsize -= (conn_state->tv.margins.left + conn_state->tv.margins.right); - ysize -= (conn_state->tv.margins.top + - conn_state->tv.margins.bottom); + ysize -= (tv_conn_state->margins.top + + tv_conn_state->margins.bottom); I915_WRITE(TV_WIN_POS, (xpos<<16)|ypos); I915_WRITE(TV_WIN_SIZE, (xsize<<16)|ysize); @@ -1110,23 +1557,6 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, I915_WRITE(TV_CTL, tv_ctl); } -static const struct drm_display_mode reported_modes[] = { - { - .name = "NTSC 480i", - .clock = 107520, - .hdisplay = 1280, - .hsync_start = 1368, - .hsync_end = 1496, - .htotal = 1712, - - .vdisplay = 1024, - .vsync_start = 1027, - .vsync_end = 1034, - .vtotal = 1104, - .type = DRM_MODE_TYPE_DRIVER, - }, -}; - static int intel_tv_detect_type(struct intel_tv *intel_tv, struct drm_connector *connector) @@ -1233,16 +1663,18 @@ static void intel_tv_find_better_format(struct drm_connector *connector) const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); int i; - if ((intel_tv->type == DRM_MODE_CONNECTOR_Component) == - tv_mode->component_only) + /* Component supports everything so we can keep the current mode */ + if (intel_tv->type == DRM_MODE_CONNECTOR_Component) return; + /* If the current mode is fine don't change it */ + if (!tv_mode->component_only) + return; for (i = 0; i < ARRAY_SIZE(tv_modes); i++) { - tv_mode = tv_modes + i; + tv_mode = &tv_modes[i]; - if ((intel_tv->type == DRM_MODE_CONNECTOR_Component) == - tv_mode->component_only) + if (!tv_mode->component_only) break; } @@ -1254,7 +1686,6 @@ intel_tv_detect(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx, bool force) { - struct drm_display_mode mode; struct intel_tv *intel_tv = intel_attached_tv(connector); enum drm_connector_status status; int type; @@ -1263,13 +1694,11 @@ intel_tv_detect(struct drm_connector *connector, connector->base.id, connector->name, force); - mode = reported_modes[0]; - if (force) { struct intel_load_detect_pipe tmp; int ret; - ret = intel_get_load_detect_pipe(connector, &mode, &tmp, ctx); + ret = intel_get_load_detect_pipe(connector, NULL, &tmp, ctx); if (ret < 0) return ret; @@ -1293,84 +1722,85 @@ intel_tv_detect(struct drm_connector *connector, } static const struct input_res { - const char *name; - int w, h; + u16 w, h; } input_res_table[] = { - {"640x480", 640, 480}, - {"800x600", 800, 600}, - {"1024x768", 1024, 768}, - {"1280x1024", 1280, 1024}, - {"848x480", 848, 480}, - {"1280x720", 1280, 720}, - {"1920x1080", 1920, 1080}, + { 640, 480 }, + { 800, 600 }, + { 1024, 768 }, + { 1280, 1024 }, + { 848, 480 }, + { 1280, 720 }, + { 1920, 1080 }, }; -/* - * Chose preferred mode according to line number of TV format - */ +/* Choose preferred mode according to line number of TV format */ +static bool +intel_tv_is_preferred_mode(const struct drm_display_mode *mode, + const struct tv_mode *tv_mode) +{ + int vdisplay = intel_tv_mode_vdisplay(tv_mode); + + /* prefer 480 line modes for all SD TV modes */ + if (vdisplay <= 576) + vdisplay = 480; + + return vdisplay == mode->vdisplay; +} + static void -intel_tv_choose_preferred_modes(const struct tv_mode *tv_mode, - struct drm_display_mode *mode_ptr) +intel_tv_set_mode_type(struct drm_display_mode *mode, + const struct tv_mode *tv_mode) { - if (tv_mode->nbr_end < 480 && mode_ptr->vdisplay == 480) - mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; - else if (tv_mode->nbr_end > 480) { - if (tv_mode->progressive == true && tv_mode->nbr_end < 720) { - if (mode_ptr->vdisplay == 720) - mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; - } else if (mode_ptr->vdisplay == 1080) - mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; - } + mode->type = DRM_MODE_TYPE_DRIVER; + + if (intel_tv_is_preferred_mode(mode, tv_mode)) + mode->type |= DRM_MODE_TYPE_PREFERRED; } static int intel_tv_get_modes(struct drm_connector *connector) { - struct drm_display_mode *mode_ptr; + struct drm_i915_private *dev_priv = to_i915(connector->dev); const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); - int j, count = 0; - u64 tmp; + int i, count = 0; - for (j = 0; j < ARRAY_SIZE(input_res_table); - j++) { - const struct input_res *input = &input_res_table[j]; - unsigned int hactive_s = input->w; - unsigned int vactive_s = input->h; + for (i = 0; i < ARRAY_SIZE(input_res_table); i++) { + const struct input_res *input = &input_res_table[i]; + struct drm_display_mode *mode; - if (tv_mode->max_srcw && input->w > tv_mode->max_srcw) + if (input->w > 1024 && + !tv_mode->progressive && + !tv_mode->component_only) continue; - if (input->w > 1024 && (!tv_mode->progressive - && !tv_mode->component_only)) + /* no vertical scaling with wide sources on gen3 */ + if (IS_GEN(dev_priv, 3) && input->w > 1024 && + input->h > intel_tv_mode_vdisplay(tv_mode)) continue; - mode_ptr = drm_mode_create(connector->dev); - if (!mode_ptr) + mode = drm_mode_create(connector->dev); + if (!mode) continue; - strlcpy(mode_ptr->name, input->name, DRM_DISPLAY_MODE_LEN); - - mode_ptr->hdisplay = hactive_s; - mode_ptr->hsync_start = hactive_s + 1; - mode_ptr->hsync_end = hactive_s + 64; - if (mode_ptr->hsync_end <= mode_ptr->hsync_start) - mode_ptr->hsync_end = mode_ptr->hsync_start + 1; - mode_ptr->htotal = hactive_s + 96; - - mode_ptr->vdisplay = vactive_s; - mode_ptr->vsync_start = vactive_s + 1; - mode_ptr->vsync_end = vactive_s + 32; - if (mode_ptr->vsync_end <= mode_ptr->vsync_start) - mode_ptr->vsync_end = mode_ptr->vsync_start + 1; - mode_ptr->vtotal = vactive_s + 33; - - tmp = mul_u32_u32(tv_mode->refresh, mode_ptr->vtotal); - tmp *= mode_ptr->htotal; - tmp = div_u64(tmp, 1000000); - mode_ptr->clock = (int) tmp; - - mode_ptr->type = DRM_MODE_TYPE_DRIVER; - intel_tv_choose_preferred_modes(tv_mode, mode_ptr); - drm_mode_probed_add(connector, mode_ptr); + + /* + * We take the TV mode and scale it to look + * like it had the expected h/vdisplay. This + * provides the most information to userspace + * about the actual timings of the mode. We + * do ignore the margins though. + */ + intel_tv_mode_to_mode(mode, tv_mode); + if (count == 0) { + DRM_DEBUG_KMS("TV mode:\n"); + drm_mode_debug_printmodeline(mode); + } + intel_tv_scale_mode_horiz(mode, input->w, 0, 0); + intel_tv_scale_mode_vert(mode, input->h, 0, 0); + intel_tv_set_mode_type(mode, tv_mode); + + drm_mode_set_name(mode); + + drm_mode_probed_add(connector, mode); count++; } @@ -1383,7 +1813,7 @@ static const struct drm_connector_funcs intel_tv_connector_funcs = { .destroy = intel_connector_destroy, .fill_modes = drm_helper_probe_single_connector_modes, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_duplicate_state = intel_tv_connector_duplicate_state, }; static int intel_tv_atomic_check(struct drm_connector *connector, @@ -1530,11 +1960,15 @@ intel_tv_init(struct drm_i915_private *dev_priv) connector->doublescan_allowed = false; /* Create TV properties then attach current values */ - for (i = 0; i < ARRAY_SIZE(tv_modes); i++) + for (i = 0; i < ARRAY_SIZE(tv_modes); i++) { + /* 1080p50/1080p60 not supported on gen3 */ + if (IS_GEN(dev_priv, 3) && + tv_modes[i].oversample == 1) + break; + tv_format_names[i] = tv_modes[i].name; - drm_mode_create_tv_properties(dev, - ARRAY_SIZE(tv_modes), - tv_format_names); + } + drm_mode_create_tv_properties(dev, i, tv_format_names); drm_object_attach_property(&connector->base, dev->mode_config.tv_mode_property, state->tv.mode); diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 731b82afe636..e711eb3268bc 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -26,6 +26,7 @@ #include "intel_guc_submission.h" #include "intel_guc.h" #include "i915_drv.h" +#include "i915_reset.h" static void guc_free_load_err_log(struct intel_guc *guc); diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index fff468f17d2d..75646a1e0051 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1670,6 +1670,7 @@ int i915_reg_read_ioctl(struct drm_device *dev, struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_reg_read *reg = data; struct reg_whitelist const *entry; + intel_wakeref_t wakeref; unsigned int flags; int remain; int ret = 0; @@ -1695,388 +1696,22 @@ int i915_reg_read_ioctl(struct drm_device *dev, flags = reg->offset & (entry->size - 1); - intel_runtime_pm_get(dev_priv); - if (entry->size == 8 && flags == I915_REG_READ_8B_WA) - reg->val = I915_READ64_2x32(entry->offset_ldw, - entry->offset_udw); - else if (entry->size == 8 && flags == 0) - reg->val = I915_READ64(entry->offset_ldw); - else if (entry->size == 4 && flags == 0) - reg->val = I915_READ(entry->offset_ldw); - else if (entry->size == 2 && flags == 0) - reg->val = I915_READ16(entry->offset_ldw); - else if (entry->size == 1 && flags == 0) - reg->val = I915_READ8(entry->offset_ldw); - else - ret = -EINVAL; - intel_runtime_pm_put(dev_priv); - - return ret; -} - -static void gen3_stop_engine(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - const u32 base = engine->mmio_base; - - if (intel_engine_stop_cs(engine)) - DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n", engine->name); - - I915_WRITE_FW(RING_HEAD(base), I915_READ_FW(RING_TAIL(base))); - POSTING_READ_FW(RING_HEAD(base)); /* paranoia */ - - I915_WRITE_FW(RING_HEAD(base), 0); - I915_WRITE_FW(RING_TAIL(base), 0); - POSTING_READ_FW(RING_TAIL(base)); - - /* The ring must be empty before it is disabled */ - I915_WRITE_FW(RING_CTL(base), 0); - - /* Check acts as a post */ - if (I915_READ_FW(RING_HEAD(base)) != 0) - DRM_DEBUG_DRIVER("%s: ring head not parked\n", - engine->name); -} - -static void i915_stop_engines(struct drm_i915_private *dev_priv, - unsigned int engine_mask) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - if (INTEL_GEN(dev_priv) < 3) - return; - - for_each_engine_masked(engine, dev_priv, engine_mask, id) - gen3_stop_engine(engine); -} - -static bool i915_in_reset(struct pci_dev *pdev) -{ - u8 gdrst; - - pci_read_config_byte(pdev, I915_GDRST, &gdrst); - return gdrst & GRDOM_RESET_STATUS; -} - -static int i915_do_reset(struct drm_i915_private *dev_priv, - unsigned int engine_mask, - unsigned int retry) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - int err; - - /* Assert reset for at least 20 usec, and wait for acknowledgement. */ - pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); - usleep_range(50, 200); - err = wait_for(i915_in_reset(pdev), 500); - - /* Clear the reset request. */ - pci_write_config_byte(pdev, I915_GDRST, 0); - usleep_range(50, 200); - if (!err) - err = wait_for(!i915_in_reset(pdev), 500); - - return err; -} - -static bool g4x_reset_complete(struct pci_dev *pdev) -{ - u8 gdrst; - - pci_read_config_byte(pdev, I915_GDRST, &gdrst); - return (gdrst & GRDOM_RESET_ENABLE) == 0; -} - -static int g33_do_reset(struct drm_i915_private *dev_priv, - unsigned int engine_mask, - unsigned int retry) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - - pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); - return wait_for(g4x_reset_complete(pdev), 500); -} - -static int g4x_do_reset(struct drm_i915_private *dev_priv, - unsigned int engine_mask, - unsigned int retry) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - int ret; - - /* WaVcpClkGateDisableForMediaReset:ctg,elk */ - I915_WRITE(VDECCLK_GATE_D, - I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); - POSTING_READ(VDECCLK_GATE_D); - - pci_write_config_byte(pdev, I915_GDRST, - GRDOM_MEDIA | GRDOM_RESET_ENABLE); - ret = wait_for(g4x_reset_complete(pdev), 500); - if (ret) { - DRM_DEBUG_DRIVER("Wait for media reset failed\n"); - goto out; - } - - pci_write_config_byte(pdev, I915_GDRST, - GRDOM_RENDER | GRDOM_RESET_ENABLE); - ret = wait_for(g4x_reset_complete(pdev), 500); - if (ret) { - DRM_DEBUG_DRIVER("Wait for render reset failed\n"); - goto out; + with_intel_runtime_pm(dev_priv, wakeref) { + if (entry->size == 8 && flags == I915_REG_READ_8B_WA) + reg->val = I915_READ64_2x32(entry->offset_ldw, + entry->offset_udw); + else if (entry->size == 8 && flags == 0) + reg->val = I915_READ64(entry->offset_ldw); + else if (entry->size == 4 && flags == 0) + reg->val = I915_READ(entry->offset_ldw); + else if (entry->size == 2 && flags == 0) + reg->val = I915_READ16(entry->offset_ldw); + else if (entry->size == 1 && flags == 0) + reg->val = I915_READ8(entry->offset_ldw); + else + ret = -EINVAL; } -out: - pci_write_config_byte(pdev, I915_GDRST, 0); - - I915_WRITE(VDECCLK_GATE_D, - I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); - POSTING_READ(VDECCLK_GATE_D); - - return ret; -} - -static int ironlake_do_reset(struct drm_i915_private *dev_priv, - unsigned int engine_mask, - unsigned int retry) -{ - int ret; - - I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); - ret = intel_wait_for_register(dev_priv, - ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, - 500); - if (ret) { - DRM_DEBUG_DRIVER("Wait for render reset failed\n"); - goto out; - } - - I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); - ret = intel_wait_for_register(dev_priv, - ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, - 500); - if (ret) { - DRM_DEBUG_DRIVER("Wait for media reset failed\n"); - goto out; - } - -out: - I915_WRITE(ILK_GDSR, 0); - POSTING_READ(ILK_GDSR); - return ret; -} - -/* Reset the hardware domains (GENX_GRDOM_*) specified by mask */ -static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv, - u32 hw_domain_mask) -{ - int err; - - /* GEN6_GDRST is not in the gt power well, no need to check - * for fifo space for the write or forcewake the chip for - * the read - */ - __raw_i915_write32(dev_priv, GEN6_GDRST, hw_domain_mask); - - /* Wait for the device to ack the reset requests */ - err = __intel_wait_for_register_fw(dev_priv, - GEN6_GDRST, hw_domain_mask, 0, - 500, 0, - NULL); - if (err) - DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n", - hw_domain_mask); - - return err; -} - -/** - * gen6_reset_engines - reset individual engines - * @dev_priv: i915 device - * @engine_mask: mask of intel_ring_flag() engines or ALL_ENGINES for full reset - * @retry: the count of of previous attempts to reset. - * - * This function will reset the individual engines that are set in engine_mask. - * If you provide ALL_ENGINES as mask, full global domain reset will be issued. - * - * Note: It is responsibility of the caller to handle the difference between - * asking full domain reset versus reset for all available individual engines. - * - * Returns 0 on success, nonzero on error. - */ -static int gen6_reset_engines(struct drm_i915_private *dev_priv, - unsigned int engine_mask, - unsigned int retry) -{ - struct intel_engine_cs *engine; - const u32 hw_engine_mask[I915_NUM_ENGINES] = { - [RCS] = GEN6_GRDOM_RENDER, - [BCS] = GEN6_GRDOM_BLT, - [VCS] = GEN6_GRDOM_MEDIA, - [VCS2] = GEN8_GRDOM_MEDIA2, - [VECS] = GEN6_GRDOM_VECS, - }; - u32 hw_mask; - - if (engine_mask == ALL_ENGINES) { - hw_mask = GEN6_GRDOM_FULL; - } else { - unsigned int tmp; - - hw_mask = 0; - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) - hw_mask |= hw_engine_mask[engine->id]; - } - - return gen6_hw_domain_reset(dev_priv, hw_mask); -} - -static u32 gen11_lock_sfc(struct drm_i915_private *dev_priv, - struct intel_engine_cs *engine) -{ - u8 vdbox_sfc_access = RUNTIME_INFO(dev_priv)->vdbox_sfc_access; - i915_reg_t sfc_forced_lock, sfc_forced_lock_ack; - u32 sfc_forced_lock_bit, sfc_forced_lock_ack_bit; - i915_reg_t sfc_usage; - u32 sfc_usage_bit; - u32 sfc_reset_bit; - - switch (engine->class) { - case VIDEO_DECODE_CLASS: - if ((BIT(engine->instance) & vdbox_sfc_access) == 0) - return 0; - - sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine); - sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT; - - sfc_forced_lock_ack = GEN11_VCS_SFC_LOCK_STATUS(engine); - sfc_forced_lock_ack_bit = GEN11_VCS_SFC_LOCK_ACK_BIT; - - sfc_usage = GEN11_VCS_SFC_LOCK_STATUS(engine); - sfc_usage_bit = GEN11_VCS_SFC_USAGE_BIT; - sfc_reset_bit = GEN11_VCS_SFC_RESET_BIT(engine->instance); - break; - - case VIDEO_ENHANCEMENT_CLASS: - sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine); - sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT; - - sfc_forced_lock_ack = GEN11_VECS_SFC_LOCK_ACK(engine); - sfc_forced_lock_ack_bit = GEN11_VECS_SFC_LOCK_ACK_BIT; - - sfc_usage = GEN11_VECS_SFC_USAGE(engine); - sfc_usage_bit = GEN11_VECS_SFC_USAGE_BIT; - sfc_reset_bit = GEN11_VECS_SFC_RESET_BIT(engine->instance); - break; - - default: - return 0; - } - - /* - * Tell the engine that a software reset is going to happen. The engine - * will then try to force lock the SFC (if currently locked, it will - * remain so until we tell the engine it is safe to unlock; if currently - * unlocked, it will ignore this and all new lock requests). If SFC - * ends up being locked to the engine we want to reset, we have to reset - * it as well (we will unlock it once the reset sequence is completed). - */ - I915_WRITE_FW(sfc_forced_lock, - I915_READ_FW(sfc_forced_lock) | sfc_forced_lock_bit); - - if (__intel_wait_for_register_fw(dev_priv, - sfc_forced_lock_ack, - sfc_forced_lock_ack_bit, - sfc_forced_lock_ack_bit, - 1000, 0, NULL)) { - DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n"); - return 0; - } - - if (I915_READ_FW(sfc_usage) & sfc_usage_bit) - return sfc_reset_bit; - - return 0; -} - -static void gen11_unlock_sfc(struct drm_i915_private *dev_priv, - struct intel_engine_cs *engine) -{ - u8 vdbox_sfc_access = RUNTIME_INFO(dev_priv)->vdbox_sfc_access; - i915_reg_t sfc_forced_lock; - u32 sfc_forced_lock_bit; - - switch (engine->class) { - case VIDEO_DECODE_CLASS: - if ((BIT(engine->instance) & vdbox_sfc_access) == 0) - return; - - sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine); - sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT; - break; - - case VIDEO_ENHANCEMENT_CLASS: - sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine); - sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT; - break; - - default: - return; - } - - I915_WRITE_FW(sfc_forced_lock, - I915_READ_FW(sfc_forced_lock) & ~sfc_forced_lock_bit); -} - -/** - * gen11_reset_engines - reset individual engines - * @dev_priv: i915 device - * @engine_mask: mask of intel_ring_flag() engines or ALL_ENGINES for full reset - * - * This function will reset the individual engines that are set in engine_mask. - * If you provide ALL_ENGINES as mask, full global domain reset will be issued. - * - * Note: It is responsibility of the caller to handle the difference between - * asking full domain reset versus reset for all available individual engines. - * - * Returns 0 on success, nonzero on error. - */ -static int gen11_reset_engines(struct drm_i915_private *dev_priv, - unsigned int engine_mask) -{ - const u32 hw_engine_mask[I915_NUM_ENGINES] = { - [RCS] = GEN11_GRDOM_RENDER, - [BCS] = GEN11_GRDOM_BLT, - [VCS] = GEN11_GRDOM_MEDIA, - [VCS2] = GEN11_GRDOM_MEDIA2, - [VCS3] = GEN11_GRDOM_MEDIA3, - [VCS4] = GEN11_GRDOM_MEDIA4, - [VECS] = GEN11_GRDOM_VECS, - [VECS2] = GEN11_GRDOM_VECS2, - }; - struct intel_engine_cs *engine; - unsigned int tmp; - u32 hw_mask; - int ret; - - BUILD_BUG_ON(VECS2 + 1 != I915_NUM_ENGINES); - - if (engine_mask == ALL_ENGINES) { - hw_mask = GEN11_GRDOM_FULL; - } else { - hw_mask = 0; - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { - hw_mask |= hw_engine_mask[engine->id]; - hw_mask |= gen11_lock_sfc(dev_priv, engine); - } - } - - ret = gen6_hw_domain_reset(dev_priv, hw_mask); - - if (engine_mask != ALL_ENGINES) - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) - gen11_unlock_sfc(dev_priv, engine); - return ret; } @@ -2184,202 +1819,15 @@ int __intel_wait_for_register(struct drm_i915_private *dev_priv, (reg_value & mask) == value, slow_timeout_ms * 1000, 10, 1000); + /* just trace the final value */ + trace_i915_reg_rw(false, reg, reg_value, sizeof(reg_value), true); + if (out_value) *out_value = reg_value; return ret; } -static int gen8_engine_reset_prepare(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base), - _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET)); - - ret = __intel_wait_for_register_fw(dev_priv, - RING_RESET_CTL(engine->mmio_base), - RESET_CTL_READY_TO_RESET, - RESET_CTL_READY_TO_RESET, - 700, 0, - NULL); - if (ret) - DRM_ERROR("%s: reset request timeout\n", engine->name); - - return ret; -} - -static void gen8_engine_reset_cancel(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base), - _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET)); -} - -static int reset_engines(struct drm_i915_private *i915, - unsigned int engine_mask, - unsigned int retry) -{ - if (INTEL_GEN(i915) >= 11) - return gen11_reset_engines(i915, engine_mask); - else - return gen6_reset_engines(i915, engine_mask, retry); -} - -static int gen8_reset_engines(struct drm_i915_private *dev_priv, - unsigned int engine_mask, - unsigned int retry) -{ - struct intel_engine_cs *engine; - const bool reset_non_ready = retry >= 1; - unsigned int tmp; - int ret; - - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { - ret = gen8_engine_reset_prepare(engine); - if (ret && !reset_non_ready) - goto skip_reset; - - /* - * If this is not the first failed attempt to prepare, - * we decide to proceed anyway. - * - * By doing so we risk context corruption and with - * some gens (kbl), possible system hang if reset - * happens during active bb execution. - * - * We rather take context corruption instead of - * failed reset with a wedged driver/gpu. And - * active bb execution case should be covered by - * i915_stop_engines we have before the reset. - */ - } - - ret = reset_engines(dev_priv, engine_mask, retry); - -skip_reset: - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) - gen8_engine_reset_cancel(engine); - - return ret; -} - -typedef int (*reset_func)(struct drm_i915_private *, - unsigned int engine_mask, unsigned int retry); - -static reset_func intel_get_gpu_reset(struct drm_i915_private *dev_priv) -{ - if (!i915_modparams.reset) - return NULL; - - if (INTEL_GEN(dev_priv) >= 8) - return gen8_reset_engines; - else if (INTEL_GEN(dev_priv) >= 6) - return gen6_reset_engines; - else if (IS_GEN(dev_priv, 5)) - return ironlake_do_reset; - else if (IS_G4X(dev_priv)) - return g4x_do_reset; - else if (IS_G33(dev_priv) || IS_PINEVIEW(dev_priv)) - return g33_do_reset; - else if (INTEL_GEN(dev_priv) >= 3) - return i915_do_reset; - else - return NULL; -} - -int intel_gpu_reset(struct drm_i915_private *dev_priv, - const unsigned int engine_mask) -{ - reset_func reset = intel_get_gpu_reset(dev_priv); - unsigned int retry; - int ret; - - GEM_BUG_ON(!engine_mask); - - /* - * We want to perform per-engine reset from atomic context (e.g. - * softirq), which imposes the constraint that we cannot sleep. - * However, experience suggests that spending a bit of time waiting - * for a reset helps in various cases, so for a full-device reset - * we apply the opposite rule and wait if we want to. As we should - * always follow up a failed per-engine reset with a full device reset, - * being a little faster, stricter and more error prone for the - * atomic case seems an acceptable compromise. - * - * Unfortunately this leads to a bimodal routine, when the goal was - * to have a single reset function that worked for resetting any - * number of engines simultaneously. - */ - might_sleep_if(engine_mask == ALL_ENGINES); - - /* - * If the power well sleeps during the reset, the reset - * request may be dropped and never completes (causing -EIO). - */ - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - for (retry = 0; retry < 3; retry++) { - - /* - * We stop engines, otherwise we might get failed reset and a - * dead gpu (on elk). Also as modern gpu as kbl can suffer - * from system hang if batchbuffer is progressing when - * the reset is issued, regardless of READY_TO_RESET ack. - * Thus assume it is best to stop engines on all gens - * where we have a gpu reset. - * - * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) - * - * WaMediaResetMainRingCleanup:ctg,elk (presumably) - * - * FIXME: Wa for more modern gens needs to be validated - */ - i915_stop_engines(dev_priv, engine_mask); - - ret = -ENODEV; - if (reset) { - ret = reset(dev_priv, engine_mask, retry); - GEM_TRACE("engine_mask=%x, ret=%d, retry=%d\n", - engine_mask, ret, retry); - } - if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES) - break; - - cond_resched(); - } - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - - return ret; -} - -bool intel_has_gpu_reset(struct drm_i915_private *dev_priv) -{ - return intel_get_gpu_reset(dev_priv) != NULL; -} - -bool intel_has_reset_engine(struct drm_i915_private *dev_priv) -{ - return (INTEL_INFO(dev_priv)->has_reset_engine && - i915_modparams.reset >= 2); -} - -int intel_reset_guc(struct drm_i915_private *dev_priv) -{ - u32 guc_domain = INTEL_GEN(dev_priv) >= 11 ? GEN11_GRDOM_GUC : - GEN9_GRDOM_GUC; - int ret; - - GEM_BUG_ON(!HAS_GUC(dev_priv)); - - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - ret = gen6_hw_domain_reset(dev_priv, guc_domain); - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - - return ret; -} - bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv) { return check_for_unclaimed_mmio(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_vdsc.c b/drivers/gpu/drm/i915/intel_vdsc.c index 48537827616f..23abf03736e7 100644 --- a/drivers/gpu/drm/i915/intel_vdsc.c +++ b/drivers/gpu/drm/i915/intel_vdsc.c @@ -1082,6 +1082,6 @@ void intel_dsc_disable(const struct intel_crtc_state *old_crtc_state) I915_WRITE(dss_ctl2_reg, dss_ctl2_val); /* Disable Power wells for VDSC/joining */ - intel_display_power_put(dev_priv, - intel_dsc_power_domain(old_crtc_state)); + intel_display_power_put_unchecked(dev_priv, + intel_dsc_power_domain(old_crtc_state)); } diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index 3210ad4e08f7..15f4a6dee5aa 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -142,7 +142,8 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) } static void -__wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) +wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, + u32 val) { struct i915_wa wa = { .reg = reg, @@ -153,16 +154,32 @@ __wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) _wa_add(wal, &wa); } -#define WA_REG(addr, mask, val) __wa_add(wal, (addr), (mask), (val)) +static void +wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ + wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val)); +} + +static void +wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ + wa_write_masked_or(wal, reg, ~0, val); +} + +static void +wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ + wa_write_masked_or(wal, reg, val, val); +} #define WA_SET_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) + wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask)) #define WA_CLR_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) + wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask)) #define WA_SET_FIELD_MASKED(addr, mask, value) \ - WA_REG(addr, (mask), _MASKED_FIELD(mask, value)) + wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value))) static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine) { @@ -532,6 +549,12 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine) if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC); + + /* WaEnableFloatBlendOptimization:icl */ + wa_write_masked_or(wal, + GEN10_CACHE_MODE_SS, + 0, /* write-only, so skip validation */ + _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE)); } void intel_engine_init_ctx_wa(struct intel_engine_cs *engine) @@ -603,43 +626,6 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) } static void -wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) -{ - struct i915_wa wa = { - .reg = reg, - .mask = val, - .val = _MASKED_BIT_ENABLE(val) - }; - - _wa_add(wal, &wa); -} - -static void -wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, - u32 val) -{ - struct i915_wa wa = { - .reg = reg, - .mask = mask, - .val = val - }; - - _wa_add(wal, &wa); -} - -static void -wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val) -{ - wa_write_masked_or(wal, reg, ~0, val); -} - -static void -wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) -{ - wa_write_masked_or(wal, reg, val, val); -} - -static void gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { /* WaDisableKillLogic:bxt,skl,kbl */ diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index 6c10734e948d..a9a2fa35876f 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -1449,7 +1449,7 @@ static int igt_ppgtt_pin_update(void *arg) * huge-gtt-pages. */ - if (!HAS_FULL_48BIT_PPGTT(dev_priv)) { + if (!ppgtt || !i915_vm_is_48bit(&ppgtt->vm)) { pr_info("48b PPGTT not supported, skipping\n"); return 0; } @@ -1756,6 +1756,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) }; struct drm_file *file; struct i915_gem_context *ctx; + intel_wakeref_t wakeref; int err; if (!HAS_PPGTT(dev_priv)) { @@ -1771,7 +1772,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) return PTR_ERR(file); mutex_lock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); ctx = live_context(dev_priv, file); if (IS_ERR(ctx)) { @@ -1785,7 +1786,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) err = i915_subtests(tests, ctx); out_unlock: - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); mutex_unlock(&dev_priv->drm.struct_mutex); mock_file_free(dev_priv, file); diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c new file mode 100644 index 000000000000..337b1f98b923 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -0,0 +1,157 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include "../i915_selftest.h" + +#include "igt_flush_test.h" +#include "lib_sw_fence.h" + +struct live_active { + struct i915_active base; + bool retired; +}; + +static void __live_active_retire(struct i915_active *base) +{ + struct live_active *active = container_of(base, typeof(*active), base); + + active->retired = true; +} + +static int __live_active_setup(struct drm_i915_private *i915, + struct live_active *active) +{ + struct intel_engine_cs *engine; + struct i915_sw_fence *submit; + enum intel_engine_id id; + unsigned int count = 0; + int err = 0; + + submit = heap_fence_create(GFP_KERNEL); + if (!submit) + return -ENOMEM; + + i915_active_init(i915, &active->base, __live_active_retire); + active->retired = false; + + if (!i915_active_acquire(&active->base)) { + pr_err("First i915_active_acquire should report being idle\n"); + err = -EINVAL; + goto out; + } + + for_each_engine(engine, i915, id) { + struct i915_request *rq; + + rq = i915_request_alloc(engine, i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + err = i915_sw_fence_await_sw_fence_gfp(&rq->submit, + submit, + GFP_KERNEL); + if (err >= 0) + err = i915_active_ref(&active->base, + rq->fence.context, rq); + i915_request_add(rq); + if (err) { + pr_err("Failed to track active ref!\n"); + break; + } + + count++; + } + + i915_active_release(&active->base); + if (active->retired && count) { + pr_err("i915_active retired before submission!\n"); + err = -EINVAL; + } + if (active->base.count != count) { + pr_err("i915_active not tracking all requests, found %d, expected %d\n", + active->base.count, count); + err = -EINVAL; + } + +out: + i915_sw_fence_commit(submit); + heap_fence_put(submit); + + return err; +} + +static int live_active_wait(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct live_active active; + intel_wakeref_t wakeref; + int err; + + /* Check that we get a callback when requests retire upon waiting */ + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + err = __live_active_setup(i915, &active); + + i915_active_wait(&active.base); + if (!active.retired) { + pr_err("i915_active not retired after waiting!\n"); + err = -EINVAL; + } + + i915_active_fini(&active.base); + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int live_active_retire(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct live_active active; + intel_wakeref_t wakeref; + int err; + + /* Check that we get a callback when requests are indirectly retired */ + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + err = __live_active_setup(i915, &active); + + /* waits for & retires all requests */ + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + if (!active.retired) { + pr_err("i915_active not retired after flushing!\n"); + err = -EINVAL; + } + + i915_active_fini(&active.base); + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +int i915_active_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_active_wait), + SUBTEST(live_active_retire), + }; + + if (i915_terminally_wedged(&i915->gpu_error)) + return 0; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index bdcc53e15e75..e77b7ed449ae 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -16,9 +16,10 @@ static int switch_to_context(struct drm_i915_private *i915, { struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; int err = 0; - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); for_each_engine(engine, i915, id) { struct i915_request *rq; @@ -32,7 +33,7 @@ static int switch_to_context(struct drm_i915_private *i915, i915_request_add(rq); } - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); return err; } @@ -65,7 +66,9 @@ static void trash_stolen(struct drm_i915_private *i915) static void simulate_hibernate(struct drm_i915_private *i915) { - intel_runtime_pm_get(i915); + intel_wakeref_t wakeref; + + wakeref = intel_runtime_pm_get(i915); /* * As a final sting in the tail, invalidate stolen. Under a real S4, @@ -76,7 +79,7 @@ static void simulate_hibernate(struct drm_i915_private *i915) */ trash_stolen(i915); - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); } static int pm_prepare(struct drm_i915_private *i915) @@ -93,39 +96,39 @@ static int pm_prepare(struct drm_i915_private *i915) static void pm_suspend(struct drm_i915_private *i915) { - intel_runtime_pm_get(i915); - - i915_gem_suspend_gtt_mappings(i915); - i915_gem_suspend_late(i915); + intel_wakeref_t wakeref; - intel_runtime_pm_put(i915); + with_intel_runtime_pm(i915, wakeref) { + i915_gem_suspend_gtt_mappings(i915); + i915_gem_suspend_late(i915); + } } static void pm_hibernate(struct drm_i915_private *i915) { - intel_runtime_pm_get(i915); + intel_wakeref_t wakeref; - i915_gem_suspend_gtt_mappings(i915); + with_intel_runtime_pm(i915, wakeref) { + i915_gem_suspend_gtt_mappings(i915); - i915_gem_freeze(i915); - i915_gem_freeze_late(i915); - - intel_runtime_pm_put(i915); + i915_gem_freeze(i915); + i915_gem_freeze_late(i915); + } } static void pm_resume(struct drm_i915_private *i915) { + intel_wakeref_t wakeref; + /* * Both suspend and hibernate follow the same wakeup path and assume * that runtime-pm just works. */ - intel_runtime_pm_get(i915); - - intel_engines_sanitize(i915, false); - i915_gem_sanitize(i915); - i915_gem_resume(i915); - - intel_runtime_pm_put(i915); + with_intel_runtime_pm(i915, wakeref) { + intel_engines_sanitize(i915, false); + i915_gem_sanitize(i915); + i915_gem_resume(i915); + } } static int igt_gem_suspend(void *arg) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index f7392c1ffe75..fd89a5a33c1a 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -279,6 +279,7 @@ static int igt_gem_coherency(void *arg) struct drm_i915_private *i915 = arg; const struct igt_coherency_mode *read, *write, *over; struct drm_i915_gem_object *obj; + intel_wakeref_t wakeref; unsigned long count, n; u32 *offsets, *values; int err = 0; @@ -298,7 +299,7 @@ static int igt_gem_coherency(void *arg) values = offsets + ncachelines; mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); for (over = igt_coherency_mode; over->name; over++) { if (!over->set) continue; @@ -376,7 +377,7 @@ static int igt_gem_coherency(void *arg) } } unlock: - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); kfree(offsets); return err; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index d00cdf3c2939..d00d0bb07784 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -24,9 +24,13 @@ #include <linux/prime_numbers.h> +#include "../i915_reset.h" #include "../i915_selftest.h" #include "i915_random.h" #include "igt_flush_test.h" +#include "igt_live_test.h" +#include "igt_reset.h" +#include "igt_spinner.h" #include "mock_drm.h" #include "mock_gem_device.h" @@ -34,84 +38,6 @@ #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) -struct live_test { - struct drm_i915_private *i915; - const char *func; - const char *name; - - unsigned int reset_global; - unsigned int reset_engine[I915_NUM_ENGINES]; -}; - -static int begin_live_test(struct live_test *t, - struct drm_i915_private *i915, - const char *func, - const char *name) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err; - - t->i915 = i915; - t->func = func; - t->name = name; - - err = i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (err) { - pr_err("%s(%s): failed to idle before, with err=%d!", - func, name, err); - return err; - } - - i915->gpu_error.missed_irq_rings = 0; - t->reset_global = i915_reset_count(&i915->gpu_error); - - for_each_engine(engine, i915, id) - t->reset_engine[id] = - i915_reset_engine_count(&i915->gpu_error, engine); - - return 0; -} - -static int end_live_test(struct live_test *t) -{ - struct drm_i915_private *i915 = t->i915; - struct intel_engine_cs *engine; - enum intel_engine_id id; - - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - return -EIO; - - if (t->reset_global != i915_reset_count(&i915->gpu_error)) { - pr_err("%s(%s): GPU was reset %d times!\n", - t->func, t->name, - i915_reset_count(&i915->gpu_error) - t->reset_global); - return -EIO; - } - - for_each_engine(engine, i915, id) { - if (t->reset_engine[id] == - i915_reset_engine_count(&i915->gpu_error, engine)) - continue; - - pr_err("%s(%s): engine '%s' was reset %d times!\n", - t->func, t->name, engine->name, - i915_reset_engine_count(&i915->gpu_error, engine) - - t->reset_engine[id]); - return -EIO; - } - - if (i915->gpu_error.missed_irq_rings) { - pr_err("%s(%s): Missed interrupts on engines %lx\n", - t->func, t->name, i915->gpu_error.missed_irq_rings); - return -EIO; - } - - return 0; -} - static int live_nop_switch(void *arg) { const unsigned int nctx = 1024; @@ -119,8 +45,9 @@ static int live_nop_switch(void *arg) struct intel_engine_cs *engine; struct i915_gem_context **ctx; enum intel_engine_id id; + intel_wakeref_t wakeref; + struct igt_live_test t; struct drm_file *file; - struct live_test t; unsigned long n; int err = -ENODEV; @@ -140,7 +67,7 @@ static int live_nop_switch(void *arg) return PTR_ERR(file); mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL); if (!ctx) { @@ -184,7 +111,7 @@ static int live_nop_switch(void *arg) pr_info("Populated %d contexts on %s in %lluns\n", nctx, engine->name, ktime_to_ns(times[1] - times[0])); - err = begin_live_test(&t, i915, __func__, engine->name); + err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) goto out_unlock; @@ -232,7 +159,7 @@ static int live_nop_switch(void *arg) break; } - err = end_live_test(&t); + err = igt_live_test_end(&t); if (err) goto out_unlock; @@ -243,7 +170,7 @@ static int live_nop_switch(void *arg) } out_unlock: - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); return err; @@ -553,10 +480,10 @@ static int igt_ctx_exec(void *arg) struct drm_i915_private *i915 = arg; struct drm_i915_gem_object *obj = NULL; unsigned long ncontexts, ndwords, dw; + struct igt_live_test t; struct drm_file *file; IGT_TIMEOUT(end_time); LIST_HEAD(objects); - struct live_test t; int err = -ENODEV; /* @@ -574,7 +501,7 @@ static int igt_ctx_exec(void *arg) mutex_lock(&i915->drm.struct_mutex); - err = begin_live_test(&t, i915, __func__, ""); + err = igt_live_test_begin(&t, i915, __func__, ""); if (err) goto out_unlock; @@ -593,6 +520,8 @@ static int igt_ctx_exec(void *arg) } for_each_engine(engine, i915, id) { + intel_wakeref_t wakeref; + if (!engine->context_size) continue; /* No logical context support in HW */ @@ -607,9 +536,9 @@ static int igt_ctx_exec(void *arg) } } - intel_runtime_pm_get(i915); - err = gpu_fill(obj, ctx, engine, dw); - intel_runtime_pm_put(i915); + err = 0; + with_intel_runtime_pm(i915, wakeref) + err = gpu_fill(obj, ctx, engine, dw); if (err) { pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), @@ -642,7 +571,7 @@ static int igt_ctx_exec(void *arg) } out_unlock: - if (end_live_test(&t)) + if (igt_live_test_end(&t)) err = -EIO; mutex_unlock(&i915->drm.struct_mutex); @@ -650,6 +579,469 @@ out_unlock: return err; } +static struct i915_vma *rpcs_query_batch(struct i915_vma *vma) +{ + struct drm_i915_gem_object *obj; + u32 *cmd; + int err; + + if (INTEL_GEN(vma->vm->i915) < 8) + return ERR_PTR(-EINVAL); + + obj = i915_gem_object_create_internal(vma->vm->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + *cmd++ = MI_STORE_REGISTER_MEM_GEN8; + *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE); + *cmd++ = lower_32_bits(vma->node.start); + *cmd++ = upper_32_bits(vma->node.start); + *cmd = MI_BATCH_BUFFER_END; + + i915_gem_object_unpin_map(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + goto err; + + vma = i915_vma_instance(obj, vma->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static int +emit_rpcs_query(struct drm_i915_gem_object *obj, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct i915_request **rq_out) +{ + struct i915_request *rq; + struct i915_vma *batch; + struct i915_vma *vma; + int err; + + GEM_BUG_ON(!intel_engine_can_store_dword(engine)); + + vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + return err; + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return err; + + batch = rpcs_query_batch(vma); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err_vma; + } + + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_batch; + } + + err = engine->emit_bb_start(rq, batch->node.start, batch->node.size, 0); + if (err) + goto err_request; + + err = i915_vma_move_to_active(batch, rq, 0); + if (err) + goto skip_request; + + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + if (err) + goto skip_request; + + i915_gem_object_set_active_reference(batch->obj); + i915_vma_unpin(batch); + i915_vma_close(batch); + + i915_vma_unpin(vma); + + *rq_out = i915_request_get(rq); + + i915_request_add(rq); + + return 0; + +skip_request: + i915_request_skip(rq, err); +err_request: + i915_request_add(rq); +err_batch: + i915_vma_unpin(batch); +err_vma: + i915_vma_unpin(vma); + + return err; +} + +#define TEST_IDLE BIT(0) +#define TEST_BUSY BIT(1) +#define TEST_RESET BIT(2) + +static int +__sseu_prepare(struct drm_i915_private *i915, + const char *name, + unsigned int flags, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct igt_spinner **spin_out) +{ + int ret = 0; + + if (flags & (TEST_BUSY | TEST_RESET)) { + struct igt_spinner *spin; + struct i915_request *rq; + + spin = kzalloc(sizeof(*spin), GFP_KERNEL); + if (!spin) { + ret = -ENOMEM; + goto out; + } + + ret = igt_spinner_init(spin, i915); + if (ret) + return ret; + + rq = igt_spinner_create_request(spin, ctx, engine, MI_NOOP); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + igt_spinner_fini(spin); + kfree(spin); + goto out; + } + + i915_request_add(rq); + + if (!igt_wait_for_spinner(spin, rq)) { + pr_err("%s: Spinner failed to start!\n", name); + igt_spinner_end(spin); + igt_spinner_fini(spin); + kfree(spin); + ret = -ETIMEDOUT; + goto out; + } + + *spin_out = spin; + } + +out: + return ret; +} + +static int +__read_slice_count(struct drm_i915_private *i915, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct drm_i915_gem_object *obj, + struct igt_spinner *spin, + u32 *rpcs) +{ + struct i915_request *rq = NULL; + u32 s_mask, s_shift; + unsigned int cnt; + u32 *buf, val; + long ret; + + ret = emit_rpcs_query(obj, ctx, engine, &rq); + if (ret) + return ret; + + if (spin) + igt_spinner_end(spin); + + ret = i915_request_wait(rq, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + if (ret < 0) + return ret; + + buf = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + return ret; + } + + if (INTEL_GEN(i915) >= 11) { + s_mask = GEN11_RPCS_S_CNT_MASK; + s_shift = GEN11_RPCS_S_CNT_SHIFT; + } else { + s_mask = GEN8_RPCS_S_CNT_MASK; + s_shift = GEN8_RPCS_S_CNT_SHIFT; + } + + val = *buf; + cnt = (val & s_mask) >> s_shift; + *rpcs = val; + + i915_gem_object_unpin_map(obj); + + return cnt; +} + +static int +__check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected, + const char *prefix, const char *suffix) +{ + if (slices == expected) + return 0; + + if (slices < 0) { + pr_err("%s: %s read slice count failed with %d%s\n", + name, prefix, slices, suffix); + return slices; + } + + pr_err("%s: %s slice count %d is not %u%s\n", + name, prefix, slices, expected, suffix); + + pr_info("RPCS=0x%x; %u%sx%u%s\n", + rpcs, slices, + (rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "", + (rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT, + (rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : ""); + + return -EINVAL; +} + +static int +__sseu_finish(struct drm_i915_private *i915, + const char *name, + unsigned int flags, + struct i915_gem_context *ctx, + struct i915_gem_context *kctx, + struct intel_engine_cs *engine, + struct drm_i915_gem_object *obj, + unsigned int expected, + struct igt_spinner *spin) +{ + unsigned int slices = + hweight32(intel_device_default_sseu(i915).slice_mask); + u32 rpcs = 0; + int ret = 0; + + if (flags & TEST_RESET) { + ret = i915_reset_engine(engine, "sseu"); + if (ret) + goto out; + } + + ret = __read_slice_count(i915, ctx, engine, obj, + flags & TEST_RESET ? NULL : spin, &rpcs); + ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!"); + if (ret) + goto out; + + ret = __read_slice_count(i915, kctx, engine, obj, NULL, &rpcs); + ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!"); + +out: + if (spin) + igt_spinner_end(spin); + + if ((flags & TEST_IDLE) && ret == 0) { + ret = i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (ret) + return ret; + + ret = __read_slice_count(i915, ctx, engine, obj, NULL, &rpcs); + ret = __check_rpcs(name, rpcs, ret, expected, + "Context", " after idle!"); + } + + return ret; +} + +static int +__sseu_test(struct drm_i915_private *i915, + const char *name, + unsigned int flags, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct drm_i915_gem_object *obj, + struct intel_sseu sseu) +{ + struct igt_spinner *spin = NULL; + struct i915_gem_context *kctx; + int ret; + + kctx = kernel_context(i915); + if (IS_ERR(kctx)) + return PTR_ERR(kctx); + + ret = __sseu_prepare(i915, name, flags, ctx, engine, &spin); + if (ret) + goto out; + + ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu); + if (ret) + goto out; + + ret = __sseu_finish(i915, name, flags, ctx, kctx, engine, obj, + hweight32(sseu.slice_mask), spin); + +out: + if (spin) { + igt_spinner_end(spin); + igt_spinner_fini(spin); + kfree(spin); + } + + kernel_context_close(kctx); + + return ret; +} + +static int +__igt_ctx_sseu(struct drm_i915_private *i915, + const char *name, + unsigned int flags) +{ + struct intel_sseu default_sseu = intel_device_default_sseu(i915); + struct intel_engine_cs *engine = i915->engine[RCS]; + struct drm_i915_gem_object *obj; + struct i915_gem_context *ctx; + struct intel_sseu pg_sseu; + intel_wakeref_t wakeref; + struct drm_file *file; + int ret; + + if (INTEL_GEN(i915) < 9) + return 0; + + if (!RUNTIME_INFO(i915)->sseu.has_slice_pg) + return 0; + + if (hweight32(default_sseu.slice_mask) < 2) + return 0; + + /* + * Gen11 VME friendly power-gated configuration with half enabled + * sub-slices. + */ + pg_sseu = default_sseu; + pg_sseu.slice_mask = 1; + pg_sseu.subslice_mask = + ~(~0 << (hweight32(default_sseu.subslice_mask) / 2)); + + pr_info("SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", + name, flags, hweight32(default_sseu.slice_mask), + hweight32(pg_sseu.slice_mask)); + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + if (flags & TEST_RESET) + igt_global_reset_lock(i915); + + mutex_lock(&i915->drm.struct_mutex); + + ctx = i915_gem_create_context(i915, file->driver_priv); + if (IS_ERR(ctx)) { + ret = PTR_ERR(ctx); + goto out_unlock; + } + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + ret = PTR_ERR(obj); + goto out_unlock; + } + + wakeref = intel_runtime_pm_get(i915); + + /* First set the default mask. */ + ret = __sseu_test(i915, name, flags, ctx, engine, obj, default_sseu); + if (ret) + goto out_fail; + + /* Then set a power-gated configuration. */ + ret = __sseu_test(i915, name, flags, ctx, engine, obj, pg_sseu); + if (ret) + goto out_fail; + + /* Back to defaults. */ + ret = __sseu_test(i915, name, flags, ctx, engine, obj, default_sseu); + if (ret) + goto out_fail; + + /* One last power-gated configuration for the road. */ + ret = __sseu_test(i915, name, flags, ctx, engine, obj, pg_sseu); + if (ret) + goto out_fail; + +out_fail: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + ret = -EIO; + + i915_gem_object_put(obj); + + intel_runtime_pm_put(i915, wakeref); + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + + if (flags & TEST_RESET) + igt_global_reset_unlock(i915); + + mock_file_free(i915, file); + + if (ret) + pr_err("%s: Failed with %d!\n", name, ret); + + return ret; +} + +static int igt_ctx_sseu(void *arg) +{ + struct { + const char *name; + unsigned int flags; + } *phase, phases[] = { + { .name = "basic", .flags = 0 }, + { .name = "idle", .flags = TEST_IDLE }, + { .name = "busy", .flags = TEST_BUSY }, + { .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET }, + { .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE }, + { .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE }, + }; + unsigned int i; + int ret = 0; + + for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases); + i++, phase++) + ret = __igt_ctx_sseu(arg, phase->name, phase->flags); + + return ret; +} + static int igt_ctx_readonly(void *arg) { struct drm_i915_private *i915 = arg; @@ -657,11 +1049,11 @@ static int igt_ctx_readonly(void *arg) struct i915_gem_context *ctx; struct i915_hw_ppgtt *ppgtt; unsigned long ndwords, dw; + struct igt_live_test t; struct drm_file *file; I915_RND_STATE(prng); IGT_TIMEOUT(end_time); LIST_HEAD(objects); - struct live_test t; int err = -ENODEV; /* @@ -676,7 +1068,7 @@ static int igt_ctx_readonly(void *arg) mutex_lock(&i915->drm.struct_mutex); - err = begin_live_test(&t, i915, __func__, ""); + err = igt_live_test_begin(&t, i915, __func__, ""); if (err) goto out_unlock; @@ -699,6 +1091,8 @@ static int igt_ctx_readonly(void *arg) unsigned int id; for_each_engine(engine, i915, id) { + intel_wakeref_t wakeref; + if (!intel_engine_can_store_dword(engine)) continue; @@ -713,9 +1107,9 @@ static int igt_ctx_readonly(void *arg) i915_gem_object_set_readonly(obj); } - intel_runtime_pm_get(i915); - err = gpu_fill(obj, ctx, engine, dw); - intel_runtime_pm_put(i915); + err = 0; + with_intel_runtime_pm(i915, wakeref) + err = gpu_fill(obj, ctx, engine, dw); if (err) { pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), @@ -752,7 +1146,7 @@ static int igt_ctx_readonly(void *arg) } out_unlock: - if (end_live_test(&t)) + if (igt_live_test_end(&t)) err = -EIO; mutex_unlock(&i915->drm.struct_mutex); @@ -976,10 +1370,11 @@ static int igt_vm_isolation(void *arg) struct drm_i915_private *i915 = arg; struct i915_gem_context *ctx_a, *ctx_b; struct intel_engine_cs *engine; + intel_wakeref_t wakeref; + struct igt_live_test t; struct drm_file *file; I915_RND_STATE(prng); unsigned long count; - struct live_test t; unsigned int id; u64 vm_total; int err; @@ -998,7 +1393,7 @@ static int igt_vm_isolation(void *arg) mutex_lock(&i915->drm.struct_mutex); - err = begin_live_test(&t, i915, __func__, ""); + err = igt_live_test_begin(&t, i915, __func__, ""); if (err) goto out_unlock; @@ -1022,7 +1417,7 @@ static int igt_vm_isolation(void *arg) GEM_BUG_ON(ctx_b->ppgtt->vm.total != vm_total); vm_total -= I915_GTT_PAGE_SIZE; - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); count = 0; for_each_engine(engine, i915, id) { @@ -1067,9 +1462,9 @@ static int igt_vm_isolation(void *arg) count, RUNTIME_INFO(i915)->num_rings); out_rpm: - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); out_unlock: - if (end_live_test(&t)) + if (igt_live_test_end(&t)) err = -EIO; mutex_unlock(&i915->drm.struct_mutex); @@ -1165,6 +1560,7 @@ static int igt_switch_to_kernel_context(void *arg) struct intel_engine_cs *engine; struct i915_gem_context *ctx; enum intel_engine_id id; + intel_wakeref_t wakeref; int err; /* @@ -1175,7 +1571,7 @@ static int igt_switch_to_kernel_context(void *arg) */ mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); ctx = kernel_context(i915); if (IS_ERR(ctx)) { @@ -1200,7 +1596,7 @@ out_unlock: if (igt_flush_test(i915, I915_WAIT_LOCKED)) err = -EIO; - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); kernel_context_close(ctx); @@ -1232,6 +1628,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) SUBTEST(live_nop_switch), SUBTEST(igt_ctx_exec), SUBTEST(igt_ctx_readonly), + SUBTEST(igt_ctx_sseu), SUBTEST(igt_vm_isolation), }; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 4365979d8222..32dce7176f63 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -29,11 +29,23 @@ #include "mock_drm.h" #include "mock_gem_device.h" -static int populate_ggtt(struct drm_i915_private *i915) +static void quirk_add(struct drm_i915_gem_object *obj, + struct list_head *objects) { + /* quirk is only for live tiled objects, use it to declare ownership */ + GEM_BUG_ON(obj->mm.quirked); + obj->mm.quirked = true; + list_add(&obj->st_link, objects); +} + +static int populate_ggtt(struct drm_i915_private *i915, + struct list_head *objects) +{ + unsigned long unbound, bound, count; struct drm_i915_gem_object *obj; u64 size; + count = 0; for (size = 0; size + I915_GTT_PAGE_SIZE <= i915->ggtt.vm.total; size += I915_GTT_PAGE_SIZE) { @@ -43,21 +55,36 @@ static int populate_ggtt(struct drm_i915_private *i915) if (IS_ERR(obj)) return PTR_ERR(obj); + quirk_add(obj, objects); + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); if (IS_ERR(vma)) return PTR_ERR(vma); + + count++; } - if (!list_empty(&i915->mm.unbound_list)) { - size = 0; - list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) - size++; + unbound = 0; + list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) + if (obj->mm.quirked) + unbound++; + if (unbound) { + pr_err("%s: Found %lu objects unbound, expected %u!\n", + __func__, unbound, 0); + return -EINVAL; + } - pr_err("Found %lld objects unbound!\n", size); + bound = 0; + list_for_each_entry(obj, &i915->mm.bound_list, mm.link) + if (obj->mm.quirked) + bound++; + if (bound != count) { + pr_err("%s: Found %lu objects bound, expected %lu!\n", + __func__, bound, count); return -EINVAL; } - if (list_empty(&i915->ggtt.vm.inactive_list)) { + if (list_empty(&i915->ggtt.vm.bound_list)) { pr_err("No objects on the GGTT inactive list!\n"); return -EINVAL; } @@ -67,21 +94,26 @@ static int populate_ggtt(struct drm_i915_private *i915) static void unpin_ggtt(struct drm_i915_private *i915) { + struct i915_ggtt *ggtt = &i915->ggtt; struct i915_vma *vma; - list_for_each_entry(vma, &i915->ggtt.vm.inactive_list, vm_link) - i915_vma_unpin(vma); + mutex_lock(&ggtt->vm.mutex); + list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link) + if (vma->obj->mm.quirked) + i915_vma_unpin(vma); + mutex_unlock(&ggtt->vm.mutex); } -static void cleanup_objects(struct drm_i915_private *i915) +static void cleanup_objects(struct drm_i915_private *i915, + struct list_head *list) { struct drm_i915_gem_object *obj, *on; - list_for_each_entry_safe(obj, on, &i915->mm.unbound_list, mm.link) - i915_gem_object_put(obj); - - list_for_each_entry_safe(obj, on, &i915->mm.bound_list, mm.link) + list_for_each_entry_safe(obj, on, list, st_link) { + GEM_BUG_ON(!obj->mm.quirked); + obj->mm.quirked = false; i915_gem_object_put(obj); + } mutex_unlock(&i915->drm.struct_mutex); @@ -94,11 +126,12 @@ static int igt_evict_something(void *arg) { struct drm_i915_private *i915 = arg; struct i915_ggtt *ggtt = &i915->ggtt; + LIST_HEAD(objects); int err; /* Fill the GGTT with pinned objects and try to evict one. */ - err = populate_ggtt(i915); + err = populate_ggtt(i915, &objects); if (err) goto cleanup; @@ -127,7 +160,7 @@ static int igt_evict_something(void *arg) } cleanup: - cleanup_objects(i915); + cleanup_objects(i915, &objects); return err; } @@ -136,13 +169,14 @@ static int igt_overcommit(void *arg) struct drm_i915_private *i915 = arg; struct drm_i915_gem_object *obj; struct i915_vma *vma; + LIST_HEAD(objects); int err; /* Fill the GGTT with pinned objects and then try to pin one more. * We expect it to fail. */ - err = populate_ggtt(i915); + err = populate_ggtt(i915, &objects); if (err) goto cleanup; @@ -152,6 +186,8 @@ static int igt_overcommit(void *arg) goto cleanup; } + quirk_add(obj, &objects); + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); if (!IS_ERR(vma) || PTR_ERR(vma) != -ENOSPC) { pr_err("Failed to evict+insert, i915_gem_object_ggtt_pin returned err=%d\n", (int)PTR_ERR(vma)); @@ -160,7 +196,7 @@ static int igt_overcommit(void *arg) } cleanup: - cleanup_objects(i915); + cleanup_objects(i915, &objects); return err; } @@ -172,11 +208,12 @@ static int igt_evict_for_vma(void *arg) .start = 0, .size = 4096, }; + LIST_HEAD(objects); int err; /* Fill the GGTT with pinned objects and try to evict a range. */ - err = populate_ggtt(i915); + err = populate_ggtt(i915, &objects); if (err) goto cleanup; @@ -199,7 +236,7 @@ static int igt_evict_for_vma(void *arg) } cleanup: - cleanup_objects(i915); + cleanup_objects(i915, &objects); return err; } @@ -222,6 +259,7 @@ static int igt_evict_for_cache_color(void *arg) }; struct drm_i915_gem_object *obj; struct i915_vma *vma; + LIST_HEAD(objects); int err; /* Currently the use of color_adjust is limited to cache domains within @@ -237,6 +275,7 @@ static int igt_evict_for_cache_color(void *arg) goto cleanup; } i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); + quirk_add(obj, &objects); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, I915_GTT_PAGE_SIZE | flags); @@ -252,6 +291,7 @@ static int igt_evict_for_cache_color(void *arg) goto cleanup; } i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); + quirk_add(obj, &objects); /* Neighbouring; same colour - should fit */ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, @@ -287,7 +327,7 @@ static int igt_evict_for_cache_color(void *arg) cleanup: unpin_ggtt(i915); - cleanup_objects(i915); + cleanup_objects(i915, &objects); ggtt->vm.mm.color_adjust = NULL; return err; } @@ -296,11 +336,12 @@ static int igt_evict_vm(void *arg) { struct drm_i915_private *i915 = arg; struct i915_ggtt *ggtt = &i915->ggtt; + LIST_HEAD(objects); int err; /* Fill the GGTT with pinned objects and try to evict everything. */ - err = populate_ggtt(i915); + err = populate_ggtt(i915, &objects); if (err) goto cleanup; @@ -322,7 +363,7 @@ static int igt_evict_vm(void *arg) } cleanup: - cleanup_objects(i915); + cleanup_objects(i915, &objects); return err; } @@ -336,6 +377,7 @@ static int igt_evict_contexts(void *arg) struct drm_mm_node node; struct reserved *next; } *reserved = NULL; + intel_wakeref_t wakeref; struct drm_mm_node hole; unsigned long count; int err; @@ -355,7 +397,7 @@ static int igt_evict_contexts(void *arg) return 0; mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); /* Reserve a block so that we know we have enough to fit a few rq */ memset(&hole, 0, sizeof(hole)); @@ -400,8 +442,10 @@ static int igt_evict_contexts(void *arg) struct drm_file *file; file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); + if (IS_ERR(file)) { + err = PTR_ERR(file); + break; + } count = 0; mutex_lock(&i915->drm.struct_mutex); @@ -464,7 +508,7 @@ out_locked: } if (drm_mm_node_allocated(&hole)) drm_mm_remove_node(&hole); - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; @@ -480,14 +524,17 @@ int i915_gem_evict_mock_selftests(void) SUBTEST(igt_overcommit), }; struct drm_i915_private *i915; - int err; + intel_wakeref_t wakeref; + int err = 0; i915 = mock_gem_device(); if (!i915) return -ENOMEM; mutex_lock(&i915->drm.struct_mutex); - err = i915_subtests(tests, i915); + with_intel_runtime_pm(i915, wakeref) + err = i915_subtests(tests, i915); + mutex_unlock(&i915->drm.struct_mutex); drm_dev_put(&i915->drm); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index a9ed0ecc94e2..3850ef4a5ec8 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -275,6 +275,7 @@ static int lowlevel_hole(struct drm_i915_private *i915, for (n = 0; n < count; n++) { u64 addr = hole_start + order[n] * BIT_ULL(size); + intel_wakeref_t wakeref; GEM_BUG_ON(addr + BIT_ULL(size) > vm->total); @@ -293,9 +294,9 @@ static int lowlevel_hole(struct drm_i915_private *i915, mock_vma.node.size = BIT_ULL(size); mock_vma.node.start = addr; - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); vm->insert_entries(vm, &mock_vma, I915_CACHE_NONE, 0); - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); } count = n; @@ -1144,6 +1145,7 @@ static int igt_ggtt_page(void *arg) struct drm_i915_private *i915 = arg; struct i915_ggtt *ggtt = &i915->ggtt; struct drm_i915_gem_object *obj; + intel_wakeref_t wakeref; struct drm_mm_node tmp; unsigned int *order, n; int err; @@ -1169,7 +1171,7 @@ static int igt_ggtt_page(void *arg) if (err) goto out_unpin; - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); for (n = 0; n < count; n++) { u64 offset = tmp.start + n * PAGE_SIZE; @@ -1216,7 +1218,7 @@ static int igt_ggtt_page(void *arg) kfree(order); out_remove: ggtt->vm.clear_range(&ggtt->vm, tmp.start, tmp.size); - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); drm_mm_remove_node(&tmp); out_unpin: i915_gem_object_unpin_pages(obj); @@ -1235,7 +1237,10 @@ static void track_vma_bind(struct i915_vma *vma) __i915_gem_object_pin_pages(obj); vma->pages = obj->mm.pages; - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + + mutex_lock(&vma->vm->mutex); + list_move_tail(&vma->vm_link, &vma->vm->bound_list); + mutex_unlock(&vma->vm->mutex); } static int exercise_mock(struct drm_i915_private *i915, @@ -1265,27 +1270,35 @@ static int exercise_mock(struct drm_i915_private *i915, static int igt_mock_fill(void *arg) { - return exercise_mock(arg, fill_hole); + struct i915_ggtt *ggtt = arg; + + return exercise_mock(ggtt->vm.i915, fill_hole); } static int igt_mock_walk(void *arg) { - return exercise_mock(arg, walk_hole); + struct i915_ggtt *ggtt = arg; + + return exercise_mock(ggtt->vm.i915, walk_hole); } static int igt_mock_pot(void *arg) { - return exercise_mock(arg, pot_hole); + struct i915_ggtt *ggtt = arg; + + return exercise_mock(ggtt->vm.i915, pot_hole); } static int igt_mock_drunk(void *arg) { - return exercise_mock(arg, drunk_hole); + struct i915_ggtt *ggtt = arg; + + return exercise_mock(ggtt->vm.i915, drunk_hole); } static int igt_gtt_reserve(void *arg) { - struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = arg; struct drm_i915_gem_object *obj, *on; LIST_HEAD(objects); u64 total; @@ -1298,11 +1311,12 @@ static int igt_gtt_reserve(void *arg) /* Start by filling the GGTT */ for (total = 0; - total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.vm.total; - total += 2*I915_GTT_PAGE_SIZE) { + total + 2 * I915_GTT_PAGE_SIZE <= ggtt->vm.total; + total += 2 * I915_GTT_PAGE_SIZE) { struct i915_vma *vma; - obj = i915_gem_object_create_internal(i915, 2*PAGE_SIZE); + obj = i915_gem_object_create_internal(ggtt->vm.i915, + 2 * PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto out; @@ -1316,20 +1330,20 @@ static int igt_gtt_reserve(void *arg) list_add(&obj->st_link, &objects); - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, &ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out; } - err = i915_gem_gtt_reserve(&i915->ggtt.vm, &vma->node, + err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, obj->base.size, total, obj->cache_level, 0); if (err) { pr_err("i915_gem_gtt_reserve (pass 1) failed at %llu/%llu with err=%d\n", - total, i915->ggtt.vm.total, err); + total, ggtt->vm.total, err); goto out; } track_vma_bind(vma); @@ -1347,11 +1361,12 @@ static int igt_gtt_reserve(void *arg) /* Now we start forcing evictions */ for (total = I915_GTT_PAGE_SIZE; - total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.vm.total; - total += 2*I915_GTT_PAGE_SIZE) { + total + 2 * I915_GTT_PAGE_SIZE <= ggtt->vm.total; + total += 2 * I915_GTT_PAGE_SIZE) { struct i915_vma *vma; - obj = i915_gem_object_create_internal(i915, 2*PAGE_SIZE); + obj = i915_gem_object_create_internal(ggtt->vm.i915, + 2 * PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto out; @@ -1365,20 +1380,20 @@ static int igt_gtt_reserve(void *arg) list_add(&obj->st_link, &objects); - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, &ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out; } - err = i915_gem_gtt_reserve(&i915->ggtt.vm, &vma->node, + err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, obj->base.size, total, obj->cache_level, 0); if (err) { pr_err("i915_gem_gtt_reserve (pass 2) failed at %llu/%llu with err=%d\n", - total, i915->ggtt.vm.total, err); + total, ggtt->vm.total, err); goto out; } track_vma_bind(vma); @@ -1399,7 +1414,7 @@ static int igt_gtt_reserve(void *arg) struct i915_vma *vma; u64 offset; - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, &ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out; @@ -1411,18 +1426,18 @@ static int igt_gtt_reserve(void *arg) goto out; } - offset = random_offset(0, i915->ggtt.vm.total, + offset = random_offset(0, ggtt->vm.total, 2*I915_GTT_PAGE_SIZE, I915_GTT_MIN_ALIGNMENT); - err = i915_gem_gtt_reserve(&i915->ggtt.vm, &vma->node, + err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, obj->base.size, offset, obj->cache_level, 0); if (err) { pr_err("i915_gem_gtt_reserve (pass 3) failed at %llu/%llu with err=%d\n", - total, i915->ggtt.vm.total, err); + total, ggtt->vm.total, err); goto out; } track_vma_bind(vma); @@ -1448,7 +1463,7 @@ out: static int igt_gtt_insert(void *arg) { - struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = arg; struct drm_i915_gem_object *obj, *on; struct drm_mm_node tmp = {}; const struct invalid_insert { @@ -1457,8 +1472,8 @@ static int igt_gtt_insert(void *arg) u64 start, end; } invalid_insert[] = { { - i915->ggtt.vm.total + I915_GTT_PAGE_SIZE, 0, - 0, i915->ggtt.vm.total, + ggtt->vm.total + I915_GTT_PAGE_SIZE, 0, + 0, ggtt->vm.total, }, { 2*I915_GTT_PAGE_SIZE, 0, @@ -1488,7 +1503,7 @@ static int igt_gtt_insert(void *arg) /* Check a couple of obviously invalid requests */ for (ii = invalid_insert; ii->size; ii++) { - err = i915_gem_gtt_insert(&i915->ggtt.vm, &tmp, + err = i915_gem_gtt_insert(&ggtt->vm, &tmp, ii->size, ii->alignment, I915_COLOR_UNEVICTABLE, ii->start, ii->end, @@ -1503,11 +1518,12 @@ static int igt_gtt_insert(void *arg) /* Start by filling the GGTT */ for (total = 0; - total + I915_GTT_PAGE_SIZE <= i915->ggtt.vm.total; + total + I915_GTT_PAGE_SIZE <= ggtt->vm.total; total += I915_GTT_PAGE_SIZE) { struct i915_vma *vma; - obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + obj = i915_gem_object_create_internal(ggtt->vm.i915, + I915_GTT_PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto out; @@ -1521,15 +1537,15 @@ static int igt_gtt_insert(void *arg) list_add(&obj->st_link, &objects); - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, &ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out; } - err = i915_gem_gtt_insert(&i915->ggtt.vm, &vma->node, + err = i915_gem_gtt_insert(&ggtt->vm, &vma->node, obj->base.size, 0, obj->cache_level, - 0, i915->ggtt.vm.total, + 0, ggtt->vm.total, 0); if (err == -ENOSPC) { /* maxed out the GGTT space */ @@ -1538,7 +1554,7 @@ static int igt_gtt_insert(void *arg) } if (err) { pr_err("i915_gem_gtt_insert (pass 1) failed at %llu/%llu with err=%d\n", - total, i915->ggtt.vm.total, err); + total, ggtt->vm.total, err); goto out; } track_vma_bind(vma); @@ -1550,7 +1566,7 @@ static int igt_gtt_insert(void *arg) list_for_each_entry(obj, &objects, st_link) { struct i915_vma *vma; - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, &ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out; @@ -1570,7 +1586,7 @@ static int igt_gtt_insert(void *arg) struct i915_vma *vma; u64 offset; - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, &ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out; @@ -1585,13 +1601,13 @@ static int igt_gtt_insert(void *arg) goto out; } - err = i915_gem_gtt_insert(&i915->ggtt.vm, &vma->node, + err = i915_gem_gtt_insert(&ggtt->vm, &vma->node, obj->base.size, 0, obj->cache_level, - 0, i915->ggtt.vm.total, + 0, ggtt->vm.total, 0); if (err) { pr_err("i915_gem_gtt_insert (pass 2) failed at %llu/%llu with err=%d\n", - total, i915->ggtt.vm.total, err); + total, ggtt->vm.total, err); goto out; } track_vma_bind(vma); @@ -1607,11 +1623,12 @@ static int igt_gtt_insert(void *arg) /* And then force evictions */ for (total = 0; - total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.vm.total; - total += 2*I915_GTT_PAGE_SIZE) { + total + 2 * I915_GTT_PAGE_SIZE <= ggtt->vm.total; + total += 2 * I915_GTT_PAGE_SIZE) { struct i915_vma *vma; - obj = i915_gem_object_create_internal(i915, 2*I915_GTT_PAGE_SIZE); + obj = i915_gem_object_create_internal(ggtt->vm.i915, + 2 * I915_GTT_PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto out; @@ -1625,19 +1642,19 @@ static int igt_gtt_insert(void *arg) list_add(&obj->st_link, &objects); - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, &ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out; } - err = i915_gem_gtt_insert(&i915->ggtt.vm, &vma->node, + err = i915_gem_gtt_insert(&ggtt->vm, &vma->node, obj->base.size, 0, obj->cache_level, - 0, i915->ggtt.vm.total, + 0, ggtt->vm.total, 0); if (err) { pr_err("i915_gem_gtt_insert (pass 3) failed at %llu/%llu with err=%d\n", - total, i915->ggtt.vm.total, err); + total, ggtt->vm.total, err); goto out; } track_vma_bind(vma); @@ -1664,17 +1681,25 @@ int i915_gem_gtt_mock_selftests(void) SUBTEST(igt_gtt_insert), }; struct drm_i915_private *i915; + struct i915_ggtt ggtt; int err; i915 = mock_gem_device(); if (!i915) return -ENOMEM; + mock_init_ggtt(i915, &ggtt); + mutex_lock(&i915->drm.struct_mutex); - err = i915_subtests(tests, i915); + err = i915_subtests(tests, &ggtt); + mock_device_flush(i915); mutex_unlock(&i915->drm.struct_mutex); + i915_gem_drain_freed_objects(i915); + + mock_fini_ggtt(&ggtt); drm_dev_put(&i915->drm); + return err; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index be7ecb66ad11..395ae878e0f7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -308,6 +308,7 @@ static int igt_partial_tiling(void *arg) const unsigned int nreal = 1 << 12; /* largest tile row x2 */ struct drm_i915_private *i915 = arg; struct drm_i915_gem_object *obj; + intel_wakeref_t wakeref; int tiling; int err; @@ -333,7 +334,7 @@ static int igt_partial_tiling(void *arg) } mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); if (1) { IGT_TIMEOUT(end); @@ -444,7 +445,7 @@ next_tiling: ; } out_unlock: - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); i915_gem_object_unpin_pages(obj); out: @@ -506,11 +507,13 @@ static void disable_retire_worker(struct drm_i915_private *i915) mutex_lock(&i915->drm.struct_mutex); if (!i915->gt.active_requests++) { - intel_runtime_pm_get(i915); - i915_gem_unpark(i915); - intel_runtime_pm_put(i915); + intel_wakeref_t wakeref; + + with_intel_runtime_pm(i915, wakeref) + i915_gem_unpark(i915); } mutex_unlock(&i915->drm.struct_mutex); + cancel_delayed_work_sync(&i915->gt.retire_work); cancel_delayed_work_sync(&i915->gt.idle_work); } @@ -578,6 +581,8 @@ static int igt_mmap_offset_exhaustion(void *arg) /* Now fill with busy dead objects that we expect to reap */ for (loop = 0; loop < 3; loop++) { + intel_wakeref_t wakeref; + if (i915_terminally_wedged(&i915->gpu_error)) break; @@ -587,10 +592,10 @@ static int igt_mmap_offset_exhaustion(void *arg) goto out; } + err = 0; mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); - err = make_obj_busy(obj); - intel_runtime_pm_put(i915); + with_intel_runtime_pm(i915, wakeref) + err = make_obj_busy(obj); mutex_unlock(&i915->drm.struct_mutex); if (err) { pr_err("[loop %d] Failed to busy the object\n", loop); diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index a15713cae3b3..6d766925ad04 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -12,7 +12,9 @@ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ selftest(uncore, intel_uncore_live_selftests) selftest(workarounds, intel_workarounds_live_selftests) +selftest(timelines, i915_timeline_live_selftests) selftest(requests, i915_request_live_selftests) +selftest(active, i915_active_live_selftests) selftest(objects, i915_gem_object_live_selftests) selftest(dmabuf, i915_gem_dmabuf_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 1b70208eeea7..88e5ab586337 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -15,8 +15,7 @@ selftest(scatterlist, scatterlist_mock_selftests) selftest(syncmap, i915_syncmap_mock_selftests) selftest(uncore, intel_uncore_mock_selftests) selftest(engine, intel_engine_cs_mock_selftests) -selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) -selftest(timelines, i915_gem_timeline_mock_selftests) +selftest(timelines, i915_timeline_mock_selftests) selftest(requests, i915_request_mock_selftests) selftest(objects, i915_gem_object_mock_selftests) selftest(dmabuf, i915_gem_dmabuf_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c b/drivers/gpu/drm/i915/selftests/i915_random.c index 1f415ce47018..716a3f19f030 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.c +++ b/drivers/gpu/drm/i915/selftests/i915_random.c @@ -41,18 +41,37 @@ u64 i915_prandom_u64_state(struct rnd_state *rnd) return x; } -void i915_random_reorder(unsigned int *order, unsigned int count, - struct rnd_state *state) +void i915_prandom_shuffle(void *arr, size_t elsz, size_t count, + struct rnd_state *state) { - unsigned int i, j; + char stack[128]; + + if (WARN_ON(elsz > sizeof(stack) || count > U32_MAX)) + return; + + if (!elsz || !count) + return; + + /* Fisher-Yates shuffle courtesy of Knuth */ + while (--count) { + size_t swp; + + swp = i915_prandom_u32_max_state(count + 1, state); + if (swp == count) + continue; - for (i = 0; i < count; i++) { - BUILD_BUG_ON(sizeof(unsigned int) > sizeof(u32)); - j = i915_prandom_u32_max_state(count, state); - swap(order[i], order[j]); + memcpy(stack, arr + count * elsz, elsz); + memcpy(arr + count * elsz, arr + swp * elsz, elsz); + memcpy(arr + swp * elsz, stack, elsz); } } +void i915_random_reorder(unsigned int *order, unsigned int count, + struct rnd_state *state) +{ + i915_prandom_shuffle(order, sizeof(*order), count, state); +} + unsigned int *i915_random_order(unsigned int count, struct rnd_state *state) { unsigned int *order, i; diff --git a/drivers/gpu/drm/i915/selftests/i915_random.h b/drivers/gpu/drm/i915/selftests/i915_random.h index 7dffedc501ca..8e1ff9c105b6 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.h +++ b/drivers/gpu/drm/i915/selftests/i915_random.h @@ -54,4 +54,7 @@ void i915_random_reorder(unsigned int *order, unsigned int count, struct rnd_state *state); +void i915_prandom_shuffle(void *arr, size_t elsz, size_t count, + struct rnd_state *state); + #endif /* !__I915_SELFTESTS_RANDOM_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 07e557815308..6733dc5b6b4c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -25,8 +25,12 @@ #include <linux/prime_numbers.h> #include "../i915_selftest.h" +#include "i915_random.h" +#include "igt_live_test.h" +#include "lib_sw_fence.h" #include "mock_context.h" +#include "mock_drm.h" #include "mock_gem_device.h" static int igt_add_request(void *arg) @@ -246,93 +250,285 @@ err_context_0: return err; } -int i915_request_mock_selftests(void) +struct smoketest { + struct intel_engine_cs *engine; + struct i915_gem_context **contexts; + atomic_long_t num_waits, num_fences; + int ncontexts, max_batch; + struct i915_request *(*request_alloc)(struct i915_gem_context *, + struct intel_engine_cs *); +}; + +static struct i915_request * +__mock_request_alloc(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) { - static const struct i915_subtest tests[] = { - SUBTEST(igt_add_request), - SUBTEST(igt_wait_request), - SUBTEST(igt_fence_wait), - SUBTEST(igt_request_rewind), - }; - struct drm_i915_private *i915; - int err; + return mock_request(engine, ctx, 0); +} - i915 = mock_gem_device(); - if (!i915) +static struct i915_request * +__live_request_alloc(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + return i915_request_alloc(engine, ctx); +} + +static int __igt_breadcrumbs_smoketest(void *arg) +{ + struct smoketest *t = arg; + struct mutex * const BKL = &t->engine->i915->drm.struct_mutex; + const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1; + const unsigned int total = 4 * t->ncontexts + 1; + unsigned int num_waits = 0, num_fences = 0; + struct i915_request **requests; + I915_RND_STATE(prng); + unsigned int *order; + int err = 0; + + /* + * A very simple test to catch the most egregious of list handling bugs. + * + * At its heart, we simply create oodles of requests running across + * multiple kthreads and enable signaling on them, for the sole purpose + * of stressing our breadcrumb handling. The only inspection we do is + * that the fences were marked as signaled. + */ + + requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL); + if (!requests) return -ENOMEM; - err = i915_subtests(tests, i915); - drm_dev_put(&i915->drm); + order = i915_random_order(total, &prng); + if (!order) { + err = -ENOMEM; + goto out_requests; + } - return err; -} + while (!kthread_should_stop()) { + struct i915_sw_fence *submit, *wait; + unsigned int n, count; -struct live_test { - struct drm_i915_private *i915; - const char *func; - const char *name; + submit = heap_fence_create(GFP_KERNEL); + if (!submit) { + err = -ENOMEM; + break; + } - unsigned int reset_count; -}; + wait = heap_fence_create(GFP_KERNEL); + if (!wait) { + i915_sw_fence_commit(submit); + heap_fence_put(submit); + err = ENOMEM; + break; + } -static int begin_live_test(struct live_test *t, - struct drm_i915_private *i915, - const char *func, - const char *name) -{ - int err; + i915_random_reorder(order, total, &prng); + count = 1 + i915_prandom_u32_max_state(max_batch, &prng); - t->i915 = i915; - t->func = func; - t->name = name; + for (n = 0; n < count; n++) { + struct i915_gem_context *ctx = + t->contexts[order[n] % t->ncontexts]; + struct i915_request *rq; - err = i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (err) { - pr_err("%s(%s): failed to idle before, with err=%d!", - func, name, err); - return err; + mutex_lock(BKL); + + rq = t->request_alloc(ctx, t->engine); + if (IS_ERR(rq)) { + mutex_unlock(BKL); + err = PTR_ERR(rq); + count = n; + break; + } + + err = i915_sw_fence_await_sw_fence_gfp(&rq->submit, + submit, + GFP_KERNEL); + + requests[n] = i915_request_get(rq); + i915_request_add(rq); + + mutex_unlock(BKL); + + if (err >= 0) + err = i915_sw_fence_await_dma_fence(wait, + &rq->fence, + 0, + GFP_KERNEL); + + if (err < 0) { + i915_request_put(rq); + count = n; + break; + } + } + + i915_sw_fence_commit(submit); + i915_sw_fence_commit(wait); + + if (!wait_event_timeout(wait->wait, + i915_sw_fence_done(wait), + HZ / 2)) { + struct i915_request *rq = requests[count - 1]; + + pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n", + count, + rq->fence.context, rq->fence.seqno, + t->engine->name); + i915_gem_set_wedged(t->engine->i915); + GEM_BUG_ON(!i915_request_completed(rq)); + i915_sw_fence_wait(wait); + err = -EIO; + } + + for (n = 0; n < count; n++) { + struct i915_request *rq = requests[n]; + + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &rq->fence.flags)) { + pr_err("%llu:%llu was not signaled!\n", + rq->fence.context, rq->fence.seqno); + err = -EINVAL; + } + + i915_request_put(rq); + } + + heap_fence_put(wait); + heap_fence_put(submit); + + if (err < 0) + break; + + num_fences += count; + num_waits++; + + cond_resched(); } - i915->gpu_error.missed_irq_rings = 0; - t->reset_count = i915_reset_count(&i915->gpu_error); + atomic_long_add(num_fences, &t->num_fences); + atomic_long_add(num_waits, &t->num_waits); - return 0; + kfree(order); +out_requests: + kfree(requests); + return err; } -static int end_live_test(struct live_test *t) +static int mock_breadcrumbs_smoketest(void *arg) { - struct drm_i915_private *i915 = t->i915; + struct drm_i915_private *i915 = arg; + struct smoketest t = { + .engine = i915->engine[RCS], + .ncontexts = 1024, + .max_batch = 1024, + .request_alloc = __mock_request_alloc + }; + unsigned int ncpus = num_online_cpus(); + struct task_struct **threads; + unsigned int n; + int ret = 0; + + /* + * Smoketest our breadcrumb/signal handling for requests across multiple + * threads. A very simple test to only catch the most egregious of bugs. + * See __igt_breadcrumbs_smoketest(); + */ - i915_retire_requests(i915); + threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL); + if (!threads) + return -ENOMEM; - if (wait_for(intel_engines_are_idle(i915), 10)) { - pr_err("%s(%s): GPU not idle\n", t->func, t->name); - return -EIO; + t.contexts = + kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL); + if (!t.contexts) { + ret = -ENOMEM; + goto out_threads; } - if (t->reset_count != i915_reset_count(&i915->gpu_error)) { - pr_err("%s(%s): GPU was reset %d times!\n", - t->func, t->name, - i915_reset_count(&i915->gpu_error) - t->reset_count); - return -EIO; + mutex_lock(&t.engine->i915->drm.struct_mutex); + for (n = 0; n < t.ncontexts; n++) { + t.contexts[n] = mock_context(t.engine->i915, "mock"); + if (!t.contexts[n]) { + ret = -ENOMEM; + goto out_contexts; + } } + mutex_unlock(&t.engine->i915->drm.struct_mutex); + + for (n = 0; n < ncpus; n++) { + threads[n] = kthread_run(__igt_breadcrumbs_smoketest, + &t, "igt/%d", n); + if (IS_ERR(threads[n])) { + ret = PTR_ERR(threads[n]); + ncpus = n; + break; + } - if (i915->gpu_error.missed_irq_rings) { - pr_err("%s(%s): Missed interrupts on engines %lx\n", - t->func, t->name, i915->gpu_error.missed_irq_rings); - return -EIO; + get_task_struct(threads[n]); } - return 0; + msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); + + for (n = 0; n < ncpus; n++) { + int err; + + err = kthread_stop(threads[n]); + if (err < 0 && !ret) + ret = err; + + put_task_struct(threads[n]); + } + pr_info("Completed %lu waits for %lu fence across %d cpus\n", + atomic_long_read(&t.num_waits), + atomic_long_read(&t.num_fences), + ncpus); + + mutex_lock(&t.engine->i915->drm.struct_mutex); +out_contexts: + for (n = 0; n < t.ncontexts; n++) { + if (!t.contexts[n]) + break; + mock_context_close(t.contexts[n]); + } + mutex_unlock(&t.engine->i915->drm.struct_mutex); + kfree(t.contexts); +out_threads: + kfree(threads); + + return ret; +} + +int i915_request_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_add_request), + SUBTEST(igt_wait_request), + SUBTEST(igt_fence_wait), + SUBTEST(igt_request_rewind), + SUBTEST(mock_breadcrumbs_smoketest), + }; + struct drm_i915_private *i915; + intel_wakeref_t wakeref; + int err = 0; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + with_intel_runtime_pm(i915, wakeref) + err = i915_subtests(tests, i915); + + drm_dev_put(&i915->drm); + + return err; } static int live_nop_request(void *arg) { struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; - struct live_test t; + intel_wakeref_t wakeref; + struct igt_live_test t; unsigned int id; int err = -ENODEV; @@ -342,7 +538,7 @@ static int live_nop_request(void *arg) */ mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); for_each_engine(engine, i915, id) { struct i915_request *request = NULL; @@ -350,7 +546,7 @@ static int live_nop_request(void *arg) IGT_TIMEOUT(end_time); ktime_t times[2] = {}; - err = begin_live_test(&t, i915, __func__, engine->name); + err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) goto out_unlock; @@ -392,7 +588,7 @@ static int live_nop_request(void *arg) break; } - err = end_live_test(&t); + err = igt_live_test_end(&t); if (err) goto out_unlock; @@ -403,7 +599,7 @@ static int live_nop_request(void *arg) } out_unlock: - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -478,7 +674,8 @@ static int live_empty_request(void *arg) { struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; - struct live_test t; + intel_wakeref_t wakeref; + struct igt_live_test t; struct i915_vma *batch; unsigned int id; int err = 0; @@ -489,7 +686,7 @@ static int live_empty_request(void *arg) */ mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); batch = empty_batch(i915); if (IS_ERR(batch)) { @@ -503,7 +700,7 @@ static int live_empty_request(void *arg) unsigned long n, prime; ktime_t times[2] = {}; - err = begin_live_test(&t, i915, __func__, engine->name); + err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) goto out_batch; @@ -539,7 +736,7 @@ static int live_empty_request(void *arg) break; } - err = end_live_test(&t); + err = igt_live_test_end(&t); if (err) goto out_batch; @@ -553,7 +750,7 @@ out_batch: i915_vma_unpin(batch); i915_vma_put(batch); out_unlock: - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -637,8 +834,9 @@ static int live_all_engines(void *arg) struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; struct i915_request *request[I915_NUM_ENGINES]; + intel_wakeref_t wakeref; + struct igt_live_test t; struct i915_vma *batch; - struct live_test t; unsigned int id; int err; @@ -648,9 +846,9 @@ static int live_all_engines(void *arg) */ mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); - err = begin_live_test(&t, i915, __func__, ""); + err = igt_live_test_begin(&t, i915, __func__, ""); if (err) goto out_unlock; @@ -722,7 +920,7 @@ static int live_all_engines(void *arg) request[id] = NULL; } - err = end_live_test(&t); + err = igt_live_test_end(&t); out_request: for_each_engine(engine, i915, id) @@ -731,7 +929,7 @@ out_request: i915_vma_unpin(batch); i915_vma_put(batch); out_unlock: - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -742,7 +940,8 @@ static int live_sequential_engines(void *arg) struct i915_request *request[I915_NUM_ENGINES] = {}; struct i915_request *prev = NULL; struct intel_engine_cs *engine; - struct live_test t; + intel_wakeref_t wakeref; + struct igt_live_test t; unsigned int id; int err; @@ -753,9 +952,9 @@ static int live_sequential_engines(void *arg) */ mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); - err = begin_live_test(&t, i915, __func__, ""); + err = igt_live_test_begin(&t, i915, __func__, ""); if (err) goto out_unlock; @@ -838,7 +1037,7 @@ static int live_sequential_engines(void *arg) GEM_BUG_ON(!i915_request_completed(request[id])); } - err = end_live_test(&t); + err = igt_live_test_end(&t); out_request: for_each_engine(engine, i915, id) { @@ -860,11 +1059,183 @@ out_request: i915_request_put(request[id]); } out_unlock: - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; } +static int +max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine) +{ + struct i915_request *rq; + int ret; + + /* + * Before execlists, all contexts share the same ringbuffer. With + * execlists, each context/engine has a separate ringbuffer and + * for the purposes of this test, inexhaustible. + * + * For the global ringbuffer though, we have to be very careful + * that we do not wrap while preventing the execution of requests + * with a unsignaled fence. + */ + if (HAS_EXECLISTS(ctx->i915)) + return INT_MAX; + + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + } else { + int sz; + + ret = rq->ring->size - rq->reserved_space; + i915_request_add(rq); + + sz = rq->ring->emit - rq->head; + if (sz < 0) + sz += rq->ring->size; + ret /= sz; + ret /= 2; /* leave half spare, in case of emergency! */ + } + + return ret; +} + +static int live_breadcrumbs_smoketest(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct smoketest t[I915_NUM_ENGINES]; + unsigned int ncpus = num_online_cpus(); + unsigned long num_waits, num_fences; + struct intel_engine_cs *engine; + struct task_struct **threads; + struct igt_live_test live; + enum intel_engine_id id; + intel_wakeref_t wakeref; + struct drm_file *file; + unsigned int n; + int ret = 0; + + /* + * Smoketest our breadcrumb/signal handling for requests across multiple + * threads. A very simple test to only catch the most egregious of bugs. + * See __igt_breadcrumbs_smoketest(); + * + * On real hardware this time. + */ + + wakeref = intel_runtime_pm_get(i915); + + file = mock_file(i915); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto out_rpm; + } + + threads = kcalloc(ncpus * I915_NUM_ENGINES, + sizeof(*threads), + GFP_KERNEL); + if (!threads) { + ret = -ENOMEM; + goto out_file; + } + + memset(&t[0], 0, sizeof(t[0])); + t[0].request_alloc = __live_request_alloc; + t[0].ncontexts = 64; + t[0].contexts = kmalloc_array(t[0].ncontexts, + sizeof(*t[0].contexts), + GFP_KERNEL); + if (!t[0].contexts) { + ret = -ENOMEM; + goto out_threads; + } + + mutex_lock(&i915->drm.struct_mutex); + for (n = 0; n < t[0].ncontexts; n++) { + t[0].contexts[n] = live_context(i915, file); + if (!t[0].contexts[n]) { + ret = -ENOMEM; + goto out_contexts; + } + } + + ret = igt_live_test_begin(&live, i915, __func__, ""); + if (ret) + goto out_contexts; + + for_each_engine(engine, i915, id) { + t[id] = t[0]; + t[id].engine = engine; + t[id].max_batch = max_batches(t[0].contexts[0], engine); + if (t[id].max_batch < 0) { + ret = t[id].max_batch; + mutex_unlock(&i915->drm.struct_mutex); + goto out_flush; + } + /* One ring interleaved between requests from all cpus */ + t[id].max_batch /= num_online_cpus() + 1; + pr_debug("Limiting batches to %d requests on %s\n", + t[id].max_batch, engine->name); + + for (n = 0; n < ncpus; n++) { + struct task_struct *tsk; + + tsk = kthread_run(__igt_breadcrumbs_smoketest, + &t[id], "igt/%d.%d", id, n); + if (IS_ERR(tsk)) { + ret = PTR_ERR(tsk); + mutex_unlock(&i915->drm.struct_mutex); + goto out_flush; + } + + get_task_struct(tsk); + threads[id * ncpus + n] = tsk; + } + } + mutex_unlock(&i915->drm.struct_mutex); + + msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); + +out_flush: + num_waits = 0; + num_fences = 0; + for_each_engine(engine, i915, id) { + for (n = 0; n < ncpus; n++) { + struct task_struct *tsk = threads[id * ncpus + n]; + int err; + + if (!tsk) + continue; + + err = kthread_stop(tsk); + if (err < 0 && !ret) + ret = err; + + put_task_struct(tsk); + } + + num_waits += atomic_long_read(&t[id].num_waits); + num_fences += atomic_long_read(&t[id].num_fences); + } + pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n", + num_waits, num_fences, RUNTIME_INFO(i915)->num_rings, ncpus); + + mutex_lock(&i915->drm.struct_mutex); + ret = igt_live_test_end(&live) ?: ret; +out_contexts: + mutex_unlock(&i915->drm.struct_mutex); + kfree(t[0].contexts); +out_threads: + kfree(threads); +out_file: + mock_file_free(i915, file); +out_rpm: + intel_runtime_pm_put(i915, wakeref); + + return ret; +} + int i915_request_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { @@ -872,6 +1243,7 @@ int i915_request_live_selftests(struct drm_i915_private *i915) SUBTEST(live_all_engines), SUBTEST(live_sequential_engines), SUBTEST(live_empty_request), + SUBTEST(live_breadcrumbs_smoketest), }; if (i915_terminally_wedged(&i915->gpu_error)) diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index 86c54ea37f48..10ef0e636a24 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -197,6 +197,49 @@ int i915_live_selftests(struct pci_dev *pdev) return 0; } +static bool apply_subtest_filter(const char *caller, const char *name) +{ + char *filter, *sep, *tok; + bool result = true; + + filter = kstrdup(i915_selftest.filter, GFP_KERNEL); + for (sep = filter; (tok = strsep(&sep, ","));) { + bool allow = true; + char *sl; + + if (*tok == '!') { + allow = false; + tok++; + } + + if (*tok == '\0') + continue; + + sl = strchr(tok, '/'); + if (sl) { + *sl++ = '\0'; + if (strcmp(tok, caller)) { + if (allow) + result = false; + continue; + } + tok = sl; + } + + if (strcmp(tok, name)) { + if (allow) + result = false; + continue; + } + + result = allow; + break; + } + kfree(filter); + + return result; +} + int __i915_subtests(const char *caller, const struct i915_subtest *st, unsigned int count, @@ -209,6 +252,9 @@ int __i915_subtests(const char *caller, if (signal_pending(current)) return -EINTR; + if (!apply_subtest_filter(caller, st->name)) + continue; + pr_debug(DRIVER_NAME ": Running %s/%s\n", caller, st->name); GEM_TRACE("Running %s/%s\n", caller, st->name); @@ -244,6 +290,7 @@ bool __igt_timeout(unsigned long timeout, const char *fmt, ...) module_param_named(st_random_seed, i915_selftest.random_seed, uint, 0400); module_param_named(st_timeout, i915_selftest.timeout_ms, uint, 0400); +module_param_named(st_filter, i915_selftest.filter, charp, 0400); module_param_named_unsafe(mock_selftests, i915_selftest.mock, int, 0400); MODULE_PARM_DESC(mock_selftests, "Run selftests before loading, using mock hardware (0:disabled [default], 1:run tests then load driver, -1:run tests then exit module)"); diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c index 19f1c6a5c8fb..12ea69b1a1e5 100644 --- a/drivers/gpu/drm/i915/selftests/i915_timeline.c +++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c @@ -4,12 +4,155 @@ * Copyright © 2017-2018 Intel Corporation */ +#include <linux/prime_numbers.h> + #include "../i915_selftest.h" #include "i915_random.h" +#include "igt_flush_test.h" #include "mock_gem_device.h" #include "mock_timeline.h" +static struct page *hwsp_page(struct i915_timeline *tl) +{ + struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj; + + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + return sg_page(obj->mm.pages->sgl); +} + +static unsigned long hwsp_cacheline(struct i915_timeline *tl) +{ + unsigned long address = (unsigned long)page_address(hwsp_page(tl)); + + return (address + tl->hwsp_offset) / CACHELINE_BYTES; +} + +#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES) + +struct mock_hwsp_freelist { + struct drm_i915_private *i915; + struct radix_tree_root cachelines; + struct i915_timeline **history; + unsigned long count, max; + struct rnd_state prng; +}; + +enum { + SHUFFLE = BIT(0), +}; + +static void __mock_hwsp_record(struct mock_hwsp_freelist *state, + unsigned int idx, + struct i915_timeline *tl) +{ + tl = xchg(&state->history[idx], tl); + if (tl) { + radix_tree_delete(&state->cachelines, hwsp_cacheline(tl)); + i915_timeline_put(tl); + } +} + +static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, + unsigned int count, + unsigned int flags) +{ + struct i915_timeline *tl; + unsigned int idx; + + while (count--) { + unsigned long cacheline; + int err; + + tl = i915_timeline_create(state->i915, "mock", NULL); + if (IS_ERR(tl)) + return PTR_ERR(tl); + + cacheline = hwsp_cacheline(tl); + err = radix_tree_insert(&state->cachelines, cacheline, tl); + if (err) { + if (err == -EEXIST) { + pr_err("HWSP cacheline %lu already used; duplicate allocation!\n", + cacheline); + } + i915_timeline_put(tl); + return err; + } + + idx = state->count++ % state->max; + __mock_hwsp_record(state, idx, tl); + } + + if (flags & SHUFFLE) + i915_prandom_shuffle(state->history, + sizeof(*state->history), + min(state->count, state->max), + &state->prng); + + count = i915_prandom_u32_max_state(min(state->count, state->max), + &state->prng); + while (count--) { + idx = --state->count % state->max; + __mock_hwsp_record(state, idx, NULL); + } + + return 0; +} + +static int mock_hwsp_freelist(void *arg) +{ + struct mock_hwsp_freelist state; + const struct { + const char *name; + unsigned int flags; + } phases[] = { + { "linear", 0 }, + { "shuffled", SHUFFLE }, + { }, + }, *p; + unsigned int na; + int err = 0; + + INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL); + state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed); + + state.i915 = mock_gem_device(); + if (!state.i915) + return -ENOMEM; + + /* + * Create a bunch of timelines and check that their HWSP do not overlap. + * Free some, and try again. + */ + + state.max = PAGE_SIZE / sizeof(*state.history); + state.count = 0; + state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL); + if (!state.history) { + err = -ENOMEM; + goto err_put; + } + + mutex_lock(&state.i915->drm.struct_mutex); + for (p = phases; p->name; p++) { + pr_debug("%s(%s)\n", __func__, p->name); + for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) { + err = __mock_hwsp_timeline(&state, na, p->flags); + if (err) + goto out; + } + } + +out: + for (na = 0; na < state.max; na++) + __mock_hwsp_record(&state, na, NULL); + mutex_unlock(&state.i915->drm.struct_mutex); + kfree(state.history); +err_put: + drm_dev_put(&state.i915->drm); + return err; +} + struct __igt_sync { const char *name; u32 seqno; @@ -256,12 +399,331 @@ static int bench_sync(void *arg) return 0; } -int i915_gem_timeline_mock_selftests(void) +int i915_timeline_mock_selftests(void) { static const struct i915_subtest tests[] = { + SUBTEST(mock_hwsp_freelist), SUBTEST(igt_sync), SUBTEST(bench_sync), }; return i915_subtests(tests, NULL); } + +static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + if (INTEL_GEN(rq->i915) >= 8) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = addr; + *cs++ = 0; + *cs++ = value; + } else if (INTEL_GEN(rq->i915) >= 4) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = 0; + *cs++ = addr; + *cs++ = value; + } else { + *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *cs++ = addr; + *cs++ = value; + *cs++ = MI_NOOP; + } + + intel_ring_advance(rq, cs); + + return 0; +} + +static struct i915_request * +tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value) +{ + struct i915_request *rq; + int err; + + lockdep_assert_held(&tl->i915->drm.struct_mutex); /* lazy rq refs */ + + err = i915_timeline_pin(tl); + if (err) { + rq = ERR_PTR(err); + goto out; + } + + rq = i915_request_alloc(engine, engine->i915->kernel_context); + if (IS_ERR(rq)) + goto out_unpin; + + err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value); + i915_request_add(rq); + if (err) + rq = ERR_PTR(err); + +out_unpin: + i915_timeline_unpin(tl); +out: + if (IS_ERR(rq)) + pr_err("Failed to write to timeline!\n"); + return rq; +} + +static struct i915_timeline * +checked_i915_timeline_create(struct drm_i915_private *i915) +{ + struct i915_timeline *tl; + + tl = i915_timeline_create(i915, "live", NULL); + if (IS_ERR(tl)) + return tl; + + if (*tl->hwsp_seqno != tl->seqno) { + pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n", + *tl->hwsp_seqno, tl->seqno); + i915_timeline_put(tl); + return ERR_PTR(-EINVAL); + } + + return tl; +} + +static int live_hwsp_engine(void *arg) +{ +#define NUM_TIMELINES 4096 + struct drm_i915_private *i915 = arg; + struct i915_timeline **timelines; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + unsigned long count, n; + int err = 0; + + /* + * Create a bunch of timelines and check we can write + * independently to each of their breadcrumb slots. + */ + + timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, + sizeof(*timelines), + GFP_KERNEL); + if (!timelines) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + count = 0; + for_each_engine(engine, i915, id) { + if (!intel_engine_can_store_dword(engine)) + continue; + + for (n = 0; n < NUM_TIMELINES; n++) { + struct i915_timeline *tl; + struct i915_request *rq; + + tl = checked_i915_timeline_create(i915); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out; + } + + rq = tl_write(tl, engine, count); + if (IS_ERR(rq)) { + i915_timeline_put(tl); + err = PTR_ERR(rq); + goto out; + } + + timelines[count++] = tl; + } + } + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + for (n = 0; n < count; n++) { + struct i915_timeline *tl = timelines[n]; + + if (!err && *tl->hwsp_seqno != n) { + pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", + n, *tl->hwsp_seqno); + err = -EINVAL; + } + i915_timeline_put(tl); + } + + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + kvfree(timelines); + + return err; +#undef NUM_TIMELINES +} + +static int live_hwsp_alternate(void *arg) +{ +#define NUM_TIMELINES 4096 + struct drm_i915_private *i915 = arg; + struct i915_timeline **timelines; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + unsigned long count, n; + int err = 0; + + /* + * Create a bunch of timelines and check we can write + * independently to each of their breadcrumb slots with adjacent + * engines. + */ + + timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, + sizeof(*timelines), + GFP_KERNEL); + if (!timelines) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + count = 0; + for (n = 0; n < NUM_TIMELINES; n++) { + for_each_engine(engine, i915, id) { + struct i915_timeline *tl; + struct i915_request *rq; + + if (!intel_engine_can_store_dword(engine)) + continue; + + tl = checked_i915_timeline_create(i915); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out; + } + + rq = tl_write(tl, engine, count); + if (IS_ERR(rq)) { + i915_timeline_put(tl); + err = PTR_ERR(rq); + goto out; + } + + timelines[count++] = tl; + } + } + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + for (n = 0; n < count; n++) { + struct i915_timeline *tl = timelines[n]; + + if (!err && *tl->hwsp_seqno != n) { + pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", + n, *tl->hwsp_seqno); + err = -EINVAL; + } + i915_timeline_put(tl); + } + + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + kvfree(timelines); + + return err; +#undef NUM_TIMELINES +} + +static int live_hwsp_recycle(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + unsigned long count; + int err = 0; + + /* + * Check seqno writes into one timeline at a time. We expect to + * recycle the breadcrumb slot between iterations and neither + * want to confuse ourselves or the GPU. + */ + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + count = 0; + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + + if (!intel_engine_can_store_dword(engine)) + continue; + + do { + struct i915_timeline *tl; + struct i915_request *rq; + + tl = checked_i915_timeline_create(i915); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out; + } + + rq = tl_write(tl, engine, count); + if (IS_ERR(rq)) { + i915_timeline_put(tl); + err = PTR_ERR(rq); + goto out; + } + + if (i915_request_wait(rq, + I915_WAIT_LOCKED, + HZ / 5) < 0) { + pr_err("Wait for timeline writes timed out!\n"); + i915_timeline_put(tl); + err = -EIO; + goto out; + } + + if (*tl->hwsp_seqno != count) { + pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", + count, *tl->hwsp_seqno); + err = -EINVAL; + } + + i915_timeline_put(tl); + count++; + + if (err) + goto out; + + i915_timelines_park(i915); /* Encourage recycling! */ + } while (!__igt_timeout(end_time, NULL)); + } + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} + +int i915_timeline_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_hwsp_recycle), + SUBTEST(live_hwsp_engine), + SUBTEST(live_hwsp_alternate), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index ffa74290e054..cf1de82741fa 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -28,6 +28,7 @@ #include "mock_gem_device.h" #include "mock_context.h" +#include "mock_gtt.h" static bool assert_vma(struct i915_vma *vma, struct drm_i915_gem_object *obj, @@ -141,7 +142,8 @@ static int create_vmas(struct drm_i915_private *i915, static int igt_vma_create(void *arg) { - struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = arg; + struct drm_i915_private *i915 = ggtt->vm.i915; struct drm_i915_gem_object *obj, *on; struct i915_gem_context *ctx, *cn; unsigned long num_obj, num_ctx; @@ -245,7 +247,7 @@ static bool assert_pin_einval(const struct i915_vma *vma, static int igt_vma_pin1(void *arg) { - struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = arg; const struct pin_mode modes[] = { #define VALID(sz, fl) { .size = (sz), .flags = (fl), .assert = assert_pin_valid, .string = #sz ", " #fl ", (valid) " } #define __INVALID(sz, fl, check, eval) { .size = (sz), .flags = (fl), .assert = (check), .string = #sz ", " #fl ", (invalid " #eval ")" } @@ -256,30 +258,30 @@ static int igt_vma_pin1(void *arg) VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | 4096), VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | 8192), - VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), - VALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), - VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.vm.total - 4096)), - - VALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | (i915->ggtt.mappable_end - 4096)), - INVALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | i915->ggtt.mappable_end), - VALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | (i915->ggtt.vm.total - 4096)), - INVALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | i915->ggtt.vm.total), + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | (ggtt->mappable_end - 4096)), + VALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | (ggtt->mappable_end - 4096)), + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | (ggtt->vm.total - 4096)), + + VALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | (ggtt->mappable_end - 4096)), + INVALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | ggtt->mappable_end), + VALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | (ggtt->vm.total - 4096)), + INVALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | ggtt->vm.total), INVALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | round_down(U64_MAX, PAGE_SIZE)), VALID(4096, PIN_GLOBAL), VALID(8192, PIN_GLOBAL), - VALID(i915->ggtt.mappable_end - 4096, PIN_GLOBAL | PIN_MAPPABLE), - VALID(i915->ggtt.mappable_end, PIN_GLOBAL | PIN_MAPPABLE), - NOSPACE(i915->ggtt.mappable_end + 4096, PIN_GLOBAL | PIN_MAPPABLE), - VALID(i915->ggtt.vm.total - 4096, PIN_GLOBAL), - VALID(i915->ggtt.vm.total, PIN_GLOBAL), - NOSPACE(i915->ggtt.vm.total + 4096, PIN_GLOBAL), + VALID(ggtt->mappable_end - 4096, PIN_GLOBAL | PIN_MAPPABLE), + VALID(ggtt->mappable_end, PIN_GLOBAL | PIN_MAPPABLE), + NOSPACE(ggtt->mappable_end + 4096, PIN_GLOBAL | PIN_MAPPABLE), + VALID(ggtt->vm.total - 4096, PIN_GLOBAL), + VALID(ggtt->vm.total, PIN_GLOBAL), + NOSPACE(ggtt->vm.total + 4096, PIN_GLOBAL), NOSPACE(round_down(U64_MAX, PAGE_SIZE), PIN_GLOBAL), - INVALID(8192, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | (i915->ggtt.mappable_end - 4096)), - INVALID(8192, PIN_GLOBAL | PIN_OFFSET_FIXED | (i915->ggtt.vm.total - 4096)), + INVALID(8192, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | (ggtt->mappable_end - 4096)), + INVALID(8192, PIN_GLOBAL | PIN_OFFSET_FIXED | (ggtt->vm.total - 4096)), INVALID(8192, PIN_GLOBAL | PIN_OFFSET_FIXED | (round_down(U64_MAX, PAGE_SIZE) - 4096)), - VALID(8192, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), + VALID(8192, PIN_GLOBAL | PIN_OFFSET_BIAS | (ggtt->mappable_end - 4096)), #if !IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) /* Misusing BIAS is a programming error (it is not controllable @@ -287,10 +289,10 @@ static int igt_vma_pin1(void *arg) * However, the tests are still quite interesting for checking * variable start, end and size. */ - NOSPACE(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | i915->ggtt.mappable_end), - NOSPACE(0, PIN_GLOBAL | PIN_OFFSET_BIAS | i915->ggtt.vm.total), - NOSPACE(8192, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), - NOSPACE(8192, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.vm.total - 4096)), + NOSPACE(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | ggtt->mappable_end), + NOSPACE(0, PIN_GLOBAL | PIN_OFFSET_BIAS | ggtt->vm.total), + NOSPACE(8192, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | (ggtt->mappable_end - 4096)), + NOSPACE(8192, PIN_GLOBAL | PIN_OFFSET_BIAS | (ggtt->vm.total - 4096)), #endif { }, #undef NOSPACE @@ -306,13 +308,13 @@ static int igt_vma_pin1(void *arg) * focusing on error handling of boundary conditions. */ - GEM_BUG_ON(!drm_mm_clean(&i915->ggtt.vm.mm)); + GEM_BUG_ON(!drm_mm_clean(&ggtt->vm.mm)); - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + obj = i915_gem_object_create_internal(ggtt->vm.i915, PAGE_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); - vma = checked_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = checked_vma_instance(obj, &ggtt->vm, NULL); if (IS_ERR(vma)) goto out; @@ -403,8 +405,8 @@ static unsigned int rotated_size(const struct intel_rotation_plane_info *a, static int igt_vma_rotate(void *arg) { - struct drm_i915_private *i915 = arg; - struct i915_address_space *vm = &i915->ggtt.vm; + struct i915_ggtt *ggtt = arg; + struct i915_address_space *vm = &ggtt->vm; struct drm_i915_gem_object *obj; const struct intel_rotation_plane_info planes[] = { { .width = 1, .height = 1, .stride = 1 }, @@ -431,7 +433,7 @@ static int igt_vma_rotate(void *arg) * that the page layout within the rotated VMA match our expectations. */ - obj = i915_gem_object_create_internal(i915, max_pages * PAGE_SIZE); + obj = i915_gem_object_create_internal(vm->i915, max_pages * PAGE_SIZE); if (IS_ERR(obj)) goto out; @@ -602,8 +604,8 @@ static bool assert_pin(struct i915_vma *vma, static int igt_vma_partial(void *arg) { - struct drm_i915_private *i915 = arg; - struct i915_address_space *vm = &i915->ggtt.vm; + struct i915_ggtt *ggtt = arg; + struct i915_address_space *vm = &ggtt->vm; const unsigned int npages = 1021; /* prime! */ struct drm_i915_gem_object *obj; const struct phase { @@ -621,7 +623,7 @@ static int igt_vma_partial(void *arg) * we are returned the same VMA when we later request the same range. */ - obj = i915_gem_object_create_internal(i915, npages*PAGE_SIZE); + obj = i915_gem_object_create_internal(vm->i915, npages * PAGE_SIZE); if (IS_ERR(obj)) goto out; @@ -670,7 +672,7 @@ static int igt_vma_partial(void *arg) } count = 0; - list_for_each_entry(vma, &obj->vma_list, obj_link) + list_for_each_entry(vma, &obj->vma.list, obj_link) count++; if (count != nvma) { pr_err("(%s) All partial vma were not recorded on the obj->vma_list: found %u, expected %u\n", @@ -699,7 +701,7 @@ static int igt_vma_partial(void *arg) i915_vma_unpin(vma); count = 0; - list_for_each_entry(vma, &obj->vma_list, obj_link) + list_for_each_entry(vma, &obj->vma.list, obj_link) count++; if (count != nvma) { pr_err("(%s) allocated an extra full vma!\n", p->name); @@ -723,17 +725,24 @@ int i915_vma_mock_selftests(void) SUBTEST(igt_vma_partial), }; struct drm_i915_private *i915; + struct i915_ggtt ggtt; int err; i915 = mock_gem_device(); if (!i915) return -ENOMEM; + mock_init_ggtt(i915, &ggtt); + mutex_lock(&i915->drm.struct_mutex); - err = i915_subtests(tests, i915); + err = i915_subtests(tests, &ggtt); + mock_device_flush(i915); mutex_unlock(&i915->drm.struct_mutex); + i915_gem_drain_freed_objects(i915); + + mock_fini_ggtt(&ggtt); drm_dev_put(&i915->drm); + return err; } - diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c new file mode 100644 index 000000000000..3e902761cd16 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -0,0 +1,78 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include "../i915_drv.h" + +#include "../i915_selftest.h" +#include "igt_flush_test.h" +#include "igt_live_test.h" + +int igt_live_test_begin(struct igt_live_test *t, + struct drm_i915_private *i915, + const char *func, + const char *name) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err; + + lockdep_assert_held(&i915->drm.struct_mutex); + + t->i915 = i915; + t->func = func; + t->name = name; + + err = i915_gem_wait_for_idle(i915, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (err) { + pr_err("%s(%s): failed to idle before, with err=%d!", + func, name, err); + return err; + } + + t->reset_global = i915_reset_count(&i915->gpu_error); + + for_each_engine(engine, i915, id) + t->reset_engine[id] = + i915_reset_engine_count(&i915->gpu_error, engine); + + return 0; +} + +int igt_live_test_end(struct igt_live_test *t) +{ + struct drm_i915_private *i915 = t->i915; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + lockdep_assert_held(&i915->drm.struct_mutex); + + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + return -EIO; + + if (t->reset_global != i915_reset_count(&i915->gpu_error)) { + pr_err("%s(%s): GPU was reset %d times!\n", + t->func, t->name, + i915_reset_count(&i915->gpu_error) - t->reset_global); + return -EIO; + } + + for_each_engine(engine, i915, id) { + if (t->reset_engine[id] == + i915_reset_engine_count(&i915->gpu_error, engine)) + continue; + + pr_err("%s(%s): engine '%s' was reset %d times!\n", + t->func, t->name, engine->name, + i915_reset_engine_count(&i915->gpu_error, engine) - + t->reset_engine[id]); + return -EIO; + } + + return 0; +} diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.h b/drivers/gpu/drm/i915/selftests/igt_live_test.h new file mode 100644 index 000000000000..c0e9f99d50de --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.h @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef IGT_LIVE_TEST_H +#define IGT_LIVE_TEST_H + +#include "../i915_gem.h" + +struct drm_i915_private; + +struct igt_live_test { + struct drm_i915_private *i915; + const char *func; + const char *name; + + unsigned int reset_global; + unsigned int reset_engine[I915_NUM_ENGINES]; +}; + +/* + * Flush the GPU state before and after the test to ensure that no residual + * code is running on the GPU that may affect this test. Also compare the + * state before and after the test and alert if it unexpectedly changes, + * e.g. if the GPU was reset. + */ +int igt_live_test_begin(struct igt_live_test *t, + struct drm_i915_private *i915, + const char *func, + const char *name); +int igt_live_test_end(struct igt_live_test *t); + +#endif /* IGT_LIVE_TEST_H */ diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index 0e70df0230b8..9ebd9225684e 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -185,11 +185,6 @@ void igt_spinner_fini(struct igt_spinner *spin) bool igt_wait_for_spinner(struct igt_spinner *spin, struct i915_request *rq) { - if (!wait_event_timeout(rq->execute, - READ_ONCE(rq->global_seqno), - msecs_to_jiffies(10))) - return false; - return !(wait_for_us(i915_seqno_passed(hws_seqno(spin, rq), rq->fence.seqno), 10) && diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c deleted file mode 100644 index f03b407fdbe2..000000000000 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ /dev/null @@ -1,470 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include "../i915_selftest.h" -#include "i915_random.h" - -#include "mock_gem_device.h" -#include "mock_engine.h" - -static int check_rbtree(struct intel_engine_cs *engine, - const unsigned long *bitmap, - const struct intel_wait *waiters, - const int count) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct rb_node *rb; - int n; - - if (&b->irq_wait->node != rb_first(&b->waiters)) { - pr_err("First waiter does not match first element of wait-tree\n"); - return -EINVAL; - } - - n = find_first_bit(bitmap, count); - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = container_of(rb, typeof(*w), node); - int idx = w - waiters; - - if (!test_bit(idx, bitmap)) { - pr_err("waiter[%d, seqno=%d] removed but still in wait-tree\n", - idx, w->seqno); - return -EINVAL; - } - - if (n != idx) { - pr_err("waiter[%d, seqno=%d] does not match expected next element in tree [%d]\n", - idx, w->seqno, n); - return -EINVAL; - } - - n = find_next_bit(bitmap, count, n + 1); - } - - return 0; -} - -static int check_completion(struct intel_engine_cs *engine, - const unsigned long *bitmap, - const struct intel_wait *waiters, - const int count) -{ - int n; - - for (n = 0; n < count; n++) { - if (intel_wait_complete(&waiters[n]) != !!test_bit(n, bitmap)) - continue; - - pr_err("waiter[%d, seqno=%d] is %s, but expected %s\n", - n, waiters[n].seqno, - intel_wait_complete(&waiters[n]) ? "complete" : "active", - test_bit(n, bitmap) ? "active" : "complete"); - return -EINVAL; - } - - return 0; -} - -static int check_rbtree_empty(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - if (b->irq_wait) { - pr_err("Empty breadcrumbs still has a waiter\n"); - return -EINVAL; - } - - if (!RB_EMPTY_ROOT(&b->waiters)) { - pr_err("Empty breadcrumbs, but wait-tree not empty\n"); - return -EINVAL; - } - - return 0; -} - -static int igt_random_insert_remove(void *arg) -{ - const u32 seqno_bias = 0x1000; - I915_RND_STATE(prng); - struct intel_engine_cs *engine = arg; - struct intel_wait *waiters; - const int count = 4096; - unsigned int *order; - unsigned long *bitmap; - int err = -ENOMEM; - int n; - - mock_engine_reset(engine); - - waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL); - if (!waiters) - goto out_engines; - - bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap), - GFP_KERNEL); - if (!bitmap) - goto out_waiters; - - order = i915_random_order(count, &prng); - if (!order) - goto out_bitmap; - - for (n = 0; n < count; n++) - intel_wait_init_for_seqno(&waiters[n], seqno_bias + n); - - err = check_rbtree(engine, bitmap, waiters, count); - if (err) - goto out_order; - - /* Add and remove waiters into the rbtree in random order. At each - * step, we verify that the rbtree is correctly ordered. - */ - for (n = 0; n < count; n++) { - int i = order[n]; - - intel_engine_add_wait(engine, &waiters[i]); - __set_bit(i, bitmap); - - err = check_rbtree(engine, bitmap, waiters, count); - if (err) - goto out_order; - } - - i915_random_reorder(order, count, &prng); - for (n = 0; n < count; n++) { - int i = order[n]; - - intel_engine_remove_wait(engine, &waiters[i]); - __clear_bit(i, bitmap); - - err = check_rbtree(engine, bitmap, waiters, count); - if (err) - goto out_order; - } - - err = check_rbtree_empty(engine); -out_order: - kfree(order); -out_bitmap: - kfree(bitmap); -out_waiters: - kvfree(waiters); -out_engines: - mock_engine_flush(engine); - return err; -} - -static int igt_insert_complete(void *arg) -{ - const u32 seqno_bias = 0x1000; - struct intel_engine_cs *engine = arg; - struct intel_wait *waiters; - const int count = 4096; - unsigned long *bitmap; - int err = -ENOMEM; - int n, m; - - mock_engine_reset(engine); - - waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL); - if (!waiters) - goto out_engines; - - bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap), - GFP_KERNEL); - if (!bitmap) - goto out_waiters; - - for (n = 0; n < count; n++) { - intel_wait_init_for_seqno(&waiters[n], n + seqno_bias); - intel_engine_add_wait(engine, &waiters[n]); - __set_bit(n, bitmap); - } - err = check_rbtree(engine, bitmap, waiters, count); - if (err) - goto out_bitmap; - - /* On each step, we advance the seqno so that several waiters are then - * complete (we increase the seqno by increasingly larger values to - * retire more and more waiters at once). All retired waiters should - * be woken and removed from the rbtree, and so that we check. - */ - for (n = 0; n < count; n = m) { - int seqno = 2 * n; - - GEM_BUG_ON(find_first_bit(bitmap, count) != n); - - if (intel_wait_complete(&waiters[n])) { - pr_err("waiter[%d, seqno=%d] completed too early\n", - n, waiters[n].seqno); - err = -EINVAL; - goto out_bitmap; - } - - /* complete the following waiters */ - mock_seqno_advance(engine, seqno + seqno_bias); - for (m = n; m <= seqno; m++) { - if (m == count) - break; - - GEM_BUG_ON(!test_bit(m, bitmap)); - __clear_bit(m, bitmap); - } - - intel_engine_remove_wait(engine, &waiters[n]); - RB_CLEAR_NODE(&waiters[n].node); - - err = check_rbtree(engine, bitmap, waiters, count); - if (err) { - pr_err("rbtree corrupt after seqno advance to %d\n", - seqno + seqno_bias); - goto out_bitmap; - } - - err = check_completion(engine, bitmap, waiters, count); - if (err) { - pr_err("completions after seqno advance to %d failed\n", - seqno + seqno_bias); - goto out_bitmap; - } - } - - err = check_rbtree_empty(engine); -out_bitmap: - kfree(bitmap); -out_waiters: - kvfree(waiters); -out_engines: - mock_engine_flush(engine); - return err; -} - -struct igt_wakeup { - struct task_struct *tsk; - atomic_t *ready, *set, *done; - struct intel_engine_cs *engine; - unsigned long flags; -#define STOP 0 -#define IDLE 1 - wait_queue_head_t *wq; - u32 seqno; -}; - -static bool wait_for_ready(struct igt_wakeup *w) -{ - DEFINE_WAIT(ready); - - set_bit(IDLE, &w->flags); - if (atomic_dec_and_test(w->done)) - wake_up_var(w->done); - - if (test_bit(STOP, &w->flags)) - goto out; - - for (;;) { - prepare_to_wait(w->wq, &ready, TASK_INTERRUPTIBLE); - if (atomic_read(w->ready) == 0) - break; - - schedule(); - } - finish_wait(w->wq, &ready); - -out: - clear_bit(IDLE, &w->flags); - if (atomic_dec_and_test(w->set)) - wake_up_var(w->set); - - return !test_bit(STOP, &w->flags); -} - -static int igt_wakeup_thread(void *arg) -{ - struct igt_wakeup *w = arg; - struct intel_wait wait; - - while (wait_for_ready(w)) { - GEM_BUG_ON(kthread_should_stop()); - - intel_wait_init_for_seqno(&wait, w->seqno); - intel_engine_add_wait(w->engine, &wait); - for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (i915_seqno_passed(intel_engine_get_seqno(w->engine), - w->seqno)) - break; - - if (test_bit(STOP, &w->flags)) /* emergency escape */ - break; - - schedule(); - } - intel_engine_remove_wait(w->engine, &wait); - __set_current_state(TASK_RUNNING); - } - - return 0; -} - -static void igt_wake_all_sync(atomic_t *ready, - atomic_t *set, - atomic_t *done, - wait_queue_head_t *wq, - int count) -{ - atomic_set(set, count); - atomic_set(ready, 0); - wake_up_all(wq); - - wait_var_event(set, !atomic_read(set)); - atomic_set(ready, count); - atomic_set(done, count); -} - -static int igt_wakeup(void *arg) -{ - I915_RND_STATE(prng); - struct intel_engine_cs *engine = arg; - struct igt_wakeup *waiters; - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); - const int count = 4096; - const u32 max_seqno = count / 4; - atomic_t ready, set, done; - int err = -ENOMEM; - int n, step; - - mock_engine_reset(engine); - - waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL); - if (!waiters) - goto out_engines; - - /* Create a large number of threads, each waiting on a random seqno. - * Multiple waiters will be waiting for the same seqno. - */ - atomic_set(&ready, count); - for (n = 0; n < count; n++) { - waiters[n].wq = &wq; - waiters[n].ready = &ready; - waiters[n].set = &set; - waiters[n].done = &done; - waiters[n].engine = engine; - waiters[n].flags = BIT(IDLE); - - waiters[n].tsk = kthread_run(igt_wakeup_thread, &waiters[n], - "i915/igt:%d", n); - if (IS_ERR(waiters[n].tsk)) - goto out_waiters; - - get_task_struct(waiters[n].tsk); - } - - for (step = 1; step <= max_seqno; step <<= 1) { - u32 seqno; - - /* The waiter threads start paused as we assign them a random - * seqno and reset the engine. Once the engine is reset, - * we signal that the threads may begin their wait upon their - * seqno. - */ - for (n = 0; n < count; n++) { - GEM_BUG_ON(!test_bit(IDLE, &waiters[n].flags)); - waiters[n].seqno = - 1 + prandom_u32_state(&prng) % max_seqno; - } - mock_seqno_advance(engine, 0); - igt_wake_all_sync(&ready, &set, &done, &wq, count); - - /* Simulate the GPU doing chunks of work, with one or more - * seqno appearing to finish at the same time. A random number - * of threads will be waiting upon the update and hopefully be - * woken. - */ - for (seqno = 1; seqno <= max_seqno + step; seqno += step) { - usleep_range(50, 500); - mock_seqno_advance(engine, seqno); - } - GEM_BUG_ON(intel_engine_get_seqno(engine) < 1 + max_seqno); - - /* With the seqno now beyond any of the waiting threads, they - * should all be woken, see that they are complete and signal - * that they are ready for the next test. We wait until all - * threads are complete and waiting for us (i.e. not a seqno). - */ - if (!wait_var_event_timeout(&done, - !atomic_read(&done), 10 * HZ)) { - pr_err("Timed out waiting for %d remaining waiters\n", - atomic_read(&done)); - err = -ETIMEDOUT; - break; - } - - err = check_rbtree_empty(engine); - if (err) - break; - } - -out_waiters: - for (n = 0; n < count; n++) { - if (IS_ERR(waiters[n].tsk)) - break; - - set_bit(STOP, &waiters[n].flags); - } - mock_seqno_advance(engine, INT_MAX); /* wakeup any broken waiters */ - igt_wake_all_sync(&ready, &set, &done, &wq, n); - - for (n = 0; n < count; n++) { - if (IS_ERR(waiters[n].tsk)) - break; - - kthread_stop(waiters[n].tsk); - put_task_struct(waiters[n].tsk); - } - - kvfree(waiters); -out_engines: - mock_engine_flush(engine); - return err; -} - -int intel_breadcrumbs_mock_selftests(void) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_random_insert_remove), - SUBTEST(igt_insert_complete), - SUBTEST(igt_wakeup), - }; - struct drm_i915_private *i915; - int err; - - i915 = mock_gem_device(); - if (!i915) - return -ENOMEM; - - err = i915_subtests(tests, i915->engine[RCS]); - drm_dev_put(&i915->drm); - - return err; -} diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c index 32cba4cae31a..c5e0a0e98fcb 100644 --- a/drivers/gpu/drm/i915/selftests/intel_guc.c +++ b/drivers/gpu/drm/i915/selftests/intel_guc.c @@ -137,12 +137,13 @@ static bool client_doorbell_in_sync(struct intel_guc_client *client) static int igt_guc_clients(void *args) { struct drm_i915_private *dev_priv = args; + intel_wakeref_t wakeref; struct intel_guc *guc; int err = 0; GEM_BUG_ON(!HAS_GUC(dev_priv)); mutex_lock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); guc = &dev_priv->guc; if (!guc) { @@ -225,7 +226,7 @@ out: guc_clients_create(guc); guc_clients_enable(guc); unlock: - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); mutex_unlock(&dev_priv->drm.struct_mutex); return err; } @@ -238,13 +239,14 @@ unlock: static int igt_guc_doorbells(void *arg) { struct drm_i915_private *dev_priv = arg; + intel_wakeref_t wakeref; struct intel_guc *guc; int i, err = 0; u16 db_id; GEM_BUG_ON(!HAS_GUC(dev_priv)); mutex_lock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); guc = &dev_priv->guc; if (!guc) { @@ -337,7 +339,7 @@ out: guc_client_free(clients[i]); } unlock: - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); mutex_unlock(&dev_priv->drm.struct_mutex); return err; } diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 0aadbd9c7d56..7b6f3bea9ef8 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -363,9 +363,7 @@ static int igt_global_reset(void *arg) /* Check that we can issue a global GPU reset */ igt_global_reset_lock(i915); - set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags); - mutex_lock(&i915->drm.struct_mutex); reset_count = i915_reset_count(&i915->gpu_error); i915_reset(i915, ALL_ENGINES, NULL); @@ -374,9 +372,7 @@ static int igt_global_reset(void *arg) pr_err("No GPU reset recorded!\n"); err = -EINVAL; } - mutex_unlock(&i915->drm.struct_mutex); - GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); igt_global_reset_unlock(i915); if (i915_terminally_wedged(&i915->gpu_error)) @@ -388,22 +384,21 @@ static int igt_global_reset(void *arg) static int igt_wedged_reset(void *arg) { struct drm_i915_private *i915 = arg; + intel_wakeref_t wakeref; /* Check that we can recover a wedged device with a GPU reset */ igt_global_reset_lock(i915); - mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); i915_gem_set_wedged(i915); - GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error)); - set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags); + mutex_lock(&i915->drm.struct_mutex); + GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error)); i915_reset(i915, ALL_ENGINES, NULL); - GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); - - intel_runtime_pm_put(i915); mutex_unlock(&i915->drm.struct_mutex); + + intel_runtime_pm_put(i915, wakeref); igt_global_reset_unlock(i915); return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0; @@ -454,8 +449,6 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { - u32 seqno = intel_engine_get_seqno(engine); - if (active) { struct i915_request *rq; @@ -484,8 +477,6 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) break; } - GEM_BUG_ON(!rq->global_seqno); - seqno = rq->global_seqno - 1; i915_request_put(rq); } @@ -501,16 +492,15 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) break; } - reset_engine_count += active; if (i915_reset_engine_count(&i915->gpu_error, engine) != - reset_engine_count) { - pr_err("%s engine reset %srecorded!\n", - engine->name, active ? "not " : ""); + ++reset_engine_count) { + pr_err("%s engine reset not recorded!\n", + engine->name); err = -EINVAL; break; } - if (!wait_for_idle(engine)) { + if (!i915_reset_flush(i915)) { struct drm_printer p = drm_info_printer(i915->drm.dev); @@ -733,7 +723,6 @@ static int __igt_reset_engines(struct drm_i915_private *i915, set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { - u32 seqno = intel_engine_get_seqno(engine); struct i915_request *rq = NULL; if (flags & TEST_ACTIVE) { @@ -761,9 +750,6 @@ static int __igt_reset_engines(struct drm_i915_private *i915, err = -EIO; break; } - - GEM_BUG_ON(!rq->global_seqno); - seqno = rq->global_seqno - 1; } err = i915_reset_engine(engine, NULL); @@ -800,10 +786,9 @@ static int __igt_reset_engines(struct drm_i915_private *i915, reported = i915_reset_engine_count(&i915->gpu_error, engine); reported -= threads[engine->id].resets; - if (reported != (flags & TEST_ACTIVE ? count : 0)) { - pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu, expected %lu reported\n", - engine->name, test_name, count, reported, - (flags & TEST_ACTIVE ? count : 0)); + if (reported != count) { + pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", + engine->name, test_name, count, reported); if (!err) err = -EINVAL; } @@ -902,20 +887,13 @@ static int igt_reset_engines(void *arg) return 0; } -static u32 fake_hangcheck(struct i915_request *rq, u32 mask) +static u32 fake_hangcheck(struct drm_i915_private *i915, u32 mask) { - struct i915_gpu_error *error = &rq->i915->gpu_error; - u32 reset_count = i915_reset_count(error); - - error->stalled_mask = mask; - - /* set_bit() must be after we have setup the backchannel (mask) */ - smp_mb__before_atomic(); - set_bit(I915_RESET_HANDOFF, &error->flags); + u32 count = i915_reset_count(&i915->gpu_error); - wake_up_all(&error->wait_queue); + i915_reset(i915, mask, NULL); - return reset_count; + return count; } static int igt_reset_wait(void *arg) @@ -961,7 +939,7 @@ static int igt_reset_wait(void *arg) goto out_rq; } - reset_count = fake_hangcheck(rq, ALL_ENGINES); + reset_count = fake_hangcheck(i915, ALL_ENGINES); timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10); if (timeout < 0) { @@ -971,7 +949,6 @@ static int igt_reset_wait(void *arg) goto out_rq; } - GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); if (i915_reset_count(&i915->gpu_error) == reset_count) { pr_err("No GPU reset recorded!\n"); err = -EINVAL; @@ -1150,7 +1127,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, wait_for_completion(&arg.completion); - if (wait_for(waitqueue_active(&rq->execute), 10)) { + if (wait_for(!list_empty(&rq->fence.cb_list), 10)) { struct drm_printer p = drm_info_printer(i915->drm.dev); pr_err("igt/evict_vma kthread did not wait\n"); @@ -1161,7 +1138,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, } out_reset: - fake_hangcheck(rq, intel_engine_flag(rq->engine)); + fake_hangcheck(rq->i915, intel_engine_flag(rq->engine)); if (tsk) { struct igt_wedge_me w; @@ -1340,12 +1317,7 @@ static int igt_reset_queue(void *arg) goto fini; } - reset_count = fake_hangcheck(prev, ENGINE_MASK(id)); - - i915_reset(i915, ENGINE_MASK(id), NULL); - - GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, - &i915->gpu_error.flags)); + reset_count = fake_hangcheck(i915, ENGINE_MASK(id)); if (prev->fence.error != -EIO) { pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n", @@ -1564,6 +1536,7 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine, pr_err("%s(%s): Failed to start request %llx, at %x\n", __func__, engine->name, rq->fence.seqno, hws_seqno(&h, rq)); + i915_gem_set_wedged(i915); err = -EIO; } @@ -1587,7 +1560,6 @@ out: static void force_reset(struct drm_i915_private *i915) { i915_gem_set_wedged(i915); - set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags); i915_reset(i915, 0, NULL); } @@ -1600,6 +1572,7 @@ static int igt_atomic_reset(void *arg) { } }; struct drm_i915_private *i915 = arg; + intel_wakeref_t wakeref; int err = 0; /* Check that the resets are usable from atomic context */ @@ -1609,13 +1582,33 @@ static int igt_atomic_reset(void *arg) igt_global_reset_lock(i915); mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); /* Flush any requests before we get started and check basics */ force_reset(i915); if (i915_terminally_wedged(&i915->gpu_error)) goto unlock; + if (intel_has_gpu_reset(i915)) { + const typeof(*phases) *p; + + for (p = phases; p->name; p++) { + GEM_TRACE("intel_gpu_reset under %s\n", p->name); + + p->critical_section_begin(); + err = intel_gpu_reset(i915, ALL_ENGINES); + p->critical_section_end(); + + if (err) { + pr_err("intel_gpu_reset failed under %s\n", + p->name); + goto out; + } + } + + force_reset(i915); + } + if (intel_has_reset_engine(i915)) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -1636,7 +1629,7 @@ out: force_reset(i915); unlock: - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); igt_global_reset_unlock(i915); @@ -1660,6 +1653,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_handle_error), SUBTEST(igt_atomic_reset), }; + intel_wakeref_t wakeref; bool saved_hangcheck; int err; @@ -1669,8 +1663,9 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) if (i915_terminally_wedged(&i915->gpu_error)) return -EIO; /* we're long past hope of a successful reset */ - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck); + drain_delayed_work(&i915->gpu_error.hangcheck_work); /* flush param */ err = i915_subtests(tests, i915); @@ -1679,7 +1674,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) mutex_unlock(&i915->drm.struct_mutex); i915_modparams.enable_hangcheck = saved_hangcheck; - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); return err; } diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index 00caaa00f02f..58144e024751 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -4,6 +4,10 @@ * Copyright © 2018 Intel Corporation */ +#include <linux/prime_numbers.h> + +#include "../i915_reset.h" + #include "../i915_selftest.h" #include "igt_flush_test.h" #include "igt_spinner.h" @@ -18,13 +22,14 @@ static int live_sanitycheck(void *arg) struct i915_gem_context *ctx; enum intel_engine_id id; struct igt_spinner spin; + intel_wakeref_t wakeref; int err = -ENOMEM; if (!HAS_LOGICAL_RING_CONTEXTS(i915)) return 0; mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); if (igt_spinner_init(&spin, i915)) goto err_unlock; @@ -65,7 +70,7 @@ err_spin: igt_spinner_fini(&spin); err_unlock: igt_flush_test(i915, I915_WAIT_LOCKED); - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -77,13 +82,14 @@ static int live_preempt(void *arg) struct igt_spinner spin_hi, spin_lo; struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; int err = -ENOMEM; if (!HAS_LOGICAL_RING_PREEMPTION(i915)) return 0; mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); if (igt_spinner_init(&spin_hi, i915)) goto err_unlock; @@ -158,7 +164,7 @@ err_spin_hi: igt_spinner_fini(&spin_hi); err_unlock: igt_flush_test(i915, I915_WAIT_LOCKED); - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -171,13 +177,14 @@ static int live_late_preempt(void *arg) struct intel_engine_cs *engine; struct i915_sched_attr attr = {}; enum intel_engine_id id; + intel_wakeref_t wakeref; int err = -ENOMEM; if (!HAS_LOGICAL_RING_PREEMPTION(i915)) return 0; mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); if (igt_spinner_init(&spin_hi, i915)) goto err_unlock; @@ -251,7 +258,7 @@ err_spin_hi: igt_spinner_fini(&spin_hi); err_unlock: igt_flush_test(i915, I915_WAIT_LOCKED); - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; @@ -263,6 +270,243 @@ err_wedged: goto err_ctx_lo; } +struct preempt_client { + struct igt_spinner spin; + struct i915_gem_context *ctx; +}; + +static int preempt_client_init(struct drm_i915_private *i915, + struct preempt_client *c) +{ + c->ctx = kernel_context(i915); + if (!c->ctx) + return -ENOMEM; + + if (igt_spinner_init(&c->spin, i915)) + goto err_ctx; + + return 0; + +err_ctx: + kernel_context_close(c->ctx); + return -ENOMEM; +} + +static void preempt_client_fini(struct preempt_client *c) +{ + igt_spinner_fini(&c->spin); + kernel_context_close(c->ctx); +} + +static int live_suppress_self_preempt(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) + }; + struct preempt_client a, b; + enum intel_engine_id id; + intel_wakeref_t wakeref; + int err = -ENOMEM; + + /* + * Verify that if a preemption request does not cause a change in + * the current execution order, the preempt-to-idle injection is + * skipped and that we do not accidentally apply it after the CS + * completion event. + */ + + if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + return 0; + + if (USES_GUC_SUBMISSION(i915)) + return 0; /* presume black blox */ + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + if (preempt_client_init(i915, &a)) + goto err_unlock; + if (preempt_client_init(i915, &b)) + goto err_client_a; + + for_each_engine(engine, i915, id) { + struct i915_request *rq_a, *rq_b; + int depth; + + engine->execlists.preempt_hang.count = 0; + + rq_a = igt_spinner_create_request(&a.spin, + a.ctx, engine, + MI_NOOP); + if (IS_ERR(rq_a)) { + err = PTR_ERR(rq_a); + goto err_client_b; + } + + i915_request_add(rq_a); + if (!igt_wait_for_spinner(&a.spin, rq_a)) { + pr_err("First client failed to start\n"); + goto err_wedged; + } + + for (depth = 0; depth < 8; depth++) { + rq_b = igt_spinner_create_request(&b.spin, + b.ctx, engine, + MI_NOOP); + if (IS_ERR(rq_b)) { + err = PTR_ERR(rq_b); + goto err_client_b; + } + i915_request_add(rq_b); + + GEM_BUG_ON(i915_request_completed(rq_a)); + engine->schedule(rq_a, &attr); + igt_spinner_end(&a.spin); + + if (!igt_wait_for_spinner(&b.spin, rq_b)) { + pr_err("Second client failed to start\n"); + goto err_wedged; + } + + swap(a, b); + rq_a = rq_b; + } + igt_spinner_end(&a.spin); + + if (engine->execlists.preempt_hang.count) { + pr_err("Preemption recorded x%d, depth %d; should have been suppressed!\n", + engine->execlists.preempt_hang.count, + depth); + err = -EINVAL; + goto err_client_b; + } + + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + goto err_wedged; + } + + err = 0; +err_client_b: + preempt_client_fini(&b); +err_client_a: + preempt_client_fini(&a); +err_unlock: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + return err; + +err_wedged: + igt_spinner_end(&b.spin); + igt_spinner_end(&a.spin); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_client_b; +} + +static int live_chain_preempt(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct preempt_client hi, lo; + enum intel_engine_id id; + intel_wakeref_t wakeref; + int err = -ENOMEM; + + /* + * Build a chain AB...BA between two contexts (A, B) and request + * preemption of the last request. It should then complete before + * the previously submitted spinner in B. + */ + + if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + if (preempt_client_init(i915, &hi)) + goto err_unlock; + + if (preempt_client_init(i915, &lo)) + goto err_client_hi; + + for_each_engine(engine, i915, id) { + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), + }; + int count, i; + + for_each_prime_number_from(count, 1, 32) { /* must fit ring! */ + struct i915_request *rq; + + rq = igt_spinner_create_request(&hi.spin, + hi.ctx, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) + goto err_wedged; + i915_request_add(rq); + if (!igt_wait_for_spinner(&hi.spin, rq)) + goto err_wedged; + + rq = igt_spinner_create_request(&lo.spin, + lo.ctx, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) + goto err_wedged; + i915_request_add(rq); + + for (i = 0; i < count; i++) { + rq = i915_request_alloc(engine, lo.ctx); + if (IS_ERR(rq)) + goto err_wedged; + i915_request_add(rq); + } + + rq = i915_request_alloc(engine, hi.ctx); + if (IS_ERR(rq)) + goto err_wedged; + i915_request_add(rq); + engine->schedule(rq, &attr); + + igt_spinner_end(&hi.spin); + if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) { + struct drm_printer p = + drm_info_printer(i915->drm.dev); + + pr_err("Failed to preempt over chain of %d\n", + count); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + goto err_wedged; + } + igt_spinner_end(&lo.spin); + } + } + + err = 0; +err_client_lo: + preempt_client_fini(&lo); +err_client_hi: + preempt_client_fini(&hi); +err_unlock: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + return err; + +err_wedged: + igt_spinner_end(&hi.spin); + igt_spinner_end(&lo.spin); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_client_lo; +} + static int live_preempt_hang(void *arg) { struct drm_i915_private *i915 = arg; @@ -270,6 +514,7 @@ static int live_preempt_hang(void *arg) struct igt_spinner spin_hi, spin_lo; struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; int err = -ENOMEM; if (!HAS_LOGICAL_RING_PREEMPTION(i915)) @@ -279,7 +524,7 @@ static int live_preempt_hang(void *arg) return 0; mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); if (igt_spinner_init(&spin_hi, i915)) goto err_unlock; @@ -374,7 +619,7 @@ err_spin_hi: igt_spinner_fini(&spin_hi); err_unlock: igt_flush_test(i915, I915_WAIT_LOCKED); - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -562,6 +807,7 @@ static int live_preempt_smoke(void *arg) .ncontext = 1024, }; const unsigned int phase[] = { 0, BATCH }; + intel_wakeref_t wakeref; int err = -ENOMEM; u32 *cs; int n; @@ -576,7 +822,7 @@ static int live_preempt_smoke(void *arg) return -ENOMEM; mutex_lock(&smoke.i915->drm.struct_mutex); - intel_runtime_pm_get(smoke.i915); + wakeref = intel_runtime_pm_get(smoke.i915); smoke.batch = i915_gem_object_create_internal(smoke.i915, PAGE_SIZE); if (IS_ERR(smoke.batch)) { @@ -627,7 +873,7 @@ err_ctx: err_batch: i915_gem_object_put(smoke.batch); err_unlock: - intel_runtime_pm_put(smoke.i915); + intel_runtime_pm_put(smoke.i915, wakeref); mutex_unlock(&smoke.i915->drm.struct_mutex); kfree(smoke.contexts); @@ -640,6 +886,8 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_sanitycheck), SUBTEST(live_preempt), SUBTEST(live_late_preempt), + SUBTEST(live_suppress_self_preempt), + SUBTEST(live_chain_preempt), SUBTEST(live_preempt_hang), SUBTEST(live_preempt_smoke), }; diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index 8b3f3200a3bd..b15c4f26c593 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -5,6 +5,7 @@ */ #include "../i915_selftest.h" +#include "../i915_reset.h" #include "igt_flush_test.h" #include "igt_reset.h" @@ -60,10 +61,11 @@ reference_lists_fini(struct drm_i915_private *i915, struct wa_lists *lists) static struct drm_i915_gem_object * read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { + const u32 base = engine->mmio_base; struct drm_i915_gem_object *result; + intel_wakeref_t wakeref; struct i915_request *rq; struct i915_vma *vma; - const u32 base = engine->mmio_base; u32 srm, *cs; int err; int i; @@ -92,9 +94,9 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) if (err) goto err_obj; - intel_runtime_pm_get(engine->i915); - rq = i915_request_alloc(engine, ctx); - intel_runtime_pm_put(engine->i915); + rq = ERR_PTR(-ENODEV); + with_intel_runtime_pm(engine->i915, wakeref) + rq = i915_request_alloc(engine, ctx); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_pin; @@ -212,7 +214,6 @@ out_put: static int do_device_reset(struct intel_engine_cs *engine) { - set_bit(I915_RESET_HANDOFF, &engine->i915->gpu_error.flags); i915_reset(engine->i915, ENGINE_MASK(engine->id), "live_workarounds"); return 0; } @@ -228,20 +229,22 @@ switch_to_scratch_context(struct intel_engine_cs *engine, { struct i915_gem_context *ctx; struct i915_request *rq; + intel_wakeref_t wakeref; int err = 0; ctx = kernel_context(engine->i915); if (IS_ERR(ctx)) return PTR_ERR(ctx); - intel_runtime_pm_get(engine->i915); - - if (spin) - rq = igt_spinner_create_request(spin, ctx, engine, MI_NOOP); - else - rq = i915_request_alloc(engine, ctx); - - intel_runtime_pm_put(engine->i915); + rq = ERR_PTR(-ENODEV); + with_intel_runtime_pm(engine->i915, wakeref) { + if (spin) + rq = igt_spinner_create_request(spin, + ctx, engine, + MI_NOOP); + else + rq = i915_request_alloc(engine, ctx); + } kernel_context_close(ctx); @@ -273,6 +276,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, bool want_spin = reset == do_engine_reset; struct i915_gem_context *ctx; struct igt_spinner spin; + intel_wakeref_t wakeref; int err; pr_info("Checking %d whitelisted registers (RING_NONPRIV) [%s]\n", @@ -298,9 +302,8 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, if (err) goto out; - intel_runtime_pm_get(i915); - err = reset(engine); - intel_runtime_pm_put(i915); + with_intel_runtime_pm(i915, wakeref) + err = reset(engine); if (want_spin) { igt_spinner_end(&spin); @@ -390,7 +393,7 @@ static int live_gpu_reset_gt_engine_workarounds(void *arg) { struct drm_i915_private *i915 = arg; - struct i915_gpu_error *error = &i915->gpu_error; + intel_wakeref_t wakeref; struct wa_lists lists; bool ok; @@ -400,21 +403,21 @@ live_gpu_reset_gt_engine_workarounds(void *arg) pr_info("Verifying after GPU reset...\n"); igt_global_reset_lock(i915); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); + reference_lists_init(i915, &lists); ok = verify_gt_engine_wa(i915, &lists, "before reset"); if (!ok) goto out; - set_bit(I915_RESET_HANDOFF, &error->flags); i915_reset(i915, ALL_ENGINES, "live_workarounds"); ok = verify_gt_engine_wa(i915, &lists, "after reset"); out: reference_lists_fini(i915, &lists); - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); igt_global_reset_unlock(i915); return ok ? 0 : -ESRCH; @@ -429,6 +432,7 @@ live_engine_reset_gt_engine_workarounds(void *arg) struct igt_spinner spin; enum intel_engine_id id; struct i915_request *rq; + intel_wakeref_t wakeref; struct wa_lists lists; int ret = 0; @@ -440,7 +444,8 @@ live_engine_reset_gt_engine_workarounds(void *arg) return PTR_ERR(ctx); igt_global_reset_lock(i915); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); + reference_lists_init(i915, &lists); for_each_engine(engine, i915, id) { @@ -496,7 +501,7 @@ live_engine_reset_gt_engine_workarounds(void *arg) err: reference_lists_fini(i915, &lists); - intel_runtime_pm_put(i915); + intel_runtime_pm_put(i915, wakeref); igt_global_reset_unlock(i915); kernel_context_close(ctx); diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c index b26f07b55d86..2bfa72c1654b 100644 --- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c @@ -76,3 +76,57 @@ void timed_fence_fini(struct timed_fence *tf) destroy_timer_on_stack(&tf->timer); i915_sw_fence_fini(&tf->fence); } + +struct heap_fence { + struct i915_sw_fence fence; + union { + struct kref ref; + struct rcu_head rcu; + }; +}; + +static int __i915_sw_fence_call +heap_fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +{ + struct heap_fence *h = container_of(fence, typeof(*h), fence); + + switch (state) { + case FENCE_COMPLETE: + break; + + case FENCE_FREE: + heap_fence_put(&h->fence); + } + + return NOTIFY_DONE; +} + +struct i915_sw_fence *heap_fence_create(gfp_t gfp) +{ + struct heap_fence *h; + + h = kmalloc(sizeof(*h), gfp); + if (!h) + return NULL; + + i915_sw_fence_init(&h->fence, heap_fence_notify); + refcount_set(&h->ref.refcount, 2); + + return &h->fence; +} + +static void heap_fence_release(struct kref *ref) +{ + struct heap_fence *h = container_of(ref, typeof(*h), ref); + + i915_sw_fence_fini(&h->fence); + + kfree_rcu(h, rcu); +} + +void heap_fence_put(struct i915_sw_fence *fence) +{ + struct heap_fence *h = container_of(fence, typeof(*h), fence); + + kref_put(&h->ref, heap_fence_release); +} diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.h b/drivers/gpu/drm/i915/selftests/lib_sw_fence.h index 474aafb92ae1..1f9927e10f3a 100644 --- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.h +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.h @@ -39,4 +39,7 @@ struct timed_fence { void timed_fence_init(struct timed_fence *tf, unsigned long expires); void timed_fence_fini(struct timed_fence *tf); +struct i915_sw_fence *heap_fence_create(gfp_t gfp); +void heap_fence_put(struct i915_sw_fence *fence); + #endif /* _LIB_SW_FENCE_H_ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c index d937bdff26f9..b646cdcdd602 100644 --- a/drivers/gpu/drm/i915/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/selftests/mock_context.c @@ -45,11 +45,8 @@ mock_context(struct drm_i915_private *i915, INIT_LIST_HEAD(&ctx->handles_list); INIT_LIST_HEAD(&ctx->hw_id_link); - for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) { - struct intel_context *ce = &ctx->__engine[n]; - - ce->gem_context = ctx; - } + for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) + intel_context_init(&ctx->__engine[n], ctx, i915->engine[n]); ret = i915_gem_context_pin_hw_id(ctx); if (ret < 0) diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 50e1a0b1af7e..08f0cab02e0f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -30,6 +30,52 @@ struct mock_ring { struct i915_timeline timeline; }; +static void mock_timeline_pin(struct i915_timeline *tl) +{ + tl->pin_count++; +} + +static void mock_timeline_unpin(struct i915_timeline *tl) +{ + GEM_BUG_ON(!tl->pin_count); + tl->pin_count--; +} + +static struct intel_ring *mock_ring(struct intel_engine_cs *engine) +{ + const unsigned long sz = PAGE_SIZE / 2; + struct mock_ring *ring; + + ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL); + if (!ring) + return NULL; + + if (i915_timeline_init(engine->i915, + &ring->timeline, engine->name, + NULL)) { + kfree(ring); + return NULL; + } + + ring->base.size = sz; + ring->base.effective_size = sz; + ring->base.vaddr = (void *)(ring + 1); + ring->base.timeline = &ring->timeline; + + INIT_LIST_HEAD(&ring->base.request_list); + intel_ring_update_space(&ring->base); + + return &ring->base; +} + +static void mock_ring_free(struct intel_ring *base) +{ + struct mock_ring *ring = container_of(base, typeof(*ring), base); + + i915_timeline_fini(&ring->timeline); + kfree(ring); +} + static struct mock_request *first_request(struct mock_engine *engine) { return list_first_entry_or_null(&engine->hw_queue, @@ -37,24 +83,29 @@ static struct mock_request *first_request(struct mock_engine *engine) link); } -static void advance(struct mock_engine *engine, - struct mock_request *request) +static void advance(struct mock_request *request) { list_del_init(&request->link); - mock_seqno_advance(&engine->base, request->base.global_seqno); + intel_engine_write_global_seqno(request->base.engine, + request->base.global_seqno); + i915_request_mark_complete(&request->base); + GEM_BUG_ON(!i915_request_completed(&request->base)); + + intel_engine_queue_breadcrumbs(request->base.engine); } static void hw_delay_complete(struct timer_list *t) { struct mock_engine *engine = from_timer(engine, t, hw_delay); struct mock_request *request; + unsigned long flags; - spin_lock(&engine->hw_lock); + spin_lock_irqsave(&engine->hw_lock, flags); /* Timer fired, first request is complete */ request = first_request(engine); if (request) - advance(engine, request); + advance(request); /* * Also immediately signal any subsequent 0-delay requests, but @@ -66,20 +117,24 @@ static void hw_delay_complete(struct timer_list *t) break; } - advance(engine, request); + advance(request); } - spin_unlock(&engine->hw_lock); + spin_unlock_irqrestore(&engine->hw_lock, flags); } static void mock_context_unpin(struct intel_context *ce) { + mock_timeline_unpin(ce->ring->timeline); i915_gem_context_put(ce->gem_context); } static void mock_context_destroy(struct intel_context *ce) { GEM_BUG_ON(ce->pin_count); + + if (ce->ring) + mock_ring_free(ce->ring); } static const struct intel_context_ops mock_context_ops = { @@ -92,14 +147,26 @@ mock_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { struct intel_context *ce = to_intel_context(ctx, engine); + int err = -ENOMEM; + + if (ce->pin_count++) + return ce; - if (!ce->pin_count++) { - i915_gem_context_get(ctx); - ce->ring = engine->buffer; - ce->ops = &mock_context_ops; + if (!ce->ring) { + ce->ring = mock_ring(engine); + if (!ce->ring) + goto err; } + mock_timeline_pin(ce->ring->timeline); + + ce->ops = &mock_context_ops; + i915_gem_context_get(ctx); return ce; + +err: + ce->pin_count = 0; + return ERR_PTR(err); } static int mock_request_alloc(struct i915_request *request) @@ -118,9 +185,9 @@ static int mock_emit_flush(struct i915_request *request, return 0; } -static void mock_emit_breadcrumb(struct i915_request *request, - u32 *flags) +static u32 *mock_emit_breadcrumb(struct i915_request *request, u32 *cs) { + return cs; } static void mock_submit_request(struct i915_request *request) @@ -128,49 +195,20 @@ static void mock_submit_request(struct i915_request *request) struct mock_request *mock = container_of(request, typeof(*mock), base); struct mock_engine *engine = container_of(request->engine, typeof(*engine), base); + unsigned long flags; i915_request_submit(request); GEM_BUG_ON(!request->global_seqno); - spin_lock_irq(&engine->hw_lock); + spin_lock_irqsave(&engine->hw_lock, flags); list_add_tail(&mock->link, &engine->hw_queue); if (mock->link.prev == &engine->hw_queue) { if (mock->delay) mod_timer(&engine->hw_delay, jiffies + mock->delay); else - advance(engine, mock); + advance(mock); } - spin_unlock_irq(&engine->hw_lock); -} - -static struct intel_ring *mock_ring(struct intel_engine_cs *engine) -{ - const unsigned long sz = PAGE_SIZE / 2; - struct mock_ring *ring; - - ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL); - if (!ring) - return NULL; - - i915_timeline_init(engine->i915, &ring->timeline, engine->name); - - ring->base.size = sz; - ring->base.effective_size = sz; - ring->base.vaddr = (void *)(ring + 1); - ring->base.timeline = &ring->timeline; - - INIT_LIST_HEAD(&ring->base.request_list); - intel_ring_update_space(&ring->base); - - return &ring->base; -} - -static void mock_ring_free(struct intel_ring *base) -{ - struct mock_ring *ring = container_of(base, typeof(*ring), base); - - i915_timeline_fini(&ring->timeline); - kfree(ring); + spin_unlock_irqrestore(&engine->hw_lock, flags); } struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, @@ -189,39 +227,37 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.i915 = i915; snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); engine->base.id = id; - engine->base.status_page.page_addr = (void *)(engine + 1); + engine->base.status_page.addr = (void *)(engine + 1); engine->base.context_pin = mock_context_pin; engine->base.request_alloc = mock_request_alloc; engine->base.emit_flush = mock_emit_flush; - engine->base.emit_breadcrumb = mock_emit_breadcrumb; + engine->base.emit_fini_breadcrumb = mock_emit_breadcrumb; engine->base.submit_request = mock_submit_request; - i915_timeline_init(i915, &engine->base.timeline, engine->base.name); + if (i915_timeline_init(i915, + &engine->base.timeline, + engine->base.name, + NULL)) + goto err_free; i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE); intel_engine_init_breadcrumbs(&engine->base); - engine->base.breadcrumbs.mock = true; /* prevent touching HW for irqs */ /* fake hw queue */ spin_lock_init(&engine->hw_lock); timer_setup(&engine->hw_delay, hw_delay_complete, 0); INIT_LIST_HEAD(&engine->hw_queue); - engine->base.buffer = mock_ring(&engine->base); - if (!engine->base.buffer) - goto err_breadcrumbs; - if (IS_ERR(intel_context_pin(i915->kernel_context, &engine->base))) - goto err_ring; + goto err_breadcrumbs; return &engine->base; -err_ring: - mock_ring_free(engine->base.buffer); err_breadcrumbs: intel_engine_fini_breadcrumbs(&engine->base); i915_timeline_fini(&engine->base.timeline); +err_free: kfree(engine); return NULL; } @@ -235,16 +271,14 @@ void mock_engine_flush(struct intel_engine_cs *engine) del_timer_sync(&mock->hw_delay); spin_lock_irq(&mock->hw_lock); - list_for_each_entry_safe(request, rn, &mock->hw_queue, link) { - list_del_init(&request->link); - mock_seqno_advance(&mock->base, request->base.global_seqno); - } + list_for_each_entry_safe(request, rn, &mock->hw_queue, link) + advance(request); spin_unlock_irq(&mock->hw_lock); } void mock_engine_reset(struct intel_engine_cs *engine) { - intel_write_status_page(engine, I915_GEM_HWS_INDEX, 0); + intel_engine_write_global_seqno(engine, 0); } void mock_engine_free(struct intel_engine_cs *engine) @@ -261,8 +295,6 @@ void mock_engine_free(struct intel_engine_cs *engine) __intel_context_unpin(engine->i915->kernel_context, engine); - mock_ring_free(engine->buffer); - intel_engine_fini_breadcrumbs(engine); i915_timeline_fini(&engine->timeline); diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.h b/drivers/gpu/drm/i915/selftests/mock_engine.h index 133d0c21790d..b9cc3a245f16 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.h +++ b/drivers/gpu/drm/i915/selftests/mock_engine.h @@ -46,10 +46,4 @@ void mock_engine_flush(struct intel_engine_cs *engine); void mock_engine_reset(struct intel_engine_cs *engine); void mock_engine_free(struct intel_engine_cs *engine); -static inline void mock_seqno_advance(struct intel_engine_cs *engine, u32 seqno) -{ - intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); - intel_engine_wakeup(engine); -} - #endif /* !__MOCK_ENGINE_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index baa3c38919de..14ae46fda49f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -58,8 +58,8 @@ static void mock_device_release(struct drm_device *dev) i915_gem_contexts_lost(i915); mutex_unlock(&i915->drm.struct_mutex); - cancel_delayed_work_sync(&i915->gt.retire_work); - cancel_delayed_work_sync(&i915->gt.idle_work); + drain_delayed_work(&i915->gt.retire_work); + drain_delayed_work(&i915->gt.idle_work); i915_gem_drain_workqueue(i915); mutex_lock(&i915->drm.struct_mutex); @@ -68,13 +68,14 @@ static void mock_device_release(struct drm_device *dev) i915_gem_contexts_fini(i915); mutex_unlock(&i915->drm.struct_mutex); + i915_timelines_fini(i915); + drain_workqueue(i915->wq); i915_gem_drain_freed_objects(i915); mutex_lock(&i915->drm.struct_mutex); - mock_fini_ggtt(i915); + mock_fini_ggtt(&i915->ggtt); mutex_unlock(&i915->drm.struct_mutex); - WARN_ON(!list_empty(&i915->gt.timelines)); destroy_workqueue(i915->wq); @@ -154,15 +155,17 @@ struct drm_i915_private *mock_gem_device(void) pdev->dev.archdata.iommu = (void *)-1; #endif + i915 = (struct drm_i915_private *)(pdev + 1); + pci_set_drvdata(pdev, i915); + + intel_runtime_pm_init_early(i915); + dev_pm_domain_set(&pdev->dev, &pm_domain); pm_runtime_enable(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); if (pm_runtime_enabled(&pdev->dev)) WARN_ON(pm_runtime_get_sync(&pdev->dev)); - i915 = (struct drm_i915_private *)(pdev + 1); - pci_set_drvdata(pdev, i915); - err = drm_dev_init(&i915->drm, &mock_driver, &pdev->dev); if (err) { pr_err("Failed to initialise mock GEM device: err=%d\n", err); @@ -186,6 +189,7 @@ struct drm_i915_private *mock_gem_device(void) init_waitqueue_head(&i915->gpu_error.wait_queue); init_waitqueue_head(&i915->gpu_error.reset_queue); + mutex_init(&i915->gpu_error.wedge_mutex); i915->wq = alloc_ordered_workqueue("mock", 0); if (!i915->wq) @@ -223,13 +227,14 @@ struct drm_i915_private *mock_gem_device(void) if (!i915->priorities) goto err_dependencies; - INIT_LIST_HEAD(&i915->gt.timelines); + i915_timelines_init(i915); + INIT_LIST_HEAD(&i915->gt.active_rings); INIT_LIST_HEAD(&i915->gt.closed_vma); mutex_lock(&i915->drm.struct_mutex); - mock_init_ggtt(i915); + mock_init_ggtt(i915, &i915->ggtt); mkwrite_device_info(i915)->ring_mask = BIT(0); i915->kernel_context = mock_context(i915, NULL); @@ -250,6 +255,7 @@ err_context: i915_gem_contexts_fini(i915); err_unlock: mutex_unlock(&i915->drm.struct_mutex); + i915_timelines_fini(i915); kmem_cache_destroy(i915->priorities); err_dependencies: kmem_cache_destroy(i915->dependencies); diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index 6ae418c76015..cd83929fde8e 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -70,7 +70,7 @@ mock_ppgtt(struct drm_i915_private *i915, ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE); ppgtt->vm.file = ERR_PTR(-ENODEV); - i915_address_space_init(&ppgtt->vm, i915); + i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT); ppgtt->vm.clear_range = nop_clear_range; ppgtt->vm.insert_page = mock_insert_page; @@ -97,11 +97,12 @@ static void mock_unbind_ggtt(struct i915_vma *vma) { } -void mock_init_ggtt(struct drm_i915_private *i915) +void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt) { - struct i915_ggtt *ggtt = &i915->ggtt; + memset(ggtt, 0, sizeof(*ggtt)); ggtt->vm.i915 = i915; + ggtt->vm.is_ggtt = true; ggtt->gmadr = (struct resource) DEFINE_RES_MEM(0, 2048 * PAGE_SIZE); ggtt->mappable_end = resource_size(&ggtt->gmadr); @@ -117,14 +118,10 @@ void mock_init_ggtt(struct drm_i915_private *i915) ggtt->vm.vma_ops.set_pages = ggtt_set_pages; ggtt->vm.vma_ops.clear_pages = clear_pages; - i915_address_space_init(&ggtt->vm, i915); - - ggtt->vm.is_ggtt = true; + i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT); } -void mock_fini_ggtt(struct drm_i915_private *i915) +void mock_fini_ggtt(struct i915_ggtt *ggtt) { - struct i915_ggtt *ggtt = &i915->ggtt; - i915_address_space_fini(&ggtt->vm); } diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.h b/drivers/gpu/drm/i915/selftests/mock_gtt.h index 9a0a833bb545..40d544bde1d5 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.h +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.h @@ -25,8 +25,8 @@ #ifndef __MOCK_GTT_H #define __MOCK_GTT_H -void mock_init_ggtt(struct drm_i915_private *i915); -void mock_fini_ggtt(struct drm_i915_private *i915); +void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt); +void mock_fini_ggtt(struct i915_ggtt *ggtt); struct i915_hw_ppgtt * mock_ppgtt(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c index dcf3b16f5a07..d2de9ece2118 100644 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.c +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c @@ -10,11 +10,13 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context) { + timeline->i915 = NULL; timeline->fence_context = context; spin_lock_init(&timeline->lock); - init_request_active(&timeline->last_request, NULL); + INIT_ACTIVE_REQUEST(&timeline->barrier); + INIT_ACTIVE_REQUEST(&timeline->last_request); INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); @@ -24,5 +26,5 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context) void mock_timeline_fini(struct i915_timeline *timeline) { - i915_timeline_fini(timeline); + i915_syncmap_free(&timeline->sync); } diff --git a/drivers/gpu/drm/i915/vlv_dsi.c b/drivers/gpu/drm/i915/vlv_dsi.c index 3225621f820c..6403728fe778 100644 --- a/drivers/gpu/drm/i915/vlv_dsi.c +++ b/drivers/gpu/drm/i915/vlv_dsi.c @@ -275,7 +275,7 @@ static int intel_dsi_compute_config(struct intel_encoder *encoder, if (fixed_mode) { intel_fixed_panel_mode(fixed_mode, adjusted_mode); - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) intel_gmch_panel_fitting(crtc, pipe_config, conn_state->scaling_mode); else @@ -289,6 +289,11 @@ static int intel_dsi_compute_config(struct intel_encoder *encoder, /* DSI uses short packets for sync events, so clear mode flags for DSI */ adjusted_mode->flags = 0; + if (intel_dsi->pixel_format == MIPI_DSI_FMT_RGB888) + pipe_config->pipe_bpp = 24; + else + pipe_config->pipe_bpp = 18; + if (IS_GEN9_LP(dev_priv)) { /* Enable Frame time stamp based scanline reporting */ adjusted_mode->private_flags |= @@ -673,6 +678,10 @@ static void intel_dsi_port_enable(struct intel_encoder *encoder, LANE_CONFIGURATION_DUAL_LINK_B : LANE_CONFIGURATION_DUAL_LINK_A; } + + if (intel_dsi->pixel_format != MIPI_DSI_FMT_RGB888) + temp |= DITHERING_ENABLE; + /* assert ip_tg_enable signal */ I915_WRITE(port_ctrl, temp | DPI_ENABLE); POSTING_READ(port_ctrl); @@ -959,13 +968,15 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + intel_wakeref_t wakeref; enum port port; bool active = false; DRM_DEBUG_KMS("\n"); - if (!intel_display_power_get_if_enabled(dev_priv, - encoder->power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain); + if (!wakeref) return false; /* @@ -1021,7 +1032,7 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, } out_put_power: - intel_display_power_put(dev_priv, encoder->power_domain); + intel_display_power_put(dev_priv, encoder->power_domain, wakeref); return active; } @@ -1057,10 +1068,8 @@ static void bxt_dsi_get_pipe_config(struct intel_encoder *encoder, } fmt = I915_READ(MIPI_DSI_FUNC_PRG(port)) & VID_MODE_FORMAT_MASK; - pipe_config->pipe_bpp = - mipi_dsi_pixel_format_to_bpp( - pixel_format_from_register_bits(fmt)); - bpp = pipe_config->pipe_bpp; + bpp = mipi_dsi_pixel_format_to_bpp( + pixel_format_from_register_bits(fmt)); /* Enable Frame time stamo based scanline reporting */ adjusted_mode->private_flags |= @@ -1198,11 +1207,9 @@ static void intel_dsi_get_config(struct intel_encoder *encoder, if (IS_GEN9_LP(dev_priv)) { bxt_dsi_get_pipe_config(encoder, pipe_config); - pclk = bxt_dsi_get_pclk(encoder, pipe_config->pipe_bpp, - pipe_config); + pclk = bxt_dsi_get_pclk(encoder, pipe_config); } else { - pclk = vlv_dsi_get_pclk(encoder, pipe_config->pipe_bpp, - pipe_config); + pclk = vlv_dsi_get_pclk(encoder, pipe_config); } if (pclk) { @@ -1574,6 +1581,7 @@ vlv_dsi_get_hw_panel_orientation(struct intel_connector *connector) enum drm_panel_orientation orientation; struct intel_plane *plane; struct intel_crtc *crtc; + intel_wakeref_t wakeref; enum pipe pipe; u32 val; @@ -1584,7 +1592,8 @@ vlv_dsi_get_hw_panel_orientation(struct intel_connector *connector) plane = to_intel_plane(crtc->base.primary); power_domain = POWER_DOMAIN_PIPE(pipe); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) return DRM_MODE_PANEL_ORIENTATION_UNKNOWN; val = I915_READ(DSPCNTR(plane->i9xx_plane)); @@ -1596,7 +1605,7 @@ vlv_dsi_get_hw_panel_orientation(struct intel_connector *connector) else orientation = DRM_MODE_PANEL_ORIENTATION_NORMAL; - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); return orientation; } @@ -1624,7 +1633,7 @@ static void intel_dsi_add_properties(struct intel_connector *connector) u32 allowed_scalers; allowed_scalers = BIT(DRM_MODE_SCALE_ASPECT) | BIT(DRM_MODE_SCALE_FULLSCREEN); - if (!HAS_GMCH_DISPLAY(dev_priv)) + if (!HAS_GMCH(dev_priv)) allowed_scalers |= BIT(DRM_MODE_SCALE_CENTER); drm_connector_attach_scaling_mode_property(&connector->base, @@ -1688,6 +1697,7 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) intel_encoder->post_disable = intel_dsi_post_disable; intel_encoder->get_hw_state = intel_dsi_get_hw_state; intel_encoder->get_config = intel_dsi_get_config; + intel_encoder->update_pipe = intel_panel_update_backlight; intel_connector->get_hw_state = intel_connector_get_hw_state; diff --git a/drivers/gpu/drm/i915/vlv_dsi_pll.c b/drivers/gpu/drm/i915/vlv_dsi_pll.c index a132a8037ecc..954d5a8c4fa7 100644 --- a/drivers/gpu/drm/i915/vlv_dsi_pll.c +++ b/drivers/gpu/drm/i915/vlv_dsi_pll.c @@ -252,20 +252,12 @@ void bxt_dsi_pll_disable(struct intel_encoder *encoder) DRM_ERROR("Timeout waiting for PLL lock deassertion\n"); } -static void assert_bpp_mismatch(enum mipi_dsi_pixel_format fmt, int pipe_bpp) -{ - int bpp = mipi_dsi_pixel_format_to_bpp(fmt); - - WARN(bpp != pipe_bpp, - "bpp match assertion failure (expected %d, current %d)\n", - bpp, pipe_bpp); -} - -u32 vlv_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, +u32 vlv_dsi_get_pclk(struct intel_encoder *encoder, struct intel_crtc_state *config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + int bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); u32 dsi_clock, pclk; u32 pll_ctl, pll_div; u32 m = 0, p = 0, n; @@ -319,15 +311,12 @@ u32 vlv_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, dsi_clock = (m * refclk) / (p * n); - /* pixel_format and pipe_bpp should agree */ - assert_bpp_mismatch(intel_dsi->pixel_format, pipe_bpp); - - pclk = DIV_ROUND_CLOSEST(dsi_clock * intel_dsi->lane_count, pipe_bpp); + pclk = DIV_ROUND_CLOSEST(dsi_clock * intel_dsi->lane_count, bpp); return pclk; } -u32 bxt_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, +u32 bxt_dsi_get_pclk(struct intel_encoder *encoder, struct intel_crtc_state *config) { u32 pclk; @@ -335,12 +324,7 @@ u32 bxt_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, u32 dsi_ratio; struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - - /* Divide by zero */ - if (!pipe_bpp) { - DRM_ERROR("Invalid BPP(0)\n"); - return 0; - } + int bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); config->dsi_pll.ctrl = I915_READ(BXT_DSI_PLL_CTL); @@ -348,10 +332,7 @@ u32 bxt_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, dsi_clk = (dsi_ratio * BXT_REF_CLOCK_KHZ) / 2; - /* pixel_format and pipe_bpp should agree */ - assert_bpp_mismatch(intel_dsi->pixel_format, pipe_bpp); - - pclk = DIV_ROUND_CLOSEST(dsi_clk * intel_dsi->lane_count, pipe_bpp); + pclk = DIV_ROUND_CLOSEST(dsi_clk * intel_dsi->lane_count, bpp); DRM_DEBUG_DRIVER("Calculated pclk=%u\n", pclk); return pclk; |