diff options
author | Dave Airlie <airlied@redhat.com> | 2017-03-08 05:41:47 +0300 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2017-03-08 05:41:47 +0300 |
commit | 2e16101780e9cc8c4c68566db002e7513a1530eb (patch) | |
tree | e3fa976f5fe68aea853c3653be13168bb6b43438 /drivers | |
parent | b558dfd56a5c1f915327967ecfe1181cf2a7a494 (diff) | |
parent | 505b681539a7e14aeb866515d3ef1a67375839bc (diff) | |
download | linux-2e16101780e9cc8c4c68566db002e7513a1530eb.tar.xz |
Merge tag 'drm-intel-next-2017-03-06' of git://anongit.freedesktop.org/git/drm-intel into drm-next
4 weeks worth of stuff since I was traveling&lazy:
- lspcon improvements (Imre)
- proper atomic state for cdclk handling (Ville)
- gpu reset improvements (Chris)
- lots and lots of polish around fences, requests, waiting and
everything related all over (both gem and modeset code), from Chris
- atomic by default on gen5+ minus byt/bsw (Maarten did the patch to
flip the default, really this is a massive joint team effort)
- moar power domains, now 64bit (Ander)
- big pile of in-kernel unit tests for various gem subsystems (Chris),
including simple mock objects for i915 device and and the ggtt
manager.
- i915_gpu_info in debugfs, for taking a snapshot of the current gpu
state. Same thing as i915_error_state, but useful if the kernel didn't
notice something is stick. From Chris.
- bxt dsi fixes (Umar Shankar)
- bxt w/a updates (Jani)
- no more struct_mutex for gem object unreference (Chris)
- some execlist refactoring (Tvrtko)
- color manager support for glk (Ander)
- improve the power-well sync code to better take over from the
firmware (Imre)
- gem tracepoint polish (Tvrtko)
- lots of glk fixes all around (Ander)
- ctx switch improvements (Chris)
- glk dsi support&fixes (Deepak M)
- dsi fixes for vlv and clanups, lots of them (Hans de Goede)
- switch to i915.ko types in lots of our internal modeset code (Ander)
- byt/bsw atomic wm update code, yay (Ville)
* tag 'drm-intel-next-2017-03-06' of git://anongit.freedesktop.org/git/drm-intel: (432 commits)
drm/i915: Update DRIVER_DATE to 20170306
drm/i915: Don't use enums for hardware engine id
drm/i915: Split breadcrumbs spinlock into two
drm/i915: Refactor wakeup of the next breadcrumb waiter
drm/i915: Take reference for signaling the request from hardirq
drm/i915: Add FIFO underrun tracepoints
drm/i915: Add cxsr toggle tracepoint
drm/i915: Add VLV/CHV watermark/FIFO programming tracepoints
drm/i915: Add plane update/disable tracepoints
drm/i915: Kill level 0 wm hack for VLV/CHV
drm/i915: Workaround VLV/CHV sprite1->sprite0 enable underrun
drm/i915: Sanitize VLV/CHV watermarks properly
drm/i915: Only use update_wm_{pre,post} for pre-ilk platforms
drm/i915: Nuke crtc->wm.cxsr_allowed
drm/i915: Compute proper intermediate wms for vlv/cvh
drm/i915: Skip useless watermark/FIFO related work on VLV/CHV when not needed
drm/i915: Compute vlv/chv wms the atomic way
drm/i915: Compute VLV/CHV FIFO sizes based on the PM2 watermarks
drm/i915: Plop vlv/chv fifo sizes into crtc state
drm/i915: Plop vlv wm state into crtc_state
...
Diffstat (limited to 'drivers')
127 files changed, 18678 insertions, 8200 deletions
diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 9702c78f458d..7fcc2a9d1d5a 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -332,14 +332,6 @@ static void i810_write_entry(dma_addr_t addr, unsigned int entry, writel_relaxed(addr | pte_flags, intel_private.gtt + entry); } -static const struct aper_size_info_fixed intel_fake_agp_sizes[] = { - {32, 8192, 3}, - {64, 16384, 4}, - {128, 32768, 5}, - {256, 65536, 6}, - {512, 131072, 7}, -}; - static unsigned int intel_gtt_stolen_size(void) { u16 gmch_ctrl; @@ -670,6 +662,14 @@ static int intel_gtt_init(void) } #if IS_ENABLED(CONFIG_AGP_INTEL) +static const struct aper_size_info_fixed intel_fake_agp_sizes[] = { + {32, 8192, 3}, + {64, 16384, 4}, + {128, 32768, 5}, + {256, 65536, 6}, + {512, 131072, 7}, +}; + static int intel_fake_agp_fetch_size(void) { int num_sizes = ARRAY_SIZE(intel_fake_agp_sizes); diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 183f5dc1c3f2..1ae0bb91ee60 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -19,6 +19,7 @@ config DRM_I915 select INPUT if ACPI select ACPI_VIDEO if ACPI select ACPI_BUTTON if ACPI + select SYNC_FILE help Choose this option if you have a system that has "Intel Graphics Media Accelerator" or "HD Graphics" integrated graphics, diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 597648c7a645..e091809a9a9e 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -24,7 +24,9 @@ config DRM_I915_DEBUG select X86_MSR # used by igt/pm_rpm select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks) select DRM_DEBUG_MM if DRM=y + select DRM_DEBUG_MM_SELFTEST select DRM_I915_SW_FENCE_DEBUG_OBJECTS + select DRM_I915_SELFTEST default n help Choose this option to turn on extra driver debugging that may affect @@ -58,3 +60,30 @@ config DRM_I915_SW_FENCE_DEBUG_OBJECTS Recommended for driver developers only. If in doubt, say "N". + +config DRM_I915_SELFTEST + bool "Enable selftests upon driver load" + depends on DRM_I915 + default n + select FAULT_INJECTION + select PRIME_NUMBERS + help + Choose this option to allow the driver to perform selftests upon + loading; also requires the i915.selftest=1 module parameter. To + exit the module after running the selftests (i.e. to prevent normal + module initialisation afterwards) use i915.selftest=-1. + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_I915_LOW_LEVEL_TRACEPOINTS + bool "Enable low level request tracing events" + depends on DRM_I915 + default n + help + Choose this option to turn on low level request tracing events. + This provides the ability to precisely monitor engine utilisation + and also analyze the request dependency resolving timeline. + + If in doubt, say "N". diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index c62ab45683c0..b1b580337c7a 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -29,6 +29,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o # GEM code i915-y += i915_cmd_parser.o \ i915_gem_batch_pool.o \ + i915_gem_clflush.o \ i915_gem_context.o \ i915_gem_dmabuf.o \ i915_gem_evict.o \ @@ -72,6 +73,7 @@ i915-y += intel_audio.o \ intel_atomic.o \ intel_atomic_plane.o \ intel_bios.o \ + intel_cdclk.o \ intel_color.o \ intel_display.o \ intel_dpio_phy.o \ @@ -116,6 +118,9 @@ i915-y += dvo_ch7017.o \ # Post-mortem debug and GPU hang state capture i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o +i915-$(CONFIG_DRM_I915_SELFTEST) += \ + selftests/i915_random.o \ + selftests/i915_selftest.o # virtual gpu code i915-y += i915_vgpu.o diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index b9c8e2407682..81076d8ec41a 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1530,7 +1530,7 @@ static int copy_gma_to_hva(struct intel_vgpu *vgpu, struct intel_vgpu_mm *mm, len += copy_len; gma += copy_len; } - return 0; + return len; } @@ -1644,7 +1644,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) ret = copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm, gma, gma + bb_size, dst); - if (ret) { + if (ret < 0) { gvt_err("fail to copy guest ring buffer\n"); goto unmap_src; } @@ -2608,11 +2608,8 @@ out: static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; - int ring_id = workload->ring_id; - struct i915_gem_context *shadow_ctx = vgpu->shadow_ctx; - struct intel_ring *ring = shadow_ctx->engine[ring_id].ring; unsigned long gma_head, gma_tail, gma_top, guest_rb_size; - unsigned int copy_len = 0; + u32 *cs; int ret; guest_rb_size = _RING_CTL_BUF_SIZE(workload->rb_ctl); @@ -2626,36 +2623,33 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) gma_top = workload->rb_start + guest_rb_size; /* allocate shadow ring buffer */ - ret = intel_ring_begin(workload->req, workload->rb_len / 4); - if (ret) - return ret; + cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32)); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* get shadow ring buffer va */ - workload->shadow_ring_buffer_va = ring->vaddr + ring->tail; + workload->shadow_ring_buffer_va = cs; /* head > tail --> copy head <-> top */ if (gma_head > gma_tail) { ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, - gma_head, gma_top, - workload->shadow_ring_buffer_va); - if (ret) { + gma_head, gma_top, cs); + if (ret < 0) { gvt_err("fail to copy guest ring buffer\n"); return ret; } - copy_len = gma_top - gma_head; + cs += ret / sizeof(u32); gma_head = workload->rb_start; } /* copy head or start <-> tail */ - ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, - gma_head, gma_tail, - workload->shadow_ring_buffer_va + copy_len); - if (ret) { + ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail, cs); + if (ret < 0) { gvt_err("fail to copy guest ring buffer\n"); return ret; } - ring->tail += workload->rb_len; - intel_ring_advance(ring); + cs += ret / sizeof(u32); + intel_ring_advance(workload->req, cs); return 0; } @@ -2709,7 +2703,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx) wa_ctx->workload->vgpu->gtt.ggtt_mm, guest_gma, guest_gma + ctx_size, map); - if (ret) { + if (ret < 0) { gvt_err("fail to copy guest indirect ctx\n"); goto unmap_src; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 7d7244798507..aa2d726b4349 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -35,6 +35,23 @@ static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) return to_i915(node->minor->dev); } +static __always_inline void seq_print_param(struct seq_file *m, + const char *name, + const char *type, + const void *x) +{ + if (!__builtin_strcmp(type, "bool")) + seq_printf(m, "i915.%s=%s\n", name, yesno(*(const bool *)x)); + else if (!__builtin_strcmp(type, "int")) + seq_printf(m, "i915.%s=%d\n", name, *(const int *)x); + else if (!__builtin_strcmp(type, "unsigned int")) + seq_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x); + else if (!__builtin_strcmp(type, "char *")) + seq_printf(m, "i915.%s=%s\n", name, *(const char **)x); + else + BUILD_BUG(); +} + static int i915_capabilities(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -43,10 +60,17 @@ static int i915_capabilities(struct seq_file *m, void *data) seq_printf(m, "gen: %d\n", INTEL_GEN(dev_priv)); seq_printf(m, "platform: %s\n", intel_platform_name(info->platform)); seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(dev_priv)); + #define PRINT_FLAG(x) seq_printf(m, #x ": %s\n", yesno(info->x)) DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG); #undef PRINT_FLAG + kernel_param_lock(THIS_MODULE); +#define PRINT_PARAM(T, x) seq_print_param(m, #x, #T, &i915.x); + I915_PARAMS_FOR_EACH(PRINT_PARAM); +#undef PRINT_PARAM + kernel_param_unlock(THIS_MODULE); + return 0; } @@ -428,7 +452,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) dpy_count, dpy_size); seq_printf(m, "%llu [%llu] gtt total\n", - ggtt->base.total, ggtt->mappable_end - ggtt->base.start); + ggtt->base.total, ggtt->mappable_end); seq_putc(m, '\n'); print_batch_pool_stats(m, dev_priv); @@ -456,7 +480,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) mutex_lock(&dev->struct_mutex); request = list_first_entry_or_null(&file_priv->mm.request_list, struct drm_i915_gem_request, - client_list); + client_link); rcu_read_lock(); task = pid_task(request && request->ctx->pid ? request->ctx->pid : file->pid, @@ -676,14 +700,14 @@ static void i915_ring_seqno_info(struct seq_file *m, seq_printf(m, "Current sequence (%s): %x\n", engine->name, intel_engine_get_seqno(engine)); - spin_lock_irq(&b->lock); + spin_lock_irq(&b->rb_lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { struct intel_wait *w = rb_entry(rb, typeof(*w), node); seq_printf(m, "Waiting (%s): %s [%d] on %x\n", engine->name, w->tsk->comm, w->tsk->pid, w->seqno); } - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); } static int i915_gem_seqno_info(struct seq_file *m, void *data) @@ -827,10 +851,22 @@ static int i915_interrupt_info(struct seq_file *m, void *data) I915_READ(VLV_IIR_RW)); seq_printf(m, "Display IMR:\t%08x\n", I915_READ(VLV_IMR)); - for_each_pipe(dev_priv, pipe) + for_each_pipe(dev_priv, pipe) { + enum intel_display_power_domain power_domain; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, + power_domain)) { + seq_printf(m, "Pipe %c power disabled\n", + pipe_name(pipe)); + continue; + } + seq_printf(m, "Pipe %c stat:\t%08x\n", pipe_name(pipe), I915_READ(PIPESTAT(pipe))); + intel_display_power_put(dev_priv, power_domain); + } seq_printf(m, "Master IER:\t%08x\n", I915_READ(VLV_MASTER_IER)); @@ -928,101 +964,96 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data) } #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) - -static ssize_t -i915_error_state_write(struct file *filp, - const char __user *ubuf, - size_t cnt, - loff_t *ppos) +static ssize_t gpu_state_read(struct file *file, char __user *ubuf, + size_t count, loff_t *pos) { - struct i915_error_state_file_priv *error_priv = filp->private_data; - - DRM_DEBUG_DRIVER("Resetting error state\n"); - i915_destroy_error_state(error_priv->i915); + struct i915_gpu_state *error = file->private_data; + struct drm_i915_error_state_buf str; + ssize_t ret; + loff_t tmp; - return cnt; -} - -static int i915_error_state_open(struct inode *inode, struct file *file) -{ - struct drm_i915_private *dev_priv = inode->i_private; - struct i915_error_state_file_priv *error_priv; + if (!error) + return 0; - error_priv = kzalloc(sizeof(*error_priv), GFP_KERNEL); - if (!error_priv) - return -ENOMEM; + ret = i915_error_state_buf_init(&str, error->i915, count, *pos); + if (ret) + return ret; - error_priv->i915 = dev_priv; + ret = i915_error_state_to_str(&str, error); + if (ret) + goto out; - i915_error_state_get(&dev_priv->drm, error_priv); + tmp = 0; + ret = simple_read_from_buffer(ubuf, count, &tmp, str.buf, str.bytes); + if (ret < 0) + goto out; - file->private_data = error_priv; + *pos = str.start + ret; +out: + i915_error_state_buf_release(&str); + return ret; +} +static int gpu_state_release(struct inode *inode, struct file *file) +{ + i915_gpu_state_put(file->private_data); return 0; } -static int i915_error_state_release(struct inode *inode, struct file *file) +static int i915_gpu_info_open(struct inode *inode, struct file *file) { - struct i915_error_state_file_priv *error_priv = file->private_data; + struct i915_gpu_state *gpu; - i915_error_state_put(error_priv); - kfree(error_priv); + gpu = i915_capture_gpu_state(inode->i_private); + if (!gpu) + return -ENOMEM; + file->private_data = gpu; return 0; } -static ssize_t i915_error_state_read(struct file *file, char __user *userbuf, - size_t count, loff_t *pos) +static const struct file_operations i915_gpu_info_fops = { + .owner = THIS_MODULE, + .open = i915_gpu_info_open, + .read = gpu_state_read, + .llseek = default_llseek, + .release = gpu_state_release, +}; + +static ssize_t +i915_error_state_write(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) { - struct i915_error_state_file_priv *error_priv = file->private_data; - struct drm_i915_error_state_buf error_str; - loff_t tmp_pos = 0; - ssize_t ret_count = 0; - int ret; + struct i915_gpu_state *error = filp->private_data; - ret = i915_error_state_buf_init(&error_str, error_priv->i915, - count, *pos); - if (ret) - return ret; + if (!error) + return 0; - ret = i915_error_state_to_str(&error_str, error_priv); - if (ret) - goto out; + DRM_DEBUG_DRIVER("Resetting error state\n"); + i915_reset_error_state(error->i915); - ret_count = simple_read_from_buffer(userbuf, count, &tmp_pos, - error_str.buf, - error_str.bytes); + return cnt; +} - if (ret_count < 0) - ret = ret_count; - else - *pos = error_str.start + ret_count; -out: - i915_error_state_buf_release(&error_str); - return ret ?: ret_count; +static int i915_error_state_open(struct inode *inode, struct file *file) +{ + file->private_data = i915_first_error_state(inode->i_private); + return 0; } static const struct file_operations i915_error_state_fops = { .owner = THIS_MODULE, .open = i915_error_state_open, - .read = i915_error_state_read, + .read = gpu_state_read, .write = i915_error_state_write, .llseek = default_llseek, - .release = i915_error_state_release, + .release = gpu_state_release, }; - #endif static int -i915_next_seqno_get(void *data, u64 *val) -{ - struct drm_i915_private *dev_priv = data; - - *val = 1 + atomic_read(&dev_priv->gt.global_timeline.seqno); - return 0; -} - -static int i915_next_seqno_set(void *data, u64 val) { struct drm_i915_private *dev_priv = data; @@ -1040,13 +1071,12 @@ i915_next_seqno_set(void *data, u64 val) } DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops, - i915_next_seqno_get, i915_next_seqno_set, + NULL, i915_next_seqno_set, "0x%llx\n"); static int i915_frequency_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; int ret = 0; intel_runtime_pm_get(dev_priv); @@ -1109,10 +1139,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused) } /* RPSTAT1 is in the GT power well */ - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - goto out; - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); reqf = I915_READ(GEN6_RPNSWREQ); @@ -1147,7 +1173,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused) cagf = intel_gpu_freq(dev_priv, cagf); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - mutex_unlock(&dev->struct_mutex); if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) { pm_ier = I915_READ(GEN6_PMIER); @@ -1198,21 +1223,18 @@ static int i915_frequency_info(struct seq_file *m, void *unused) max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 0 : rp_state_cap >> 16) & 0xff; - max_freq *= (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ? - GEN9_FREQ_SCALER : 1); + max_freq *= (IS_GEN9_BC(dev_priv) ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); max_freq = (rp_state_cap & 0xff00) >> 8; - max_freq *= (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ? - GEN9_FREQ_SCALER : 1); + max_freq *= (IS_GEN9_BC(dev_priv) ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 16 : rp_state_cap >> 0) & 0xff; - max_freq *= (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ? - GEN9_FREQ_SCALER : 1); + max_freq *= (IS_GEN9_BC(dev_priv) ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); seq_printf(m, "Max overclocked frequency: %dMHz\n", @@ -1236,11 +1258,10 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_puts(m, "no P-state info available\n"); } - seq_printf(m, "Current CD clock frequency: %d kHz\n", dev_priv->cdclk_freq); + seq_printf(m, "Current CD clock frequency: %d kHz\n", dev_priv->cdclk.hw.cdclk); seq_printf(m, "Max CD clock frequency: %d kHz\n", dev_priv->max_cdclk_freq); seq_printf(m, "Max pixel clock frequency: %d kHz\n", dev_priv->max_dotclk_freq); -out: intel_runtime_pm_put(dev_priv); return ret; } @@ -1307,35 +1328,40 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) intel_runtime_pm_put(dev_priv); - if (delayed_work_pending(&dev_priv->gpu_error.hangcheck_work)) { - seq_printf(m, "Hangcheck active, fires in %dms\n", + if (timer_pending(&dev_priv->gpu_error.hangcheck_work.timer)) + seq_printf(m, "Hangcheck active, timer fires in %dms\n", jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires - jiffies)); - } else - seq_printf(m, "Hangcheck inactive\n"); + else if (delayed_work_pending(&dev_priv->gpu_error.hangcheck_work)) + seq_puts(m, "Hangcheck active, work pending\n"); + else + seq_puts(m, "Hangcheck inactive\n"); + + seq_printf(m, "GT active? %s\n", yesno(dev_priv->gt.awake)); for_each_engine(engine, dev_priv, id) { struct intel_breadcrumbs *b = &engine->breadcrumbs; struct rb_node *rb; seq_printf(m, "%s:\n", engine->name); - seq_printf(m, "\tseqno = %x [current %x, last %x]\n", + seq_printf(m, "\tseqno = %x [current %x, last %x], inflight %d\n", engine->hangcheck.seqno, seqno[id], - intel_engine_last_submit(engine)); + intel_engine_last_submit(engine), + engine->timeline->inflight_seqnos); seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s\n", yesno(intel_engine_has_waiter(engine)), yesno(test_bit(engine->id, &dev_priv->gpu_error.missed_irq_rings)), yesno(engine->hangcheck.stalled)); - spin_lock_irq(&b->lock); + spin_lock_irq(&b->rb_lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { struct intel_wait *w = rb_entry(rb, typeof(*w), node); seq_printf(m, "\t%s [%d] waiting for %x\n", w->tsk->comm, w->tsk->pid, w->seqno); } - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", (long long)engine->hangcheck.acthd, @@ -1788,7 +1814,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) if (ret) goto out; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { /* Convert GT frequency to 50 HZ units */ min_gpu_freq = dev_priv->rps.min_freq_softlimit / GEN9_FREQ_SCALER; @@ -1808,8 +1834,8 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) &ia_freq); seq_printf(m, "%d\t\t%d\t\t\t\t%d\n", intel_gpu_freq(dev_priv, (gpu_freq * - (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ? - GEN9_FREQ_SCALER : 1))), + (IS_GEN9_BC(dev_priv) ? + GEN9_FREQ_SCALER : 1))), ((ia_freq >> 0) & 0xff) * 100, ((ia_freq >> 8) & 0xff) * 100); } @@ -2302,10 +2328,10 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n", rps_power_to_str(dev_priv->rps.power)); seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n", - 100 * rpup / rpupei, + rpup && rpupei ? 100 * rpup / rpupei : 0, dev_priv->rps.up_threshold); seq_printf(m, " Avg. down: %d%% [below threshold? %d%%]\n", - 100 * rpdown / rpdownei, + rpdown && rpdownei ? 100 * rpdown / rpdownei : 0, dev_priv->rps.down_threshold); } else { seq_puts(m, "\nRPS Autotuning inactive\n"); @@ -2351,7 +2377,9 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data) seq_printf(m, "\tRSA: offset is %d; size = %d\n", huc_fw->rsa_offset, huc_fw->rsa_size); + intel_runtime_pm_get(dev_priv); seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2)); + intel_runtime_pm_put(dev_priv); return 0; } @@ -2383,6 +2411,8 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) seq_printf(m, "\tRSA: offset is %d; size = %d\n", guc_fw->rsa_offset, guc_fw->rsa_size); + intel_runtime_pm_get(dev_priv); + tmp = I915_READ(GUC_STATUS); seq_printf(m, "\nGuC status 0x%08x:\n", tmp); @@ -2396,6 +2426,8 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) for (i = 0; i < 16; i++) seq_printf(m, "\t%2d: \t0x%x\n", i, I915_READ(SOFT_SCRATCH(i))); + intel_runtime_pm_put(dev_priv); + return 0; } @@ -2777,15 +2809,10 @@ static int i915_power_domain_info(struct seq_file *m, void *unused) seq_printf(m, "%-25s %d\n", power_well->name, power_well->count); - for (power_domain = 0; power_domain < POWER_DOMAIN_NUM; - power_domain++) { - if (!(BIT(power_domain) & power_well->domains)) - continue; - + for_each_power_domain(power_domain, power_well->domains) seq_printf(m, " %-23s %d\n", intel_display_power_domain_str(power_domain), power_domains->domain_use_count[power_domain]); - } } mutex_unlock(&power_domains->lock); @@ -3205,6 +3232,11 @@ static int i915_engine_info(struct seq_file *m, void *unused) intel_runtime_pm_get(dev_priv); + seq_printf(m, "GT awake? %s\n", + yesno(dev_priv->gt.awake)); + seq_printf(m, "Global active requests: %d\n", + dev_priv->gt.active_requests); + for_each_engine(engine, dev_priv, id) { struct intel_breadcrumbs *b = &engine->breadcrumbs; struct drm_i915_gem_request *rq; @@ -3212,11 +3244,12 @@ static int i915_engine_info(struct seq_file *m, void *unused) u64 addr; seq_printf(m, "%s\n", engine->name); - seq_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n", + seq_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", intel_engine_get_seqno(engine), intel_engine_last_submit(engine), engine->hangcheck.seqno, - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); + jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), + engine->timeline->inflight_seqnos); rcu_read_lock(); @@ -3294,15 +3327,21 @@ static int i915_engine_info(struct seq_file *m, void *unused) rcu_read_lock(); rq = READ_ONCE(engine->execlist_port[0].request); - if (rq) - print_request(m, rq, "\t\tELSP[0] "); - else + if (rq) { + seq_printf(m, "\t\tELSP[0] count=%d, ", + engine->execlist_port[0].count); + print_request(m, rq, "rq: "); + } else { seq_printf(m, "\t\tELSP[0] idle\n"); + } rq = READ_ONCE(engine->execlist_port[1].request); - if (rq) - print_request(m, rq, "\t\tELSP[1] "); - else + if (rq) { + seq_printf(m, "\t\tELSP[1] count=%d, ", + engine->execlist_port[1].count); + print_request(m, rq, "rq: "); + } else { seq_printf(m, "\t\tELSP[1] idle\n"); + } rcu_read_unlock(); spin_lock_irq(&engine->timeline->lock); @@ -3320,14 +3359,14 @@ static int i915_engine_info(struct seq_file *m, void *unused) I915_READ(RING_PP_DIR_DCLV(engine))); } - spin_lock_irq(&b->lock); + spin_lock_irq(&b->rb_lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { struct intel_wait *w = rb_entry(rb, typeof(*w), node); seq_printf(m, "\t%s [%d] waiting for %x\n", w->tsk->comm, w->tsk->pid, w->seqno); } - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); seq_puts(m, "\n"); } @@ -3746,7 +3785,19 @@ static int i915_displayport_test_data_show(struct seq_file *m, void *data) if (connector->status == connector_status_connected && connector->encoder != NULL) { intel_dp = enc_to_intel_dp(connector->encoder); - seq_printf(m, "%lx", intel_dp->compliance.test_data.edid); + if (intel_dp->compliance.test_type == + DP_TEST_LINK_EDID_READ) + seq_printf(m, "%lx", + intel_dp->compliance.test_data.edid); + else if (intel_dp->compliance.test_type == + DP_TEST_LINK_VIDEO_PATTERN) { + seq_printf(m, "hdisplay: %d\n", + intel_dp->compliance.test_data.hdisplay); + seq_printf(m, "vdisplay: %d\n", + intel_dp->compliance.test_data.vdisplay); + seq_printf(m, "bpc: %u\n", + intel_dp->compliance.test_data.bpc); + } } else seq_puts(m, "0"); } @@ -4237,7 +4288,8 @@ i915_max_freq_set(void *data, u64 val) dev_priv->rps.max_freq_softlimit = val; - intel_set_rps(dev_priv, val); + if (intel_set_rps(dev_priv, val)) + DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); mutex_unlock(&dev_priv->rps.hw_lock); @@ -4292,7 +4344,8 @@ i915_min_freq_set(void *data, u64 val) dev_priv->rps.min_freq_softlimit = val; - intel_set_rps(dev_priv, val); + if (intel_set_rps(dev_priv, val)) + DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); mutex_unlock(&dev_priv->rps.hw_lock); @@ -4418,7 +4471,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, sseu->slice_mask |= BIT(s); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask; @@ -4567,6 +4620,81 @@ static const struct file_operations i915_forcewake_fops = { .release = i915_forcewake_release, }; +static int i915_hpd_storm_ctl_show(struct seq_file *m, void *data) +{ + struct drm_i915_private *dev_priv = m->private; + struct i915_hotplug *hotplug = &dev_priv->hotplug; + + seq_printf(m, "Threshold: %d\n", hotplug->hpd_storm_threshold); + seq_printf(m, "Detected: %s\n", + yesno(delayed_work_pending(&hotplug->reenable_work))); + + return 0; +} + +static ssize_t i915_hpd_storm_ctl_write(struct file *file, + const char __user *ubuf, size_t len, + loff_t *offp) +{ + struct seq_file *m = file->private_data; + struct drm_i915_private *dev_priv = m->private; + struct i915_hotplug *hotplug = &dev_priv->hotplug; + unsigned int new_threshold; + int i; + char *newline; + char tmp[16]; + + if (len >= sizeof(tmp)) + return -EINVAL; + + if (copy_from_user(tmp, ubuf, len)) + return -EFAULT; + + tmp[len] = '\0'; + + /* Strip newline, if any */ + newline = strchr(tmp, '\n'); + if (newline) + *newline = '\0'; + + if (strcmp(tmp, "reset") == 0) + new_threshold = HPD_STORM_DEFAULT_THRESHOLD; + else if (kstrtouint(tmp, 10, &new_threshold) != 0) + return -EINVAL; + + if (new_threshold > 0) + DRM_DEBUG_KMS("Setting HPD storm detection threshold to %d\n", + new_threshold); + else + DRM_DEBUG_KMS("Disabling HPD storm detection\n"); + + spin_lock_irq(&dev_priv->irq_lock); + hotplug->hpd_storm_threshold = new_threshold; + /* Reset the HPD storm stats so we don't accidentally trigger a storm */ + for_each_hpd_pin(i) + hotplug->stats[i].count = 0; + spin_unlock_irq(&dev_priv->irq_lock); + + /* Re-enable hpd immediately if we were in an irq storm */ + flush_delayed_work(&dev_priv->hotplug.reenable_work); + + return len; +} + +static int i915_hpd_storm_ctl_open(struct inode *inode, struct file *file) +{ + return single_open(file, i915_hpd_storm_ctl_show, inode->i_private); +} + +static const struct file_operations i915_hpd_storm_ctl_fops = { + .owner = THIS_MODULE, + .open = i915_hpd_storm_ctl_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = i915_hpd_storm_ctl_write +}; + static const struct drm_info_list i915_debugfs_list[] = { {"i915_capabilities", i915_capabilities, 0}, {"i915_gem_objects", i915_gem_object_info, 0}, @@ -4633,6 +4761,7 @@ static const struct i915_debugfs_files { {"i915_gem_drop_caches", &i915_drop_caches_fops}, #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) {"i915_error_state", &i915_error_state_fops}, + {"i915_gpu_info", &i915_gpu_info_fops}, #endif {"i915_next_seqno", &i915_next_seqno_fops}, {"i915_display_crc_ctl", &i915_display_crc_ctl_fops}, @@ -4643,7 +4772,8 @@ static const struct i915_debugfs_files { {"i915_dp_test_data", &i915_displayport_test_data_fops}, {"i915_dp_test_type", &i915_displayport_test_type_fops}, {"i915_dp_test_active", &i915_displayport_test_active_fops}, - {"i915_guc_log_control", &i915_guc_log_control_fops} + {"i915_guc_log_control", &i915_guc_log_control_fops}, + {"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops} }; int i915_debugfs_register(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 655d146e1126..704dbcf63866 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -43,6 +43,7 @@ #include <drm/drmP.h> #include <drm/drm_crtc_helper.h> +#include <drm/drm_atomic_helper.h> #include <drm/i915_drm.h> #include "i915_drv.h" @@ -248,6 +249,7 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_IRQ_ACTIVE: case I915_PARAM_ALLOW_BATCHBUFFER: case I915_PARAM_LAST_DISPATCH: + case I915_PARAM_HAS_EXEC_CONSTANTS: /* Reject all old ums/dri params. */ return -ENODEV; case I915_PARAM_CHIPSET_ID: @@ -274,9 +276,6 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_BSD2: value = !!dev_priv->engine[VCS2]; break; - case I915_PARAM_HAS_EXEC_CONSTANTS: - value = INTEL_GEN(dev_priv) >= 4; - break; case I915_PARAM_HAS_LLC: value = HAS_LLC(dev_priv); break; @@ -318,10 +317,9 @@ static int i915_getparam(struct drm_device *dev, void *data, value = INTEL_INFO(dev_priv)->sseu.min_eu_in_pool; break; case I915_PARAM_HUC_STATUS: - /* The register is already force-woken. We dont need - * any rpm here - */ + intel_runtime_pm_get(dev_priv); value = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED; + intel_runtime_pm_put(dev_priv); break; case I915_PARAM_MMAP_GTT_VERSION: /* Though we've started our numbering from 1, and so class all @@ -350,6 +348,8 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_HANDLE_LUT: case I915_PARAM_HAS_COHERENT_PHYS_GTT: case I915_PARAM_HAS_EXEC_SOFTPIN: + case I915_PARAM_HAS_EXEC_ASYNC: + case I915_PARAM_HAS_EXEC_FENCE: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from @@ -756,6 +756,15 @@ out_err: return -ENOMEM; } +static void i915_engines_cleanup(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) + kfree(engine); +} + static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv) { destroy_workqueue(dev_priv->hotplug.dp_wq); @@ -769,10 +778,17 @@ static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv) */ static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv) { - if (IS_HSW_EARLY_SDV(dev_priv) || - IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0)) + bool pre = false; + + pre |= IS_HSW_EARLY_SDV(dev_priv); + pre |= IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0); + pre |= IS_BXT_REVID(dev_priv, 0, BXT_REVID_B_LAST); + + if (pre) { DRM_ERROR("This is a pre-production stepping. " "It may not be fully functional.\n"); + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK); + } } /** @@ -808,6 +824,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, spin_lock_init(&dev_priv->gpu_error.lock); mutex_init(&dev_priv->backlight_lock); spin_lock_init(&dev_priv->uncore.lock); + spin_lock_init(&dev_priv->mm.object_stat_lock); spin_lock_init(&dev_priv->mmio_flip_lock); spin_lock_init(&dev_priv->wm.dsparb_lock); @@ -818,12 +835,15 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, mutex_init(&dev_priv->pps_mutex); intel_uc_init_early(dev_priv); - i915_memcpy_init_early(dev_priv); + ret = intel_engines_init_early(dev_priv); + if (ret) + return ret; + ret = i915_workqueues_init(dev_priv); if (ret < 0) - return ret; + goto err_engines; /* This must be called before any calls to HAS_PCH_* */ intel_detect_pch(dev_priv); @@ -852,6 +872,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, err_workqueues: i915_workqueues_cleanup(dev_priv); +err_engines: + i915_engines_cleanup(dev_priv); return ret; } @@ -864,6 +886,7 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv) i915_perf_fini(dev_priv); i915_gem_load_cleanup(dev_priv); i915_workqueues_cleanup(dev_priv); + i915_engines_cleanup(dev_priv); } static int i915_mmio_setup(struct drm_i915_private *dev_priv) @@ -930,6 +953,7 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv) goto put_bridge; intel_uncore_init(dev_priv); + i915_gem_init_mmio(dev_priv); return 0; @@ -967,7 +991,7 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt); i915.semaphores = intel_sanitize_semaphores(dev_priv, i915.semaphores); - DRM_DEBUG_DRIVER("use GPU sempahores? %s\n", yesno(i915.semaphores)); + DRM_DEBUG_DRIVER("use GPU semaphores? %s\n", yesno(i915.semaphores)); } /** @@ -1185,11 +1209,15 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) */ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) { + const struct intel_device_info *match_info = + (struct intel_device_info *)ent->driver_data; struct drm_i915_private *dev_priv; int ret; - if (i915.nuclear_pageflip) - driver.driver_features |= DRIVER_ATOMIC; + /* Enable nuclear pageflip on ILK+, except vlv/chv */ + if (!i915.nuclear_pageflip && + (match_info->gen < 5 || match_info->has_gmch_display)) + driver.driver_features &= ~DRIVER_ATOMIC; ret = -ENOMEM; dev_priv = kzalloc(sizeof(*dev_priv), GFP_KERNEL); @@ -1197,8 +1225,7 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) ret = drm_dev_init(&dev_priv->drm, &driver, &pdev->dev); if (ret) { DRM_DEV_ERROR(&pdev->dev, "allocation failed\n"); - kfree(dev_priv); - return ret; + goto out_free; } dev_priv->drm.pdev = pdev; @@ -1206,7 +1233,7 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) ret = pci_enable_device(pdev); if (ret) - goto out_free_priv; + goto out_fini; pci_set_drvdata(pdev, &dev_priv->drm); @@ -1270,9 +1297,11 @@ out_runtime_pm_put: i915_driver_cleanup_early(dev_priv); out_pci_disable: pci_disable_device(pdev); -out_free_priv: +out_fini: i915_load_error(dev_priv, "Device initialization failed (%d)\n", ret); - drm_dev_unref(&dev_priv->drm); + drm_dev_fini(&dev_priv->drm); +out_free: + kfree(dev_priv); return ret; } @@ -1280,6 +1309,8 @@ void i915_driver_unload(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); struct pci_dev *pdev = dev_priv->drm.pdev; + struct drm_modeset_acquire_ctx ctx; + int ret; intel_fbdev_fini(dev); @@ -1288,6 +1319,24 @@ void i915_driver_unload(struct drm_device *dev) intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); + drm_modeset_acquire_init(&ctx, 0); + while (1) { + ret = drm_modeset_lock_all_ctx(dev, &ctx); + if (!ret) + ret = drm_atomic_helper_disable_all(dev, &ctx); + + if (ret != -EDEADLK) + break; + + drm_modeset_backoff(&ctx); + } + + if (ret) + DRM_ERROR("Disabling all crtc's during unload failed with %i\n", ret); + + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); + intel_gvt_cleanup(dev_priv); i915_driver_unregister(dev_priv); @@ -1317,7 +1366,7 @@ void i915_driver_unload(struct drm_device *dev) /* Free error state after interrupts are fully disabled. */ cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); - i915_destroy_error_state(dev_priv); + i915_reset_error_state(dev_priv); /* Flush any outstanding unpin_work. */ drain_workqueue(dev_priv->wq); @@ -1333,8 +1382,16 @@ void i915_driver_unload(struct drm_device *dev) i915_driver_cleanup_mmio(dev_priv); intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); +} + +static void i915_driver_release(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = to_i915(dev); i915_driver_cleanup_early(dev_priv); + drm_dev_fini(&dev_priv->drm); + + kfree(dev_priv); } static int i915_driver_open(struct drm_device *dev, struct drm_file *file) @@ -1716,6 +1773,8 @@ static int i915_drm_resume_early(struct drm_device *dev) !(dev_priv->suspended_to_idle && dev_priv->csr.dmc_payload)) intel_power_domains_init_hw(dev_priv, true); + i915_gem_sanitize(dev_priv); + enable_rpm_wakeref_asserts(dev_priv); out: @@ -1787,7 +1846,7 @@ void i915_reset(struct drm_i915_private *dev_priv) goto error; } - i915_gem_reset_finish(dev_priv); + i915_gem_reset(dev_priv); intel_overlay_reset(dev_priv); /* Ok, now get things going again... */ @@ -1813,6 +1872,7 @@ void i915_reset(struct drm_i915_private *dev_priv) i915_queue_hangcheck(dev_priv); wakeup: + i915_gem_reset_finish(dev_priv); enable_irq(dev_priv->drm.irq); wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS); return; @@ -2532,7 +2592,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH), - DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), @@ -2574,7 +2634,8 @@ static struct drm_driver driver = { */ .driver_features = DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | DRIVER_PRIME | - DRIVER_RENDER | DRIVER_MODESET, + DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC, + .release = i915_driver_release, .open = i915_driver_open, .lastclose = i915_driver_lastclose, .preclose = i915_driver_preclose, @@ -2603,3 +2664,7 @@ static struct drm_driver driver = { .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, }; + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_drm.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8df73751c367..9ae1e520f48c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -79,8 +79,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20170123" -#define DRIVER_TIMESTAMP 1485156432 +#define DRIVER_DATE "20170306" +#define DRIVER_TIMESTAMP 1488785683 #undef WARN_ON /* Many gcc seem to no see through this and fall over :( */ @@ -293,6 +293,7 @@ enum plane_id { PLANE_PRIMARY, PLANE_SPRITE0, PLANE_SPRITE1, + PLANE_SPRITE2, PLANE_CURSOR, I915_MAX_PLANES, }; @@ -343,6 +344,11 @@ enum intel_display_power_domain { POWER_DOMAIN_PORT_DDI_C_LANES, POWER_DOMAIN_PORT_DDI_D_LANES, POWER_DOMAIN_PORT_DDI_E_LANES, + POWER_DOMAIN_PORT_DDI_A_IO, + POWER_DOMAIN_PORT_DDI_B_IO, + POWER_DOMAIN_PORT_DDI_C_IO, + POWER_DOMAIN_PORT_DDI_D_IO, + POWER_DOMAIN_PORT_DDI_E_IO, POWER_DOMAIN_PORT_DSI, POWER_DOMAIN_PORT_CRT, POWER_DOMAIN_PORT_OTHER, @@ -384,6 +390,8 @@ enum hpd_pin { #define for_each_hpd_pin(__pin) \ for ((__pin) = (HPD_NONE + 1); (__pin) < HPD_NUM_PINS; (__pin)++) +#define HPD_STORM_DEFAULT_THRESHOLD 5 + struct i915_hotplug { struct work_struct hotplug_work; @@ -407,6 +415,8 @@ struct i915_hotplug { struct work_struct poll_init_work; bool poll_enabled; + unsigned int hpd_storm_threshold; + /* * if we get a HPD irq from DP and a HPD irq from non-DP * the non-DP HPD could block the workqueue on a mode config @@ -494,7 +504,35 @@ struct i915_hotplug { #define for_each_power_domain(domain, mask) \ for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++) \ - for_each_if ((1 << (domain)) & (mask)) + for_each_if (BIT_ULL(domain) & (mask)) + +#define for_each_power_well(__dev_priv, __power_well) \ + for ((__power_well) = (__dev_priv)->power_domains.power_wells; \ + (__power_well) - (__dev_priv)->power_domains.power_wells < \ + (__dev_priv)->power_domains.power_well_count; \ + (__power_well)++) + +#define for_each_power_well_rev(__dev_priv, __power_well) \ + for ((__power_well) = (__dev_priv)->power_domains.power_wells + \ + (__dev_priv)->power_domains.power_well_count - 1; \ + (__power_well) - (__dev_priv)->power_domains.power_wells >= 0; \ + (__power_well)--) + +#define for_each_power_domain_well(__dev_priv, __power_well, __domain_mask) \ + for_each_power_well(__dev_priv, __power_well) \ + for_each_if ((__power_well)->domains & (__domain_mask)) + +#define for_each_power_domain_well_rev(__dev_priv, __power_well, __domain_mask) \ + for_each_power_well_rev(__dev_priv, __power_well) \ + for_each_if ((__power_well)->domains & (__domain_mask)) + +#define for_each_intel_plane_in_state(__state, plane, plane_state, __i) \ + for ((__i) = 0; \ + (__i) < (__state)->base.dev->mode_config.num_total_plane && \ + ((plane) = to_intel_plane((__state)->base.planes[__i].ptr), \ + (plane_state) = to_intel_plane_state((__state)->base.planes[__i].state), 1); \ + (__i)++) \ + for_each_if (plane_state) struct drm_i915_private; struct i915_mm_struct; @@ -600,9 +638,13 @@ struct intel_initial_plane_config; struct intel_crtc; struct intel_limit; struct dpll; +struct intel_cdclk_state; struct drm_i915_display_funcs { - int (*get_display_clock_speed)(struct drm_i915_private *dev_priv); + void (*get_cdclk)(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state); + void (*set_cdclk)(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state); int (*get_fifo_size)(struct drm_i915_private *dev_priv, int plane); int (*compute_pipe_wm)(struct intel_crtc_state *cstate); int (*compute_intermediate_wm)(struct drm_device *dev, @@ -617,7 +659,6 @@ struct drm_i915_display_funcs { int (*compute_global_watermarks)(struct drm_atomic_state *state); void (*update_wm)(struct intel_crtc *crtc); int (*modeset_calc_cdclk)(struct drm_atomic_state *state); - void (*modeset_commit_cdclk)(struct drm_atomic_state *state); /* Returns the active state of the crtc, and if the crtc is active, * fills out the pipe-config with the hw state. */ bool (*get_pipe_config)(struct intel_crtc *, @@ -636,7 +677,8 @@ struct drm_i915_display_funcs { struct intel_encoder *encoder, const struct drm_display_mode *adjusted_mode); void (*audio_codec_disable)(struct intel_encoder *encoder); - void (*fdi_link_train)(struct drm_crtc *crtc); + void (*fdi_link_train)(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state); void (*init_clock_gating)(struct drm_i915_private *dev_priv); int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc, struct drm_framebuffer *fb, @@ -856,6 +898,7 @@ enum intel_platform { INTEL_BROXTON, INTEL_KABYLAKE, INTEL_GEMINILAKE, + INTEL_MAX_PLATFORMS }; struct intel_device_info { @@ -890,7 +933,7 @@ struct intel_device_info { struct intel_display_error_state; -struct drm_i915_error_state { +struct i915_gpu_state { struct kref ref; struct timeval time; struct timeval boottime; @@ -900,16 +943,20 @@ struct drm_i915_error_state { char error_msg[128]; bool simulated; + bool awake; + bool wakelock; + bool suspended; int iommu; u32 reset_count; u32 suspend_count; struct intel_device_info device_info; + struct i915_params params; /* Generic register state */ u32 eir; u32 pgtbl_er; u32 ier; - u32 gtier[4]; + u32 gtier[4], ngtier; u32 ccid; u32 derrmr; u32 forcewake; @@ -923,6 +970,7 @@ struct drm_i915_error_state { u32 gab_ctl; u32 gfx_mode; + u32 nfence; u64 fence[I915_MAX_NUM_FENCES]; struct intel_overlay_error_state *overlay; struct intel_display_error_state *display; @@ -970,6 +1018,16 @@ struct drm_i915_error_state { u32 semaphore_mboxes[I915_NUM_ENGINES - 1]; struct intel_instdone instdone; + struct drm_i915_error_context { + char comm[TASK_COMM_LEN]; + pid_t pid; + u32 handle; + u32 hw_id; + int ban_score; + int active; + int guilty; + } context; + struct drm_i915_error_object { u64 gtt_offset; u64 gtt_size; @@ -1003,10 +1061,6 @@ struct drm_i915_error_state { u32 pp_dir_base; }; } vm_info; - - pid_t pid; - char comm[TASK_COMM_LEN]; - int context_bans; } engine[I915_NUM_ENGINES]; struct drm_i915_error_buffer { @@ -1395,7 +1449,7 @@ struct i915_power_well { int count; /* cached hw enabled state */ bool hw_enabled; - unsigned long domains; + u64 domains; /* unique identifier for this power well */ unsigned long id; /* @@ -1456,7 +1510,7 @@ struct i915_gem_mm { struct work_struct free_work; /** Usable portion of the GTT for GEM */ - phys_addr_t stolen_base; /* limited to low memory (32-bit) */ + dma_addr_t stolen_base; /* limited to low memory (32-bit) */ /** PPGTT used for aliasing the PPGTT with the GTT */ struct i915_hw_ppgtt *aliasing_ppgtt; @@ -1498,11 +1552,6 @@ struct drm_i915_error_state_buf { loff_t pos; }; -struct i915_error_state_file_priv { - struct drm_i915_private *i915; - struct drm_i915_error_state *error; -}; - #define I915_RESET_TIMEOUT (10 * HZ) /* 10s */ #define I915_FENCE_TIMEOUT (10 * HZ) /* 10s */ @@ -1519,7 +1568,7 @@ struct i915_gpu_error { /* For reset and error_state handling. */ spinlock_t lock; /* Protected by the above dev->gpu_error.lock. */ - struct drm_i915_error_state *first_error; + struct i915_gpu_state *first_error; unsigned long missed_irq_rings; @@ -2053,6 +2102,10 @@ struct i915_oa_ops { bool (*oa_buffer_is_empty)(struct drm_i915_private *dev_priv); }; +struct intel_cdclk_state { + unsigned int cdclk, vco, ref; +}; + struct drm_i915_private { struct drm_device drm; @@ -2063,8 +2116,6 @@ struct drm_i915_private { const struct intel_device_info info; - int relative_constants_mode; - void __iomem *regs; struct intel_uncore uncore; @@ -2157,13 +2208,7 @@ struct drm_i915_private { unsigned int fsb_freq, mem_freq, is_ddr3; unsigned int skl_preferred_vco_freq; - unsigned int cdclk_freq, max_cdclk_freq; - - /* - * For reading holding any crtc lock is sufficient, - * for writing must hold all of them. - */ - unsigned int atomic_cdclk_freq; + unsigned int max_cdclk_freq; unsigned int max_dotclk_freq; unsigned int rawclk_freq; @@ -2171,8 +2216,22 @@ struct drm_i915_private { unsigned int czclk_freq; struct { - unsigned int vco, ref; - } cdclk_pll; + /* + * The current logical cdclk state. + * See intel_atomic_state.cdclk.logical + * + * For reading holding any crtc lock is sufficient, + * for writing must hold all of them. + */ + struct intel_cdclk_state logical; + /* + * The current actual cdclk state. + * See intel_atomic_state.cdclk.actual + */ + struct intel_cdclk_state actual; + /* The current hardware cdclk state */ + struct intel_cdclk_state hw; + } cdclk; /** * wq - Driver workqueue for GEM. @@ -2752,6 +2811,12 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_KBL_REVID(dev_priv, since, until) \ (IS_KABYLAKE(dev_priv) && IS_REVID(dev_priv, since, until)) +#define GLK_REVID_A0 0x0 +#define GLK_REVID_A1 0x1 + +#define IS_GLK_REVID(dev_priv, since, until) \ + (IS_GEMINILAKE(dev_priv) && IS_REVID(dev_priv, since, until)) + /* * The genX designation typically refers to the render engine, so render * capability related checks should use IS_GEN, while display and other checks @@ -2767,8 +2832,9 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_GEN8(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(7))) #define IS_GEN9(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(8))) -#define IS_GEN9_LP(dev_priv) (IS_GEN9(dev_priv) && INTEL_INFO(dev_priv)->is_lp) #define IS_LP(dev_priv) (INTEL_INFO(dev_priv)->is_lp) +#define IS_GEN9_LP(dev_priv) (IS_GEN9(dev_priv) && IS_LP(dev_priv)) +#define IS_GEN9_BC(dev_priv) (IS_GEN9(dev_priv) && !IS_LP(dev_priv)) #define ENGINE_MASK(id) BIT(id) #define RENDER_RING ENGINE_MASK(RCS) @@ -2810,9 +2876,7 @@ intel_info(const struct drm_i915_private *dev_priv) /* WaRsDisableCoarsePowerGating:skl,bxt */ #define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ - (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1) || \ - IS_SKL_GT3(dev_priv) || \ - IS_SKL_GT4(dev_priv)) + (IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) /* * dp aux and gmbus irq on gen4 seems to be able to generate legacy interrupts @@ -2952,6 +3016,9 @@ extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv); extern void i915_update_gfx_val(struct drm_i915_private *dev_priv); int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on); +int intel_engines_init_early(struct drm_i915_private *dev_priv); +int intel_engines_init(struct drm_i915_private *dev_priv); + /* intel_hotplug.c */ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 pin_mask, u32 long_mask); @@ -3129,6 +3196,7 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +void i915_gem_sanitize(struct drm_i915_private *i915); int i915_gem_load_init(struct drm_i915_private *dev_priv); void i915_gem_load_cleanup(struct drm_i915_private *dev_priv); void i915_gem_load_init_fences(struct drm_i915_private *dev_priv); @@ -3341,15 +3409,17 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error) } int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); +void i915_gem_reset(struct drm_i915_private *dev_priv); void i915_gem_reset_finish(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv); -void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); + +void i915_gem_init_mmio(struct drm_i915_private *i915); int __must_check i915_gem_init(struct drm_i915_private *dev_priv); int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv); void i915_gem_init_swizzling(struct drm_i915_private *dev_priv); void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv); -int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, - unsigned int flags); +int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, + unsigned int flags); int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv); void i915_gem_resume(struct drm_i915_private *dev_priv); int i915_gem_fault(struct vm_fault *vmf); @@ -3543,7 +3613,7 @@ static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {} __printf(2, 3) void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); int i915_error_state_to_str(struct drm_i915_error_state_buf *estr, - const struct i915_error_state_file_priv *error); + const struct i915_gpu_state *gpu); int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb, struct drm_i915_private *i915, size_t count, loff_t pos); @@ -3552,13 +3622,28 @@ static inline void i915_error_state_buf_release( { kfree(eb->buf); } + +struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915); void i915_capture_error_state(struct drm_i915_private *dev_priv, u32 engine_mask, const char *error_msg); -void i915_error_state_get(struct drm_device *dev, - struct i915_error_state_file_priv *error_priv); -void i915_error_state_put(struct i915_error_state_file_priv *error_priv); -void i915_destroy_error_state(struct drm_i915_private *dev_priv); + +static inline struct i915_gpu_state * +i915_gpu_state_get(struct i915_gpu_state *gpu) +{ + kref_get(&gpu->ref); + return gpu; +} + +void __i915_gpu_state_free(struct kref *kref); +static inline void i915_gpu_state_put(struct i915_gpu_state *gpu) +{ + if (gpu) + kref_put(&gpu->ref, __i915_gpu_state_free); +} + +struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915); +void i915_reset_error_state(struct drm_i915_private *i915); #else @@ -3568,7 +3653,13 @@ static inline void i915_capture_error_state(struct drm_i915_private *dev_priv, { } -static inline void i915_destroy_error_state(struct drm_i915_private *dev_priv) +static inline struct i915_gpu_state * +i915_first_error_state(struct drm_i915_private *i915) +{ + return NULL; +} + +static inline void i915_reset_error_state(struct drm_i915_private *i915) { } @@ -3708,7 +3799,7 @@ extern void i915_redisable_vga(struct drm_i915_private *dev_priv); extern void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv); extern bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val); extern void intel_init_pch_refclk(struct drm_i915_private *dev_priv); -extern void intel_set_rps(struct drm_i915_private *dev_priv, u8 val); +extern int intel_set_rps(struct drm_i915_private *dev_priv, u8 val); extern bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable); @@ -3724,7 +3815,6 @@ extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e, extern struct intel_display_error_state * intel_display_capture_error_state(struct drm_i915_private *dev_priv); extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e, - struct drm_i915_private *dev_priv, struct intel_display_error_state *error); int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val); @@ -3734,7 +3824,7 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, /* intel_sideband.c */ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr); -void vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val); +int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val); u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr); u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg); void vlv_iosf_sb_write(struct drm_i915_private *dev_priv, u8 port, u32 reg, u32 val); @@ -3953,14 +4043,34 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) } static inline bool -__i915_request_irq_complete(struct drm_i915_gem_request *req) +__i915_request_irq_complete(const struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; + u32 seqno; + + /* Note that the engine may have wrapped around the seqno, and + * so our request->global_seqno will be ahead of the hardware, + * even though it completed the request before wrapping. We catch + * this by kicking all the waiters before resetting the seqno + * in hardware, and also signal the fence. + */ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &req->fence.flags)) + return true; + + /* The request was dequeued before we were awoken. We check after + * inspecting the hw to confirm that this was the same request + * that generated the HWS update. The memory barriers within + * the request execution are sufficient to ensure that a check + * after reading the value from hw matches this request. + */ + seqno = i915_gem_request_global_seqno(req); + if (!seqno) + return false; /* Before we do the heavier coherent read of the seqno, * check the value (hopefully) in the CPU cacheline. */ - if (__i915_gem_request_completed(req)) + if (__i915_gem_request_completed(req, seqno)) return true; /* Ensure our read of the seqno is coherent so that we @@ -3975,9 +4085,9 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req) * is woken. */ if (engine->irq_seqno_barrier && - rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh) == current && - cmpxchg_relaxed(&engine->breadcrumbs.irq_posted, 1, 0)) { - struct task_struct *tsk; + test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) { + struct intel_breadcrumbs *b = &engine->breadcrumbs; + unsigned long flags; /* The ordering of irq_posted versus applying the barrier * is crucial. The clearing of the current irq_posted must @@ -3999,19 +4109,18 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req) * the seqno before we believe it coherent since they see * irq_posted == false but we are still running). */ - rcu_read_lock(); - tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh); - if (tsk && tsk != current) + spin_lock_irqsave(&b->irq_lock, flags); + if (b->irq_wait && b->irq_wait->tsk != current) /* Note that if the bottom-half is changed as we * are sending the wake-up, the new bottom-half will * be woken by whomever made the change. We only have * to worry about when we steal the irq-posted for * ourself. */ - wake_up_process(tsk); - rcu_read_unlock(); + wake_up_process(b->irq_wait->tsk); + spin_unlock_irqrestore(&b->irq_lock, flags); - if (__i915_gem_request_completed(req)) + if (__i915_gem_request_completed(req, seqno)) return true; } @@ -4042,4 +4151,10 @@ int remap_io_mapping(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, struct io_mapping *iomap); +static inline bool i915_gem_object_is_coherent(struct drm_i915_gem_object *obj) +{ + return (obj->cache_level != I915_CACHE_NONE || + HAS_LLC(to_i915(obj->base.dev))); +} + #endif diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6908123162d1..7c20601fe1de 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -29,12 +29,14 @@ #include <drm/drm_vma_manager.h> #include <drm/i915_drm.h> #include "i915_drv.h" +#include "i915_gem_clflush.h" #include "i915_vgpu.h" #include "i915_trace.h" #include "intel_drv.h" #include "intel_frontbuffer.h" #include "intel_mocs.h" #include <linux/dma-fence-array.h> +#include <linux/kthread.h> #include <linux/reservation.h> #include <linux/shmem_fs.h> #include <linux/slab.h> @@ -47,18 +49,12 @@ static void i915_gem_flush_free_objects(struct drm_i915_private *i915); static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); -static bool cpu_cache_is_coherent(struct drm_device *dev, - enum i915_cache_level level) -{ - return HAS_LLC(to_i915(dev)) || level != I915_CACHE_NONE; -} - static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) { if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) return false; - if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + if (!i915_gem_object_is_coherent(obj)) return true; return obj->pin_display; @@ -254,7 +250,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, if (needs_clflush && (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 && - !cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + !i915_gem_object_is_coherent(obj)) drm_clflush_sg(pages); obj->base.read_domains = I915_GEM_DOMAIN_CPU; @@ -312,6 +308,8 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { .release = i915_gem_object_release_phys, }; +static const struct drm_i915_gem_object_ops i915_gem_object_ops; + int i915_gem_object_unbind(struct drm_i915_gem_object *obj) { struct i915_vma *vma; @@ -399,7 +397,7 @@ out: if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq)) i915_gem_request_retire_upto(rq); - if (rps && rq->global_seqno == intel_engine_last_submit(rq->engine)) { + if (rps && i915_gem_request_global_seqno(rq) == intel_engine_last_submit(rq->engine)) { /* The GPU is now idle and this client has stalled. * Since no other client has submitted a request in the * meantime, assume that this client is the only one @@ -424,7 +422,9 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, long timeout, struct intel_rps_client *rps) { + unsigned int seq = __read_seqcount_begin(&resv->seq); struct dma_fence *excl; + bool prune_fences = false; if (flags & I915_WAIT_ALL) { struct dma_fence **shared; @@ -449,15 +449,26 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, for (; i < count; i++) dma_fence_put(shared[i]); kfree(shared); + + prune_fences = count && timeout >= 0; } else { excl = reservation_object_get_excl_rcu(resv); } - if (excl && timeout >= 0) + if (excl && timeout >= 0) { timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps); + prune_fences = timeout >= 0; + } dma_fence_put(excl); + if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) { + reservation_object_lock(resv, NULL); + if (!__read_seqcount_retry(&resv->seq, seq)) + reservation_object_add_excl_fence(resv, NULL); + reservation_object_unlock(resv); + } + return timeout; } @@ -585,9 +596,18 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, if (obj->mm.pages) return -EBUSY; + GEM_BUG_ON(obj->ops != &i915_gem_object_ops); obj->ops = &i915_gem_phys_ops; - return i915_gem_object_pin_pages(obj); + ret = i915_gem_object_pin_pages(obj); + if (ret) + goto err_xfer; + + return 0; + +err_xfer: + obj->ops = &i915_gem_object_ops; + return ret; } static int @@ -608,7 +628,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, drm_clflush_virt_range(vaddr, args->size); i915_gem_chipset_flush(to_i915(obj->base.dev)); - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + intel_fb_obj_flush(obj, ORIGIN_CPU); return 0; } @@ -771,8 +791,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, * anyway again before the next pread happens. */ if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) - *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, - obj->cache_level); + *needs_clflush = !i915_gem_object_is_coherent(obj); if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) { ret = i915_gem_object_set_to_cpu_domain(obj, false); @@ -828,8 +847,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, * before writing. */ if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) - *needs_clflush |= !cpu_cache_is_coherent(obj->base.dev, - obj->cache_level); + *needs_clflush |= !i915_gem_object_is_coherent(obj); if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) { ret = i915_gem_object_set_to_cpu_domain(obj, true); @@ -1257,7 +1275,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, user_data += page_length; offset += page_length; } - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + intel_fb_obj_flush(obj, ORIGIN_CPU); mutex_lock(&i915->drm.struct_mutex); out_unpin: @@ -1393,7 +1411,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, offset = 0; } - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + intel_fb_obj_flush(obj, ORIGIN_CPU); i915_gem_obj_finish_shmem_access(obj); return ret; } @@ -1596,23 +1614,16 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_sw_finish *args = data; struct drm_i915_gem_object *obj; - int err = 0; obj = i915_gem_object_lookup(file, args->handle); if (!obj) return -ENOENT; /* Pinned buffers may be scanout, so flush the cache */ - if (READ_ONCE(obj->pin_display)) { - err = i915_mutex_lock_interruptible(dev); - if (!err) { - i915_gem_object_flush_cpu_write_domain(obj); - mutex_unlock(&dev->struct_mutex); - } - } - + i915_gem_object_flush_if_display(obj); i915_gem_object_put(obj); - return err; + + return 0; } /** @@ -2223,17 +2234,17 @@ unlock: mutex_unlock(&obj->mm.lock); } -static void i915_sg_trim(struct sg_table *orig_st) +static bool i915_sg_trim(struct sg_table *orig_st) { struct sg_table new_st; struct scatterlist *sg, *new_sg; unsigned int i; if (orig_st->nents == orig_st->orig_nents) - return; + return false; if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN)) - return; + return false; new_sg = new_st.sgl; for_each_sg(orig_st->sgl, sg, orig_st->nents, i) { @@ -2246,6 +2257,7 @@ static void i915_sg_trim(struct sg_table *orig_st) sg_free_table(orig_st); *orig_st = new_st; + return true; } static struct sg_table * @@ -2596,7 +2608,8 @@ static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) struct drm_i915_gem_request * i915_gem_find_active_request(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *request; + struct drm_i915_gem_request *request, *active = NULL; + unsigned long flags; /* We are called by the error capture and reset at a random * point in time. In particular, note that neither is crucially @@ -2606,15 +2619,22 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) * extra delay for a recent interrupt is pointless. Hence, we do * not need an engine->irq_seqno_barrier() before the seqno reads. */ + spin_lock_irqsave(&engine->timeline->lock, flags); list_for_each_entry(request, &engine->timeline->requests, link) { - if (__i915_gem_request_completed(request)) + if (__i915_gem_request_completed(request, + request->global_seqno)) continue; GEM_BUG_ON(request->engine != engine); - return request; + GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &request->fence.flags)); + + active = request; + break; } + spin_unlock_irqrestore(&engine->timeline->lock, flags); - return NULL; + return active; } static bool engine_stalled(struct intel_engine_cs *engine) @@ -2641,7 +2661,30 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) { struct drm_i915_gem_request *request; + /* Prevent the signaler thread from updating the request + * state (by calling dma_fence_signal) as we are processing + * the reset. The write from the GPU of the seqno is + * asynchronous and the signaler thread may see a different + * value to us and declare the request complete, even though + * the reset routine have picked that request as the active + * (incomplete) request. This conflict is not handled + * gracefully! + */ + kthread_park(engine->breadcrumbs.signaler); + + /* Prevent request submission to the hardware until we have + * completed the reset in i915_gem_reset_finish(). If a request + * is completed by one engine, it may then queue a request + * to a second via its engine->irq_tasklet *just* as we are + * calling engine->init_hw() and also writing the ELSP. + * Turning off the engine->irq_tasklet until the reset is over + * prevents the race. + */ tasklet_kill(&engine->irq_tasklet); + tasklet_disable(&engine->irq_tasklet); + + if (engine->irq_seqno_barrier) + engine->irq_seqno_barrier(engine); if (engine_stalled(engine)) { request = i915_gem_find_active_request(engine); @@ -2739,9 +2782,6 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request; - if (engine->irq_seqno_barrier) - engine->irq_seqno_barrier(engine); - request = i915_gem_find_active_request(engine); if (request && i915_gem_reset_request(request)) { DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", @@ -2756,7 +2796,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) engine->reset_hw(engine, request); } -void i915_gem_reset_finish(struct drm_i915_private *dev_priv) +void i915_gem_reset(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -2765,8 +2805,14 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) i915_gem_retire_requests(dev_priv); - for_each_engine(engine, dev_priv, id) + for_each_engine(engine, dev_priv, id) { + struct i915_gem_context *ctx; + i915_gem_reset_engine(engine); + ctx = fetch_and_zero(&engine->last_retired_context); + if (ctx) + engine->context_unpin(engine, ctx); + } i915_gem_restore_fences(dev_priv); @@ -2778,6 +2824,19 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) } } +void i915_gem_reset_finish(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + for_each_engine(engine, dev_priv, id) { + tasklet_enable(&engine->irq_tasklet); + kthread_unpark(engine->breadcrumbs.signaler); + } +} + static void nop_submit_request(struct drm_i915_gem_request *request) { dma_fence_set_error(&request->fence, -EIO); @@ -2900,8 +2959,8 @@ i915_gem_idle_work_handler(struct work_struct *work) * new request is submitted. */ wait_for(READ_ONCE(dev_priv->gt.active_requests) || - intel_execlists_idle(dev_priv), 10); - + intel_engines_are_idle(dev_priv), + 10); if (READ_ONCE(dev_priv->gt.active_requests)) return; @@ -2926,11 +2985,13 @@ i915_gem_idle_work_handler(struct work_struct *work) if (dev_priv->gt.active_requests) goto out_unlock; - if (wait_for(intel_execlists_idle(dev_priv), 10)) + if (wait_for(intel_engines_are_idle(dev_priv), 10)) DRM_ERROR("Timeout waiting for engines to idle\n"); - for_each_engine(engine, dev_priv, id) + for_each_engine(engine, dev_priv, id) { + intel_engine_disarm_breadcrumbs(engine); i915_gem_batch_pool_fini(&engine->batch_pool); + } GEM_BUG_ON(!dev_priv->gt.awake); dev_priv->gt.awake = false; @@ -3029,6 +3090,16 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start)); if (args->timeout_ns < 0) args->timeout_ns = 0; + + /* + * Apparently ktime isn't accurate enough and occasionally has a + * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch + * things up to make the test happy. We allow up to 1 jiffy. + * + * This is a regression from the timespec->ktime conversion. + */ + if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns)) + args->timeout_ns = 0; } i915_gem_object_put(obj); @@ -3071,41 +3142,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) return 0; } -void i915_gem_clflush_object(struct drm_i915_gem_object *obj, - bool force) -{ - /* If we don't have a page list set up, then we're not pinned - * to GPU, and we can ignore the cache flush because it'll happen - * again at bind time. - */ - if (!obj->mm.pages) - return; - - /* - * Stolen memory is always coherent with the GPU as it is explicitly - * marked as wc by the system, or the system is cache-coherent. - */ - if (obj->stolen || obj->phys_handle) - return; - - /* If the GPU is snooping the contents of the CPU cache, - * we do not need to manually clear the CPU cache lines. However, - * the caches are only snooped when the render cache is - * flushed/invalidated. As we always have to emit invalidations - * and flushes when moving into and out of the RENDER domain, correct - * snooping behaviour occurs naturally as the result of our domain - * tracking. - */ - if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { - obj->cache_dirty = true; - return; - } - - trace_i915_gem_object_clflush(obj); - drm_clflush_sg(obj->mm.pages); - obj->cache_dirty = false; -} - /** Flushes the GTT write domain for the object if it's dirty. */ static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) @@ -3134,12 +3170,9 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) POSTING_READ(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); - intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT)); + intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT)); obj->base.write_domain = 0; - trace_i915_gem_object_change_domain(obj, - obj->base.read_domains, - I915_GEM_DOMAIN_GTT); } /** Flushes the CPU write domain for the object if it's dirty. */ @@ -3149,13 +3182,27 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) return; - i915_gem_clflush_object(obj, obj->pin_display); - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); + obj->base.write_domain = 0; +} + +static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) +{ + if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty) + return; + i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); obj->base.write_domain = 0; - trace_i915_gem_object_change_domain(obj, - obj->base.read_domains, - I915_GEM_DOMAIN_CPU); +} + +void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) +{ + if (!READ_ONCE(obj->pin_display)) + return; + + mutex_lock(&obj->base.dev->struct_mutex); + __i915_gem_object_flush_for_display(obj); + mutex_unlock(&obj->base.dev->struct_mutex); } /** @@ -3169,7 +3216,6 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) int i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) { - uint32_t old_write_domain, old_read_domains; int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); @@ -3207,9 +3253,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) mb(); - old_write_domain = obj->base.write_domain; - old_read_domains = obj->base.read_domains; - /* It should now be out of any other write domains, and we can update * the domain values for our changes. */ @@ -3221,10 +3264,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) obj->mm.dirty = true; } - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); - i915_gem_object_unpin_pages(obj); return 0; } @@ -3349,7 +3388,7 @@ restart: } if (obj->base.write_domain == I915_GEM_DOMAIN_CPU && - cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + i915_gem_object_is_coherent(obj)) obj->cache_dirty = true; list_for_each_entry(vma, &obj->vma_list, obj_link) @@ -3461,7 +3500,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view) { struct i915_vma *vma; - u32 old_read_domains, old_write_domain; int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); @@ -3521,24 +3559,14 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, vma->display_alignment = max_t(u64, vma->display_alignment, alignment); /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */ - if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) { - i915_gem_clflush_object(obj, true); - intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB); - } - - old_write_domain = obj->base.write_domain; - old_read_domains = obj->base.read_domains; + __i915_gem_object_flush_for_display(obj); + intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); /* It should now be out of any other write domains, and we can update * the domain values for our changes. */ - obj->base.write_domain = 0; obj->base.read_domains |= I915_GEM_DOMAIN_GTT; - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); - return vma; err_unpin_display: @@ -3574,7 +3602,6 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) { - uint32_t old_write_domain, old_read_domains; int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); @@ -3593,13 +3620,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) i915_gem_object_flush_gtt_write_domain(obj); - old_write_domain = obj->base.write_domain; - old_read_domains = obj->base.read_domains; - /* Flush the CPU cache if it's still invalid. */ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { - i915_gem_clflush_object(obj, false); - + i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); obj->base.read_domains |= I915_GEM_DOMAIN_CPU; } @@ -3616,10 +3639,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) obj->base.write_domain = I915_GEM_DOMAIN_CPU; } - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); - return 0; } @@ -3647,16 +3666,14 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) return -EIO; spin_lock(&file_priv->mm.lock); - list_for_each_entry(request, &file_priv->mm.request_list, client_list) { + list_for_each_entry(request, &file_priv->mm.request_list, client_link) { if (time_after_eq(request->emitted_jiffies, recent_enough)) break; - /* - * Note that the request might not have been submitted yet. - * In which case emitted_jiffies will be zero. - */ - if (!request->emitted_jiffies) - continue; + if (target) { + list_del(&target->client_link); + target->file_priv = NULL; + } target = request; } @@ -3942,7 +3959,7 @@ frontbuffer_retire(struct i915_gem_active *active, struct drm_i915_gem_object *obj = container_of(active, typeof(*obj), frontbuffer_write); - intel_fb_obj_flush(obj, true, ORIGIN_CS); + intel_fb_obj_flush(obj, ORIGIN_CS); } void i915_gem_object_init(struct drm_i915_gem_object *obj, @@ -4203,11 +4220,29 @@ static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv) !i915_gem_context_is_kernel(engine->last_retired_context)); } +void i915_gem_sanitize(struct drm_i915_private *i915) +{ + /* + * If we inherit context state from the BIOS or earlier occupants + * of the GPU, the GPU may be in an inconsistent state when we + * try to take over. The only way to remove the earlier state + * is by resetting. However, resetting on earlier gen is tricky as + * it may impact the display and we are uncertain about the stability + * of the reset, so we only reset recent machines with logical + * context support (that must be reset to remove any stray contexts). + */ + if (HAS_HW_CONTEXTS(i915)) { + int reset = intel_gpu_reset(i915, ALL_ENGINES); + WARN_ON(reset && reset != -ENODEV); + } +} + int i915_gem_suspend(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; int ret; + intel_runtime_pm_get(dev_priv); intel_suspend_gt_powersave(dev_priv); mutex_lock(&dev->struct_mutex); @@ -4222,13 +4257,13 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) */ ret = i915_gem_switch_to_kernel_context(dev_priv); if (ret) - goto err; + goto err_unlock; ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED); if (ret) - goto err; + goto err_unlock; i915_gem_retire_requests(dev_priv); GEM_BUG_ON(dev_priv->gt.active_requests); @@ -4252,7 +4287,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) * reset the GPU back to its idle, low power state. */ WARN_ON(dev_priv->gt.awake); - WARN_ON(!intel_execlists_idle(dev_priv)); + WARN_ON(!intel_engines_are_idle(dev_priv)); /* * Neither the BIOS, ourselves or any other kernel @@ -4273,15 +4308,13 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) * machines is a good idea, we don't - just in case it leaves the * machine in an unusable condition. */ - if (HAS_HW_CONTEXTS(dev_priv)) { - int reset = intel_gpu_reset(dev_priv, ALL_ENGINES); - WARN_ON(reset && reset != -ENODEV); - } - - return 0; + i915_gem_sanitize(dev_priv); + goto out_rpm_put; -err: +err_unlock: mutex_unlock(&dev->struct_mutex); +out_rpm_put: + intel_runtime_pm_put(dev_priv); return ret; } @@ -4351,11 +4384,24 @@ static void init_unused_rings(struct drm_i915_private *dev_priv) } } -int -i915_gem_init_hw(struct drm_i915_private *dev_priv) +static int __i915_gem_restart_engines(void *data) { + struct drm_i915_private *i915 = data; struct intel_engine_cs *engine; enum intel_engine_id id; + int err; + + for_each_engine(engine, i915, id) { + err = engine->init_hw(engine); + if (err) + return err; + } + + return 0; +} + +int i915_gem_init_hw(struct drm_i915_private *dev_priv) +{ int ret; dev_priv->gt.last_init_time = ktime_get(); @@ -4401,11 +4447,9 @@ i915_gem_init_hw(struct drm_i915_private *dev_priv) } /* Need to do basic initialisation of all rings first: */ - for_each_engine(engine, dev_priv, id) { - ret = engine->init_hw(engine); - if (ret) - goto out; - } + ret = __i915_gem_restart_engines(dev_priv); + if (ret) + goto out; intel_mocs_init_l3cc_table(dev_priv); @@ -4446,6 +4490,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); + i915_gem_clflush_init(dev_priv); + if (!i915.enable_execlists) { dev_priv->gt.resume = intel_legacy_submission_resume; dev_priv->gt.cleanup_engine = intel_engine_cleanup; @@ -4494,6 +4540,11 @@ out_unlock: return ret; } +void i915_gem_init_mmio(struct drm_i915_private *i915) +{ + i915_gem_sanitize(i915); +} + void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv) { @@ -4583,8 +4634,6 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) init_waitqueue_head(&dev_priv->gpu_error.wait_queue); init_waitqueue_head(&dev_priv->gpu_error.reset_queue); - dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; - init_waitqueue_head(&dev_priv->pending_flip_queue); dev_priv->mm.interruptible = true; @@ -4609,7 +4658,9 @@ err_out: void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) { + i915_gem_drain_freed_objects(dev_priv); WARN_ON(!llist_empty(&dev_priv->mm.free_list)); + WARN_ON(dev_priv->mm.object_count); mutex_lock(&dev_priv->drm.struct_mutex); i915_gem_timeline_fini(&dev_priv->gt.global_timeline); @@ -4627,14 +4678,10 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) int i915_gem_freeze(struct drm_i915_private *dev_priv) { - intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->drm.struct_mutex); i915_gem_shrink_all(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_put(dev_priv); - return 0; } @@ -4685,7 +4732,7 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file) * file_priv. */ spin_lock(&file_priv->mm.lock); - list_for_each_entry(request, &file_priv->mm.request_list, client_list) + list_for_each_entry(request, &file_priv->mm.request_list, client_link) request->file_priv = NULL; spin_unlock(&file_priv->mm.lock); @@ -4949,3 +4996,11 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, sg = i915_gem_object_get_sg(obj, n, &offset); return sg_dma_address(sg) + (offset << PAGE_SHIFT); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/scatterlist.c" +#include "selftests/mock_gem_device.c" +#include "selftests/huge_gem_object.c" +#include "selftests/i915_gem_object.c" +#include "selftests/i915_gem_coherency.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index a585d47c420a..5a49487368ca 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -28,9 +28,18 @@ #ifdef CONFIG_DRM_I915_DEBUG_GEM #define GEM_BUG_ON(expr) BUG_ON(expr) #define GEM_WARN_ON(expr) WARN_ON(expr) + +#define GEM_DEBUG_DECL(var) var +#define GEM_DEBUG_EXEC(expr) expr +#define GEM_DEBUG_BUG_ON(expr) GEM_BUG_ON(expr) + #else #define GEM_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr) #define GEM_WARN_ON(expr) (BUILD_BUG_ON_INVALID(expr), 0) + +#define GEM_DEBUG_DECL(var) +#define GEM_DEBUG_EXEC(expr) do { } while (0) +#define GEM_DEBUG_BUG_ON(expr) #endif #define I915_NUM_ENGINES 5 diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index b3bc119ec1bb..99ceae7855f8 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -122,9 +122,9 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, if (tmp->base.size >= size) { /* Clear the set of shared fences early */ - ww_mutex_lock(&tmp->resv->lock, NULL); + reservation_object_lock(tmp->resv, NULL); reservation_object_add_excl_fence(tmp->resv, NULL); - ww_mutex_unlock(&tmp->resv->lock); + reservation_object_unlock(tmp->resv); obj = tmp; break; diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c new file mode 100644 index 000000000000..d925fb582ba7 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_clflush.c @@ -0,0 +1,189 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "i915_drv.h" +#include "intel_frontbuffer.h" +#include "i915_gem_clflush.h" + +static DEFINE_SPINLOCK(clflush_lock); +static u64 clflush_context; + +struct clflush { + struct dma_fence dma; /* Must be first for dma_fence_free() */ + struct i915_sw_fence wait; + struct work_struct work; + struct drm_i915_gem_object *obj; +}; + +static const char *i915_clflush_get_driver_name(struct dma_fence *fence) +{ + return DRIVER_NAME; +} + +static const char *i915_clflush_get_timeline_name(struct dma_fence *fence) +{ + return "clflush"; +} + +static bool i915_clflush_enable_signaling(struct dma_fence *fence) +{ + return true; +} + +static void i915_clflush_release(struct dma_fence *fence) +{ + struct clflush *clflush = container_of(fence, typeof(*clflush), dma); + + i915_sw_fence_fini(&clflush->wait); + + BUILD_BUG_ON(offsetof(typeof(*clflush), dma)); + dma_fence_free(&clflush->dma); +} + +static const struct dma_fence_ops i915_clflush_ops = { + .get_driver_name = i915_clflush_get_driver_name, + .get_timeline_name = i915_clflush_get_timeline_name, + .enable_signaling = i915_clflush_enable_signaling, + .wait = dma_fence_default_wait, + .release = i915_clflush_release, +}; + +static void __i915_do_clflush(struct drm_i915_gem_object *obj) +{ + drm_clflush_sg(obj->mm.pages); + obj->cache_dirty = false; + + intel_fb_obj_flush(obj, ORIGIN_CPU); +} + +static void i915_clflush_work(struct work_struct *work) +{ + struct clflush *clflush = container_of(work, typeof(*clflush), work); + struct drm_i915_gem_object *obj = clflush->obj; + + if (!obj->cache_dirty) + goto out; + + if (i915_gem_object_pin_pages(obj)) { + DRM_ERROR("Failed to acquire obj->pages for clflushing\n"); + goto out; + } + + __i915_do_clflush(obj); + + i915_gem_object_unpin_pages(obj); + +out: + i915_gem_object_put(obj); + + dma_fence_signal(&clflush->dma); + dma_fence_put(&clflush->dma); +} + +static int __i915_sw_fence_call +i915_clflush_notify(struct i915_sw_fence *fence, + enum i915_sw_fence_notify state) +{ + struct clflush *clflush = container_of(fence, typeof(*clflush), wait); + + switch (state) { + case FENCE_COMPLETE: + schedule_work(&clflush->work); + break; + + case FENCE_FREE: + dma_fence_put(&clflush->dma); + break; + } + + return NOTIFY_DONE; +} + +void i915_gem_clflush_object(struct drm_i915_gem_object *obj, + unsigned int flags) +{ + struct clflush *clflush; + + /* + * Stolen memory is always coherent with the GPU as it is explicitly + * marked as wc by the system, or the system is cache-coherent. + * Similarly, we only access struct pages through the CPU cache, so + * anything not backed by physical memory we consider to be always + * coherent and not need clflushing. + */ + if (!i915_gem_object_has_struct_page(obj)) + return; + + obj->cache_dirty = true; + + /* If the GPU is snooping the contents of the CPU cache, + * we do not need to manually clear the CPU cache lines. However, + * the caches are only snooped when the render cache is + * flushed/invalidated. As we always have to emit invalidations + * and flushes when moving into and out of the RENDER domain, correct + * snooping behaviour occurs naturally as the result of our domain + * tracking. + */ + if (!(flags & I915_CLFLUSH_FORCE) && i915_gem_object_is_coherent(obj)) + return; + + trace_i915_gem_object_clflush(obj); + + clflush = NULL; + if (!(flags & I915_CLFLUSH_SYNC)) + clflush = kmalloc(sizeof(*clflush), GFP_KERNEL); + if (clflush) { + dma_fence_init(&clflush->dma, + &i915_clflush_ops, + &clflush_lock, + clflush_context, + 0); + i915_sw_fence_init(&clflush->wait, i915_clflush_notify); + + clflush->obj = i915_gem_object_get(obj); + INIT_WORK(&clflush->work, i915_clflush_work); + + dma_fence_get(&clflush->dma); + + i915_sw_fence_await_reservation(&clflush->wait, + obj->resv, NULL, + false, I915_FENCE_TIMEOUT, + GFP_KERNEL); + + reservation_object_lock(obj->resv, NULL); + reservation_object_add_excl_fence(obj->resv, &clflush->dma); + reservation_object_unlock(obj->resv); + + i915_sw_fence_commit(&clflush->wait); + } else if (obj->mm.pages) { + __i915_do_clflush(obj); + } else { + GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU); + } +} + +void i915_gem_clflush_init(struct drm_i915_private *i915) +{ + clflush_context = dma_fence_context_alloc(1); +} diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.h b/drivers/gpu/drm/i915/i915_gem_clflush.h new file mode 100644 index 000000000000..b62d61a2d15f --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_clflush.h @@ -0,0 +1,37 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_GEM_CLFLUSH_H__ +#define __I915_GEM_CLFLUSH_H__ + +struct drm_i915_private; +struct drm_i915_gem_object; + +void i915_gem_clflush_init(struct drm_i915_private *i915); +void i915_gem_clflush_object(struct drm_i915_gem_object *obj, + unsigned int flags); +#define I915_CLFLUSH_FORCE BIT(0) +#define I915_CLFLUSH_SYNC BIT(1) + +#endif /* __I915_GEM_CLFLUSH_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 17f90c618208..baceca14f5e0 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -92,21 +92,6 @@ #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 -/* This is a HW constraint. The value below is the largest known requirement - * I've seen in a spec to date, and that was a workaround for a non-shipping - * part. It should be safe to decrease this, but it's more future proof as is. - */ -#define GEN6_CONTEXT_ALIGN (64<<10) -#define GEN7_CONTEXT_ALIGN I915_GTT_MIN_ALIGNMENT - -static size_t get_context_alignment(struct drm_i915_private *dev_priv) -{ - if (IS_GEN6(dev_priv)) - return GEN6_CONTEXT_ALIGN; - - return GEN7_CONTEXT_ALIGN; -} - static int get_context_size(struct drm_i915_private *dev_priv) { int ret; @@ -236,6 +221,30 @@ static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out) return 0; } +static u32 default_desc_template(const struct drm_i915_private *i915, + const struct i915_hw_ppgtt *ppgtt) +{ + u32 address_mode; + u32 desc; + + desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE; + + address_mode = INTEL_LEGACY_32B_CONTEXT; + if (ppgtt && i915_vm_is_48bit(&ppgtt->base)) + address_mode = INTEL_LEGACY_64B_CONTEXT; + desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; + + if (IS_GEN8(i915)) + desc |= GEN8_CTX_L3LLC_COHERENT; + + /* TODO: WaDisableLiteRestore when we start using semaphore + * signalling between Command Streamers + * ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; + */ + + return desc; +} + static struct i915_gem_context * __create_hw_context(struct drm_i915_private *dev_priv, struct drm_i915_file_private *file_priv) @@ -257,8 +266,6 @@ __create_hw_context(struct drm_i915_private *dev_priv, list_add_tail(&ctx->link, &dev_priv->context_list); ctx->i915 = dev_priv; - ctx->ggtt_alignment = get_context_alignment(dev_priv); - if (dev_priv->hw_context_size) { struct drm_i915_gem_object *obj; struct i915_vma *vma; @@ -309,8 +316,8 @@ __create_hw_context(struct drm_i915_private *dev_priv, i915_gem_context_set_bannable(ctx); ctx->ring_size = 4 * PAGE_SIZE; - ctx->desc_template = GEN8_CTX_ADDRESSING_MODE(dev_priv) << - GEN8_CTX_ADDRESSING_MODE_SHIFT; + ctx->desc_template = + default_desc_template(dev_priv, dev_priv->mm.aliasing_ppgtt); ATOMIC_INIT_NOTIFIER_HEAD(&ctx->status_notifier); /* GuC requires the ring to be placed above GUC_WOPCM_TOP. If GuC is not @@ -332,6 +339,13 @@ err_out: return ERR_PTR(ret); } +static void __destroy_hw_context(struct i915_gem_context *ctx, + struct drm_i915_file_private *file_priv) +{ + idr_remove(&file_priv->context_idr, ctx->user_handle); + context_close(ctx); +} + /** * The default context needs to exist per ring that uses contexts. It stores the * context state of the GPU for applications that don't utilize HW contexts, as @@ -356,12 +370,12 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, if (IS_ERR(ppgtt)) { DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); - idr_remove(&file_priv->context_idr, ctx->user_handle); - context_close(ctx); + __destroy_hw_context(ctx, file_priv); return ERR_CAST(ppgtt); } ctx->ppgtt = ppgtt; + ctx->desc_template = default_desc_template(dev_priv, ppgtt); } trace_i915_context_create(ctx); @@ -400,7 +414,8 @@ i915_gem_context_create_gvt(struct drm_device *dev) i915_gem_context_set_closed(ctx); /* not user accessible */ i915_gem_context_clear_bannable(ctx); i915_gem_context_set_force_single_submission(ctx); - ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */ + if (!i915.enable_guc_submission) + ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */ GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); out: @@ -451,6 +466,11 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv) return PTR_ERR(ctx); } + /* For easy recognisablity, we want the kernel context to be 0 and then + * all user contexts will have non-zero hw_id. + */ + GEM_BUG_ON(ctx->hw_id); + i915_gem_context_clear_bannable(ctx); ctx->priority = I915_PRIORITY_MIN; /* lowest priority; idle task */ dev_priv->kernel_context = ctx; @@ -560,27 +580,15 @@ static inline int mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) { struct drm_i915_private *dev_priv = req->i915; - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; enum intel_engine_id id; - u32 flags = hw_flags | MI_MM_SPACE_GTT; + u32 *cs, flags = hw_flags | MI_MM_SPACE_GTT; const int num_rings = /* Use an extended w/a on ivb+ if signalling from other rings */ i915.semaphores ? INTEL_INFO(dev_priv)->num_rings - 1 : 0; - int len, ret; - - /* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB - * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value - * explicitly, so we rely on the value at ring init, stored in - * itlb_before_ctx_switch. - */ - if (IS_GEN6(dev_priv)) { - ret = engine->emit_flush(req, EMIT_INVALIDATE); - if (ret) - return ret; - } + int len; /* These flags are for resource streamer on HSW+ */ if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8) @@ -593,99 +601,92 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) if (INTEL_GEN(dev_priv) >= 7) len += 2 + (num_rings ? 4*num_rings + 6 : 0); - ret = intel_ring_begin(req, len); - if (ret) - return ret; + cs = intel_ring_begin(req, len); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ if (INTEL_GEN(dev_priv) >= 7) { - intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE); + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; if (num_rings) { struct intel_engine_cs *signaller; - intel_ring_emit(ring, - MI_LOAD_REGISTER_IMM(num_rings)); + *cs++ = MI_LOAD_REGISTER_IMM(num_rings); for_each_engine(signaller, dev_priv, id) { if (signaller == engine) continue; - intel_ring_emit_reg(ring, - RING_PSMI_CTL(signaller->mmio_base)); - intel_ring_emit(ring, - _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE)); + *cs++ = i915_mmio_reg_offset( + RING_PSMI_CTL(signaller->mmio_base)); + *cs++ = _MASKED_BIT_ENABLE( + GEN6_PSMI_SLEEP_MSG_DISABLE); } } } - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_SET_CONTEXT); - intel_ring_emit(ring, - i915_ggtt_offset(req->ctx->engine[RCS].state) | flags); + *cs++ = MI_NOOP; + *cs++ = MI_SET_CONTEXT; + *cs++ = i915_ggtt_offset(req->ctx->engine[RCS].state) | flags; /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv */ - intel_ring_emit(ring, MI_NOOP); + *cs++ = MI_NOOP; if (INTEL_GEN(dev_priv) >= 7) { if (num_rings) { struct intel_engine_cs *signaller; i915_reg_t last_reg = {}; /* keep gcc quiet */ - intel_ring_emit(ring, - MI_LOAD_REGISTER_IMM(num_rings)); + *cs++ = MI_LOAD_REGISTER_IMM(num_rings); for_each_engine(signaller, dev_priv, id) { if (signaller == engine) continue; last_reg = RING_PSMI_CTL(signaller->mmio_base); - intel_ring_emit_reg(ring, last_reg); - intel_ring_emit(ring, - _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE)); + *cs++ = i915_mmio_reg_offset(last_reg); + *cs++ = _MASKED_BIT_DISABLE( + GEN6_PSMI_SLEEP_MSG_DISABLE); } /* Insert a delay before the next switch! */ - intel_ring_emit(ring, - MI_STORE_REGISTER_MEM | - MI_SRM_LRM_GLOBAL_GTT); - intel_ring_emit_reg(ring, last_reg); - intel_ring_emit(ring, - i915_ggtt_offset(engine->scratch)); - intel_ring_emit(ring, MI_NOOP); + *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + *cs++ = i915_mmio_reg_offset(last_reg); + *cs++ = i915_ggtt_offset(engine->scratch); + *cs++ = MI_NOOP; } - intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE); + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; } - intel_ring_advance(ring); + intel_ring_advance(req, cs); - return ret; + return 0; } static int remap_l3(struct drm_i915_gem_request *req, int slice) { - u32 *remap_info = req->i915->l3_parity.remap_info[slice]; - struct intel_ring *ring = req->ring; - int i, ret; + u32 *cs, *remap_info = req->i915->l3_parity.remap_info[slice]; + int i; if (!remap_info) return 0; - ret = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2); - if (ret) - return ret; + cs = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* * Note: We do not worry about the concurrent register cacheline hang * here because no other code should access these registers other than * at initialization time. */ - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4)); + *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4); for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) { - intel_ring_emit_reg(ring, GEN7_L3LOG(slice, i)); - intel_ring_emit(ring, remap_info[i]); + *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i)); + *cs++ = remap_info[i]; } - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1014,8 +1015,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, return PTR_ERR(ctx); } - idr_remove(&file_priv->context_idr, ctx->user_handle); - context_close(ctx); + __destroy_hw_context(ctx, file_priv); mutex_unlock(&dev->struct_mutex); DRM_DEBUG("HW context %d destroyed\n", args->ctx_id); @@ -1164,3 +1164,8 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, return 0; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_context.c" +#include "selftests/i915_gem_context.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 0ac750b90f3d..81268c9770a6 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -140,8 +140,6 @@ struct i915_gem_context { */ int priority; - /** ggtt_alignment: alignment restriction for context objects */ - u32 ggtt_alignment; /** ggtt_offset_bias: placement restriction for context objects */ u32 ggtt_offset_bias; diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 29bb8011dbc4..11898cd97596 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -307,3 +307,8 @@ fail_detach: return ERR_PTR(ret); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_dmabuf.c" +#include "selftests/i915_gem_dmabuf.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index c181b1bb3d2c..a0de5734f7d0 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -258,6 +258,9 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, int ret = 0; lockdep_assert_held(&vm->i915->drm.struct_mutex); + GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE)); + GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); + trace_i915_gem_evict_node(vm, target, flags); /* Retire before we search the active list. Although we have @@ -271,11 +274,13 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, check_color = vm->mm.color_adjust; if (check_color) { /* Expand search to cover neighbouring guard pages (or lack!) */ - if (start > vm->start) + if (start) start -= I915_GTT_PAGE_SIZE; - if (end < vm->start + vm->total) - end += I915_GTT_PAGE_SIZE; + + /* Always look at the page afterwards to avoid the end-of-GTT */ + end += I915_GTT_PAGE_SIZE; } + GEM_BUG_ON(start >= end); drm_mm_for_each_node_in_range(node, &vm->mm, start, end) { /* If we find any non-objects (!vma), we cannot evict them */ @@ -284,6 +289,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, break; } + GEM_BUG_ON(!node->allocated); vma = container_of(node, typeof(*vma), node); /* If we are using coloring to insert guard pages between @@ -387,3 +393,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle) return 0; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_gem_evict.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index d02cfaefe1c8..dd7181ed5eca 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -28,12 +28,14 @@ #include <linux/dma_remapping.h> #include <linux/reservation.h> +#include <linux/sync_file.h> #include <linux/uaccess.h> #include <drm/drmP.h> #include <drm/i915_drm.h> #include "i915_drv.h" +#include "i915_gem_clflush.h" #include "i915_trace.h" #include "intel_drv.h" #include "intel_frontbuffer.h" @@ -1110,13 +1112,18 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; + if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC) + continue; + + if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) { + i915_gem_clflush_object(obj, 0); + obj->base.write_domain = 0; + } + ret = i915_gem_request_await_object (req, obj, obj->base.pending_write_domain); if (ret) return ret; - - if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) - i915_gem_clflush_object(obj, false); } /* Unconditionally flush any chipset caches (for streaming writes). */ @@ -1297,12 +1304,12 @@ static void eb_export_fence(struct drm_i915_gem_object *obj, * handle an error right now. Worst case should be missed * synchronisation leading to rendering corruption. */ - ww_mutex_lock(&resv->lock, NULL); + reservation_object_lock(resv, NULL); if (flags & EXEC_OBJECT_WRITE) reservation_object_add_excl_fence(resv, &req->fence); else if (reservation_object_reserve_shared(resv) == 0) reservation_object_add_shared_fence(resv, &req->fence); - ww_mutex_unlock(&resv->lock); + reservation_object_unlock(resv); } static void @@ -1313,8 +1320,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; - u32 old_read = obj->base.read_domains; - u32 old_write = obj->base.write_domain; obj->base.write_domain = obj->base.pending_write_domain; if (obj->base.write_domain) @@ -1325,32 +1330,31 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, i915_vma_move_to_active(vma, req, vma->exec_entry->flags); eb_export_fence(obj, req, vma->exec_entry->flags); - trace_i915_gem_object_change_domain(obj, old_read, old_write); } } static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; - int ret, i; + u32 *cs; + int i; if (!IS_GEN7(req->i915) || req->engine->id != RCS) { DRM_DEBUG("sol reset is gen7/rcs only\n"); return -EINVAL; } - ret = intel_ring_begin(req, 4 * 3); - if (ret) - return ret; + cs = intel_ring_begin(req, 4 * 3); + if (IS_ERR(cs)) + return PTR_ERR(cs); for (i = 0; i < 4; i++) { - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i)); - intel_ring_emit(ring, 0); + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i)); + *cs++ = 0; } - intel_ring_advance(ring); + intel_ring_advance(req, cs); return 0; } @@ -1403,15 +1407,20 @@ out: return vma; } +static void +add_to_client(struct drm_i915_gem_request *req, + struct drm_file *file) +{ + req->file_priv = file->driver_priv; + list_add_tail(&req->client_link, &req->file_priv->mm.request_list); +} + static int execbuf_submit(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, struct list_head *vmas) { - struct drm_i915_private *dev_priv = params->request->i915; u64 exec_start, exec_len; - int instp_mode; - u32 instp_mask; int ret; ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas); @@ -1422,56 +1431,11 @@ execbuf_submit(struct i915_execbuffer_params *params, if (ret) return ret; - instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; - instp_mask = I915_EXEC_CONSTANTS_MASK; - switch (instp_mode) { - case I915_EXEC_CONSTANTS_REL_GENERAL: - case I915_EXEC_CONSTANTS_ABSOLUTE: - case I915_EXEC_CONSTANTS_REL_SURFACE: - if (instp_mode != 0 && params->engine->id != RCS) { - DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); - return -EINVAL; - } - - if (instp_mode != dev_priv->relative_constants_mode) { - if (INTEL_INFO(dev_priv)->gen < 4) { - DRM_DEBUG("no rel constants on pre-gen4\n"); - return -EINVAL; - } - - if (INTEL_INFO(dev_priv)->gen > 5 && - instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { - DRM_DEBUG("rel surface constants mode invalid on gen5+\n"); - return -EINVAL; - } - - /* The HW changed the meaning on this bit on gen6 */ - if (INTEL_INFO(dev_priv)->gen >= 6) - instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; - } - break; - default: - DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode); + if (args->flags & I915_EXEC_CONSTANTS_MASK) { + DRM_DEBUG("I915_EXEC_CONSTANTS_* unsupported\n"); return -EINVAL; } - if (params->engine->id == RCS && - instp_mode != dev_priv->relative_constants_mode) { - struct intel_ring *ring = params->request->ring; - - ret = intel_ring_begin(params->request, 4); - if (ret) - return ret; - - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, INSTPM); - intel_ring_emit(ring, instp_mask << 16 | instp_mode); - intel_ring_advance(ring); - - dev_priv->relative_constants_mode = instp_mode; - } - if (args->flags & I915_EXEC_GEN7_SOL_RESET) { ret = i915_reset_gen7_sol_offsets(params->request); if (ret) @@ -1491,8 +1455,6 @@ execbuf_submit(struct i915_execbuffer_params *params, if (ret) return ret; - trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); - i915_gem_execbuffer_move_to_active(vmas, params->request); return 0; @@ -1591,6 +1553,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct i915_execbuffer_params *params = ¶ms_master; const u32 ctx_id = i915_execbuffer2_get_context_id(*args); u32 dispatch_flags; + struct dma_fence *in_fence = NULL; + struct sync_file *out_fence = NULL; + int out_fence_fd = -1; int ret; bool need_relocs; @@ -1634,6 +1599,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, dispatch_flags |= I915_DISPATCH_RS; } + if (args->flags & I915_EXEC_FENCE_IN) { + in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); + if (!in_fence) + return -EINVAL; + } + + if (args->flags & I915_EXEC_FENCE_OUT) { + out_fence_fd = get_unused_fd_flags(O_CLOEXEC); + if (out_fence_fd < 0) { + ret = out_fence_fd; + goto err_in_fence; + } + } + /* Take a local wakeref for preparing to dispatch the execbuf as * we expect to access the hardware fairly frequently in the * process. Upon first dispatch, we acquire another prolonged @@ -1778,6 +1757,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err_batch_unpin; } + if (in_fence) { + ret = i915_gem_request_await_dma_fence(params->request, + in_fence); + if (ret < 0) + goto err_request; + } + + if (out_fence_fd != -1) { + out_fence = sync_file_create(¶ms->request->fence); + if (!out_fence) { + ret = -ENOMEM; + goto err_request; + } + } + /* Whilst this request exists, batch_obj will be on the * active_list, and so will hold the active reference. Only when this * request is retired will the the batch_obj be moved onto the @@ -1786,10 +1780,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, */ params->request->batch = params->batch; - ret = i915_gem_request_add_to_client(params->request, file); - if (ret) - goto err_request; - /* * Save assorted stuff away to pass through to *_submission(). * NB: This data should be 'persistent' and not local as it will @@ -1802,9 +1792,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, params->dispatch_flags = dispatch_flags; params->ctx = ctx; + trace_i915_gem_request_queue(params->request, dispatch_flags); + ret = execbuf_submit(params, args, &eb->vmas); err_request: __i915_add_request(params->request, ret == 0); + add_to_client(params->request, file); + + if (out_fence) { + if (ret == 0) { + fd_install(out_fence_fd, out_fence->file); + args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */ + args->rsvd2 |= (u64)out_fence_fd << 32; + out_fence_fd = -1; + } else { + fput(out_fence->file); + } + } err_batch_unpin: /* @@ -1826,6 +1830,10 @@ pre_mutex_err: /* intel_gpu_busy should also get a ref, so it will free when the device * is really idle. */ intel_runtime_pm_put(dev_priv); + if (out_fence_fd != -1) + put_unused_fd(out_fence_fd); +err_in_fence: + dma_fence_put(in_fence); return ret; } @@ -1933,11 +1941,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, return -EINVAL; } - if (args->rsvd2 != 0) { - DRM_DEBUG("dirty rvsd2 field\n"); - return -EINVAL; - } - exec2_list = drm_malloc_gfp(args->buffer_count, sizeof(*exec2_list), GFP_TEMPORARY); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 2801a4d56324..cee9c4fec52a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -23,6 +23,9 @@ * */ +#include <linux/slab.h> /* fault-inject.h is not standalone! */ + +#include <linux/fault-inject.h> #include <linux/log2.h> #include <linux/random.h> #include <linux/seq_file.h> @@ -187,11 +190,17 @@ static int ppgtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, u32 unused) { - u32 pte_flags = 0; + u32 pte_flags; + int ret; + + ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->size); + if (ret) + return ret; vma->pages = vma->obj->mm.pages; /* Currently applicable only to VLV */ + pte_flags = 0; if (vma->obj->gt_ro) pte_flags |= PTE_READ_ONLY; @@ -203,9 +212,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma, static void ppgtt_unbind_vma(struct i915_vma *vma) { - vma->vm->clear_range(vma->vm, - vma->node.start, - vma->size); + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); } static gen8_pte_t gen8_pte_encode(dma_addr_t addr, @@ -340,268 +347,229 @@ static gen6_pte_t iris_pte_encode(dma_addr_t addr, return pte; } -static int __setup_page_dma(struct drm_i915_private *dev_priv, - struct i915_page_dma *p, gfp_t flags) +static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp) { - struct device *kdev = &dev_priv->drm.pdev->dev; + struct page *page; - p->page = alloc_page(flags); - if (!p->page) - return -ENOMEM; + if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1))) + i915_gem_shrink_all(vm->i915); - p->daddr = dma_map_page(kdev, - p->page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + if (vm->free_pages.nr) + return vm->free_pages.pages[--vm->free_pages.nr]; - if (dma_mapping_error(kdev, p->daddr)) { - __free_page(p->page); - return -EINVAL; - } + page = alloc_page(gfp); + if (!page) + return NULL; - return 0; + if (vm->pt_kmap_wc) + set_pages_array_wc(&page, 1); + + return page; } -static int setup_page_dma(struct drm_i915_private *dev_priv, - struct i915_page_dma *p) +static void vm_free_pages_release(struct i915_address_space *vm) { - return __setup_page_dma(dev_priv, p, I915_GFP_DMA); + GEM_BUG_ON(!pagevec_count(&vm->free_pages)); + + if (vm->pt_kmap_wc) + set_pages_array_wb(vm->free_pages.pages, + pagevec_count(&vm->free_pages)); + + __pagevec_release(&vm->free_pages); } -static void cleanup_page_dma(struct drm_i915_private *dev_priv, - struct i915_page_dma *p) +static void vm_free_page(struct i915_address_space *vm, struct page *page) { - struct pci_dev *pdev = dev_priv->drm.pdev; + if (!pagevec_add(&vm->free_pages, page)) + vm_free_pages_release(vm); +} - if (WARN_ON(!p->page)) - return; +static int __setup_page_dma(struct i915_address_space *vm, + struct i915_page_dma *p, + gfp_t gfp) +{ + p->page = vm_alloc_page(vm, gfp | __GFP_NOWARN | __GFP_NORETRY); + if (unlikely(!p->page)) + return -ENOMEM; + + p->daddr = dma_map_page(vm->dma, p->page, 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(vm->dma, p->daddr))) { + vm_free_page(vm, p->page); + return -ENOMEM; + } - dma_unmap_page(&pdev->dev, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - __free_page(p->page); - memset(p, 0, sizeof(*p)); + return 0; } -static void *kmap_page_dma(struct i915_page_dma *p) +static int setup_page_dma(struct i915_address_space *vm, + struct i915_page_dma *p) { - return kmap_atomic(p->page); + return __setup_page_dma(vm, p, I915_GFP_DMA); } -/* We use the flushing unmap only with ppgtt structures: - * page directories, page tables and scratch pages. - */ -static void kunmap_page_dma(struct drm_i915_private *dev_priv, void *vaddr) +static void cleanup_page_dma(struct i915_address_space *vm, + struct i915_page_dma *p) { - /* There are only few exceptions for gen >=6. chv and bxt. - * And we are not sure about the latter so play safe for now. - */ - if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv)) - drm_clflush_virt_range(vaddr, PAGE_SIZE); - - kunmap_atomic(vaddr); + dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + vm_free_page(vm, p->page); } -#define kmap_px(px) kmap_page_dma(px_base(px)) -#define kunmap_px(ppgtt, vaddr) \ - kunmap_page_dma((ppgtt)->base.i915, (vaddr)) +#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page) -#define setup_px(dev_priv, px) setup_page_dma((dev_priv), px_base(px)) -#define cleanup_px(dev_priv, px) cleanup_page_dma((dev_priv), px_base(px)) -#define fill_px(dev_priv, px, v) fill_page_dma((dev_priv), px_base(px), (v)) -#define fill32_px(dev_priv, px, v) \ - fill_page_dma_32((dev_priv), px_base(px), (v)) +#define setup_px(vm, px) setup_page_dma((vm), px_base(px)) +#define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px)) +#define fill_px(ppgtt, px, v) fill_page_dma((vm), px_base(px), (v)) +#define fill32_px(ppgtt, px, v) fill_page_dma_32((vm), px_base(px), (v)) -static void fill_page_dma(struct drm_i915_private *dev_priv, - struct i915_page_dma *p, const uint64_t val) +static void fill_page_dma(struct i915_address_space *vm, + struct i915_page_dma *p, + const u64 val) { + u64 * const vaddr = kmap_atomic(p->page); int i; - uint64_t * const vaddr = kmap_page_dma(p); for (i = 0; i < 512; i++) vaddr[i] = val; - kunmap_page_dma(dev_priv, vaddr); + kunmap_atomic(vaddr); } -static void fill_page_dma_32(struct drm_i915_private *dev_priv, - struct i915_page_dma *p, const uint32_t val32) +static void fill_page_dma_32(struct i915_address_space *vm, + struct i915_page_dma *p, + const u32 v) { - uint64_t v = val32; - - v = v << 32 | val32; - - fill_page_dma(dev_priv, p, v); + fill_page_dma(vm, p, (u64)v << 32 | v); } static int -setup_scratch_page(struct drm_i915_private *dev_priv, - struct i915_page_dma *scratch, - gfp_t gfp) +setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) { - return __setup_page_dma(dev_priv, scratch, gfp | __GFP_ZERO); + return __setup_page_dma(vm, &vm->scratch_page, gfp | __GFP_ZERO); } -static void cleanup_scratch_page(struct drm_i915_private *dev_priv, - struct i915_page_dma *scratch) +static void cleanup_scratch_page(struct i915_address_space *vm) { - cleanup_page_dma(dev_priv, scratch); + cleanup_page_dma(vm, &vm->scratch_page); } -static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv) +static struct i915_page_table *alloc_pt(struct i915_address_space *vm) { struct i915_page_table *pt; - const size_t count = INTEL_GEN(dev_priv) >= 8 ? GEN8_PTES : GEN6_PTES; - int ret = -ENOMEM; - pt = kzalloc(sizeof(*pt), GFP_KERNEL); - if (!pt) + pt = kmalloc(sizeof(*pt), GFP_KERNEL | __GFP_NOWARN); + if (unlikely(!pt)) return ERR_PTR(-ENOMEM); - pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes), - GFP_KERNEL); - - if (!pt->used_ptes) - goto fail_bitmap; - - ret = setup_px(dev_priv, pt); - if (ret) - goto fail_page_m; + if (unlikely(setup_px(vm, pt))) { + kfree(pt); + return ERR_PTR(-ENOMEM); + } + pt->used_ptes = 0; return pt; - -fail_page_m: - kfree(pt->used_ptes); -fail_bitmap: - kfree(pt); - - return ERR_PTR(ret); } -static void free_pt(struct drm_i915_private *dev_priv, - struct i915_page_table *pt) +static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt) { - cleanup_px(dev_priv, pt); - kfree(pt->used_ptes); + cleanup_px(vm, pt); kfree(pt); } static void gen8_initialize_pt(struct i915_address_space *vm, struct i915_page_table *pt) { - gen8_pte_t scratch_pte; - - scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC); - - fill_px(vm->i915, pt, scratch_pte); + fill_px(vm, pt, + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC)); } static void gen6_initialize_pt(struct i915_address_space *vm, struct i915_page_table *pt) { - gen6_pte_t scratch_pte; - - WARN_ON(vm->scratch_page.daddr == 0); - - scratch_pte = vm->pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC, 0); - - fill32_px(vm->i915, pt, scratch_pte); + fill32_px(vm, pt, + vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0)); } -static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv) +static struct i915_page_directory *alloc_pd(struct i915_address_space *vm) { struct i915_page_directory *pd; - int ret = -ENOMEM; - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) + pd = kzalloc(sizeof(*pd), GFP_KERNEL | __GFP_NOWARN); + if (unlikely(!pd)) return ERR_PTR(-ENOMEM); - pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES), - sizeof(*pd->used_pdes), GFP_KERNEL); - if (!pd->used_pdes) - goto fail_bitmap; - - ret = setup_px(dev_priv, pd); - if (ret) - goto fail_page_m; + if (unlikely(setup_px(vm, pd))) { + kfree(pd); + return ERR_PTR(-ENOMEM); + } + pd->used_pdes = 0; return pd; - -fail_page_m: - kfree(pd->used_pdes); -fail_bitmap: - kfree(pd); - - return ERR_PTR(ret); } -static void free_pd(struct drm_i915_private *dev_priv, +static void free_pd(struct i915_address_space *vm, struct i915_page_directory *pd) { - if (px_page(pd)) { - cleanup_px(dev_priv, pd); - kfree(pd->used_pdes); - kfree(pd); - } + cleanup_px(vm, pd); + kfree(pd); } static void gen8_initialize_pd(struct i915_address_space *vm, struct i915_page_directory *pd) { - gen8_pde_t scratch_pde; - - scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC); + unsigned int i; - fill_px(vm->i915, pd, scratch_pde); + fill_px(vm, pd, + gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC)); + for (i = 0; i < I915_PDES; i++) + pd->page_table[i] = vm->scratch_pt; } -static int __pdp_init(struct drm_i915_private *dev_priv, +static int __pdp_init(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp) { - size_t pdpes = I915_PDPES_PER_PDP(dev_priv); + const unsigned int pdpes = i915_pdpes_per_pdp(vm); + unsigned int i; - pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes), - sizeof(unsigned long), - GFP_KERNEL); - if (!pdp->used_pdpes) + pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory), + GFP_KERNEL | __GFP_NOWARN); + if (unlikely(!pdp->page_directory)) return -ENOMEM; - pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory), - GFP_KERNEL); - if (!pdp->page_directory) { - kfree(pdp->used_pdpes); - /* the PDP might be the statically allocated top level. Keep it - * as clean as possible */ - pdp->used_pdpes = NULL; - return -ENOMEM; - } + for (i = 0; i < pdpes; i++) + pdp->page_directory[i] = vm->scratch_pd; return 0; } static void __pdp_fini(struct i915_page_directory_pointer *pdp) { - kfree(pdp->used_pdpes); kfree(pdp->page_directory); pdp->page_directory = NULL; } -static struct -i915_page_directory_pointer *alloc_pdp(struct drm_i915_private *dev_priv) +static inline bool use_4lvl(const struct i915_address_space *vm) +{ + return i915_vm_is_48bit(vm); +} + +static struct i915_page_directory_pointer * +alloc_pdp(struct i915_address_space *vm) { struct i915_page_directory_pointer *pdp; int ret = -ENOMEM; - WARN_ON(!USES_FULL_48BIT_PPGTT(dev_priv)); + WARN_ON(!use_4lvl(vm)); pdp = kzalloc(sizeof(*pdp), GFP_KERNEL); if (!pdp) return ERR_PTR(-ENOMEM); - ret = __pdp_init(dev_priv, pdp); + ret = __pdp_init(vm, pdp); if (ret) goto fail_bitmap; - ret = setup_px(dev_priv, pdp); + ret = setup_px(vm, pdp); if (ret) goto fail_page_m; @@ -615,14 +583,16 @@ fail_bitmap: return ERR_PTR(ret); } -static void free_pdp(struct drm_i915_private *dev_priv, +static void free_pdp(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp) { __pdp_fini(pdp); - if (USES_FULL_48BIT_PPGTT(dev_priv)) { - cleanup_px(dev_priv, pdp); - kfree(pdp); - } + + if (!use_4lvl(vm)) + return; + + cleanup_px(vm, pdp); + kfree(pdp); } static void gen8_initialize_pdp(struct i915_address_space *vm, @@ -632,47 +602,18 @@ static void gen8_initialize_pdp(struct i915_address_space *vm, scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC); - fill_px(vm->i915, pdp, scratch_pdpe); + fill_px(vm, pdp, scratch_pdpe); } static void gen8_initialize_pml4(struct i915_address_space *vm, struct i915_pml4 *pml4) { - gen8_ppgtt_pml4e_t scratch_pml4e; - - scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp), - I915_CACHE_LLC); - - fill_px(vm->i915, pml4, scratch_pml4e); -} - -static void -gen8_setup_pdpe(struct i915_hw_ppgtt *ppgtt, - struct i915_page_directory_pointer *pdp, - struct i915_page_directory *pd, - int index) -{ - gen8_ppgtt_pdpe_t *page_directorypo; + unsigned int i; - if (!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev))) - return; - - page_directorypo = kmap_px(pdp); - page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); - kunmap_px(ppgtt, page_directorypo); -} - -static void -gen8_setup_pml4e(struct i915_hw_ppgtt *ppgtt, - struct i915_pml4 *pml4, - struct i915_page_directory_pointer *pdp, - int index) -{ - gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4); - - WARN_ON(!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev))); - pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); - kunmap_px(ppgtt, pagemap); + fill_px(vm, pml4, + gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC)); + for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) + pml4->pdps[i] = vm->scratch_pdp; } /* Broadwell Page Directory Pointer Descriptors */ @@ -680,33 +621,32 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req, unsigned entry, dma_addr_t addr) { - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; - int ret; + u32 *cs; BUG_ON(entry >= 4); - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, entry)); - intel_ring_emit(ring, upper_32_bits(addr)); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, entry)); - intel_ring_emit(ring, lower_32_bits(addr)); - intel_ring_advance(ring); + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, entry)); + *cs++ = upper_32_bits(addr); + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, entry)); + *cs++ = lower_32_bits(addr); + intel_ring_advance(req, cs); return 0; } -static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req) +static int gen8_mm_switch_3lvl(struct i915_hw_ppgtt *ppgtt, + struct drm_i915_gem_request *req) { int i, ret; - for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { + for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) { const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); ret = gen8_write_pdp(req, i, pd_daddr); @@ -717,8 +657,8 @@ static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt, return 0; } -static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req) +static int gen8_mm_switch_4lvl(struct i915_hw_ppgtt *ppgtt, + struct drm_i915_gem_request *req) { return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4)); } @@ -738,70 +678,80 @@ static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) */ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, struct i915_page_table *pt, - uint64_t start, - uint64_t length) + u64 start, u64 length) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); unsigned int num_entries = gen8_pte_count(start, length); unsigned int pte = gen8_pte_index(start); unsigned int pte_end = pte + num_entries; - gen8_pte_t *pt_vaddr; - gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC); + const gen8_pte_t scratch_pte = + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); + gen8_pte_t *vaddr; - if (WARN_ON(!px_page(pt))) - return false; + GEM_BUG_ON(num_entries > pt->used_ptes); - GEM_BUG_ON(pte_end > GEN8_PTES); + pt->used_ptes -= num_entries; + if (!pt->used_ptes) + return true; - bitmap_clear(pt->used_ptes, pte, num_entries); - if (USES_FULL_PPGTT(vm->i915)) { - if (bitmap_empty(pt->used_ptes, GEN8_PTES)) - return true; - } + vaddr = kmap_atomic_px(pt); + while (pte < pte_end) + vaddr[pte++] = scratch_pte; + kunmap_atomic(vaddr); - pt_vaddr = kmap_px(pt); + return false; +} - while (pte < pte_end) - pt_vaddr[pte++] = scratch_pte; +static void gen8_ppgtt_set_pde(struct i915_address_space *vm, + struct i915_page_directory *pd, + struct i915_page_table *pt, + unsigned int pde) +{ + gen8_pde_t *vaddr; - kunmap_px(ppgtt, pt_vaddr); + pd->page_table[pde] = pt; - return false; + vaddr = kmap_atomic_px(pd); + vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC); + kunmap_atomic(vaddr); } -/* Removes entries from a single page dir, releasing it if it's empty. - * Caller can use the return value to update higher-level entries - */ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, struct i915_page_directory *pd, - uint64_t start, - uint64_t length) + u64 start, u64 length) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_table *pt; - uint64_t pde; - gen8_pde_t *pde_vaddr; - gen8_pde_t scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), - I915_CACHE_LLC); + u32 pde; gen8_for_each_pde(pt, pd, start, length, pde) { - if (WARN_ON(!pd->page_table[pde])) - break; - - if (gen8_ppgtt_clear_pt(vm, pt, start, length)) { - __clear_bit(pde, pd->used_pdes); - pde_vaddr = kmap_px(pd); - pde_vaddr[pde] = scratch_pde; - kunmap_px(ppgtt, pde_vaddr); - free_pt(vm->i915, pt); - } + GEM_BUG_ON(pt == vm->scratch_pt); + + if (!gen8_ppgtt_clear_pt(vm, pt, start, length)) + continue; + + gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde); + GEM_BUG_ON(!pd->used_pdes); + pd->used_pdes--; + + free_pt(vm, pt); } - if (bitmap_empty(pd->used_pdes, I915_PDES)) - return true; + return !pd->used_pdes; +} - return false; +static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm, + struct i915_page_directory_pointer *pdp, + struct i915_page_directory *pd, + unsigned int pdpe) +{ + gen8_ppgtt_pdpe_t *vaddr; + + pdp->page_directory[pdpe] = pd; + if (!use_4lvl(vm)) + return; + + vaddr = kmap_atomic_px(pdp); + vaddr[pdpe] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); + kunmap_atomic(vaddr); } /* Removes entries from a single page dir pointer, releasing it if it's empty. @@ -809,138 +759,189 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, */ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp, - uint64_t start, - uint64_t length) + u64 start, u64 length) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_directory *pd; - uint64_t pdpe; + unsigned int pdpe; gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - if (WARN_ON(!pdp->page_directory[pdpe])) - break; + GEM_BUG_ON(pd == vm->scratch_pd); - if (gen8_ppgtt_clear_pd(vm, pd, start, length)) { - __clear_bit(pdpe, pdp->used_pdpes); - gen8_setup_pdpe(ppgtt, pdp, vm->scratch_pd, pdpe); - free_pd(vm->i915, pd); - } + if (!gen8_ppgtt_clear_pd(vm, pd, start, length)) + continue; + + gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); + GEM_BUG_ON(!pdp->used_pdpes); + pdp->used_pdpes--; + + free_pd(vm, pd); } - mark_tlbs_dirty(ppgtt); + return !pdp->used_pdpes; +} - if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv))) - return true; +static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm, + u64 start, u64 length) +{ + gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length); +} - return false; +static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4, + struct i915_page_directory_pointer *pdp, + unsigned int pml4e) +{ + gen8_ppgtt_pml4e_t *vaddr; + + pml4->pdps[pml4e] = pdp; + + vaddr = kmap_atomic_px(pml4); + vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); + kunmap_atomic(vaddr); } /* Removes entries from a single pml4. * This is the top-level structure in 4-level page tables used on gen8+. * Empty entries are always scratch pml4e. */ -static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm, - struct i915_pml4 *pml4, - uint64_t start, - uint64_t length) +static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, + u64 start, u64 length) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + struct i915_pml4 *pml4 = &ppgtt->pml4; struct i915_page_directory_pointer *pdp; - uint64_t pml4e; + unsigned int pml4e; - GEM_BUG_ON(!USES_FULL_48BIT_PPGTT(vm->i915)); + GEM_BUG_ON(!use_4lvl(vm)); gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - if (WARN_ON(!pml4->pdps[pml4e])) - break; + GEM_BUG_ON(pdp == vm->scratch_pdp); - if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) { - __clear_bit(pml4e, pml4->used_pml4es); - gen8_setup_pml4e(ppgtt, pml4, vm->scratch_pdp, pml4e); - free_pdp(vm->i915, pdp); - } + if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length)) + continue; + + gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); + + free_pdp(vm, pdp); } } -static void gen8_ppgtt_clear_range(struct i915_address_space *vm, - uint64_t start, uint64_t length) -{ - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); +struct sgt_dma { + struct scatterlist *sg; + dma_addr_t dma, max; +}; - if (USES_FULL_48BIT_PPGTT(vm->i915)) - gen8_ppgtt_clear_pml4(vm, &ppgtt->pml4, start, length); - else - gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length); +struct gen8_insert_pte { + u16 pml4e; + u16 pdpe; + u16 pde; + u16 pte; +}; + +static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start) +{ + return (struct gen8_insert_pte) { + gen8_pml4e_index(start), + gen8_pdpe_index(start), + gen8_pde_index(start), + gen8_pte_index(start), + }; } -static void -gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm, +static __always_inline bool +gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, struct i915_page_directory_pointer *pdp, - struct sg_page_iter *sg_iter, - uint64_t start, + struct sgt_dma *iter, + struct gen8_insert_pte *idx, enum i915_cache_level cache_level) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - gen8_pte_t *pt_vaddr; - unsigned pdpe = gen8_pdpe_index(start); - unsigned pde = gen8_pde_index(start); - unsigned pte = gen8_pte_index(start); - - pt_vaddr = NULL; - - while (__sg_page_iter_next(sg_iter)) { - if (pt_vaddr == NULL) { - struct i915_page_directory *pd = pdp->page_directory[pdpe]; - struct i915_page_table *pt = pd->page_table[pde]; - pt_vaddr = kmap_px(pt); + struct i915_page_directory *pd; + const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level); + gen8_pte_t *vaddr; + bool ret; + + GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base)); + pd = pdp->page_directory[idx->pdpe]; + vaddr = kmap_atomic_px(pd->page_table[idx->pde]); + do { + vaddr[idx->pte] = pte_encode | iter->dma; + + iter->dma += PAGE_SIZE; + if (iter->dma >= iter->max) { + iter->sg = __sg_next(iter->sg); + if (!iter->sg) { + ret = false; + break; + } + + iter->dma = sg_dma_address(iter->sg); + iter->max = iter->dma + iter->sg->length; } - pt_vaddr[pte] = - gen8_pte_encode(sg_page_iter_dma_address(sg_iter), - cache_level); - if (++pte == GEN8_PTES) { - kunmap_px(ppgtt, pt_vaddr); - pt_vaddr = NULL; - if (++pde == I915_PDES) { - if (++pdpe == I915_PDPES_PER_PDP(vm->i915)) + if (++idx->pte == GEN8_PTES) { + idx->pte = 0; + + if (++idx->pde == I915_PDES) { + idx->pde = 0; + + /* Limited by sg length for 3lvl */ + if (++idx->pdpe == GEN8_PML4ES_PER_PML4) { + idx->pdpe = 0; + ret = true; break; - pde = 0; + } + + GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base)); + pd = pdp->page_directory[idx->pdpe]; } - pte = 0; + + kunmap_atomic(vaddr); + vaddr = kmap_atomic_px(pd->page_table[idx->pde]); } - } + } while (1); + kunmap_atomic(vaddr); - if (pt_vaddr) - kunmap_px(ppgtt, pt_vaddr); + return ret; } -static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, - struct sg_table *pages, - uint64_t start, - enum i915_cache_level cache_level, - u32 unused) +static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, + struct sg_table *pages, + u64 start, + enum i915_cache_level cache_level, + u32 unused) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct sg_page_iter sg_iter; + struct sgt_dma iter = { + .sg = pages->sgl, + .dma = sg_dma_address(iter.sg), + .max = iter.dma + iter.sg->length, + }; + struct gen8_insert_pte idx = gen8_insert_pte(start); - __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0); + gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx, + cache_level); +} - if (!USES_FULL_48BIT_PPGTT(vm->i915)) { - gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start, - cache_level); - } else { - struct i915_page_directory_pointer *pdp; - uint64_t pml4e; - uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT; +static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, + struct sg_table *pages, + u64 start, + enum i915_cache_level cache_level, + u32 unused) +{ + struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + struct sgt_dma iter = { + .sg = pages->sgl, + .dma = sg_dma_address(iter.sg), + .max = iter.dma + iter.sg->length, + }; + struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps; + struct gen8_insert_pte idx = gen8_insert_pte(start); - gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) { - gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter, - start, cache_level); - } - } + while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++], &iter, + &idx, cache_level)) + GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4); } -static void gen8_free_page_tables(struct drm_i915_private *dev_priv, +static void gen8_free_page_tables(struct i915_address_space *vm, struct i915_page_directory *pd) { int i; @@ -948,38 +949,34 @@ static void gen8_free_page_tables(struct drm_i915_private *dev_priv, if (!px_page(pd)) return; - for_each_set_bit(i, pd->used_pdes, I915_PDES) { - if (WARN_ON(!pd->page_table[i])) - continue; - - free_pt(dev_priv, pd->page_table[i]); - pd->page_table[i] = NULL; + for (i = 0; i < I915_PDES; i++) { + if (pd->page_table[i] != vm->scratch_pt) + free_pt(vm, pd->page_table[i]); } } static int gen8_init_scratch(struct i915_address_space *vm) { - struct drm_i915_private *dev_priv = vm->i915; int ret; - ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA); + ret = setup_scratch_page(vm, I915_GFP_DMA); if (ret) return ret; - vm->scratch_pt = alloc_pt(dev_priv); + vm->scratch_pt = alloc_pt(vm); if (IS_ERR(vm->scratch_pt)) { ret = PTR_ERR(vm->scratch_pt); goto free_scratch_page; } - vm->scratch_pd = alloc_pd(dev_priv); + vm->scratch_pd = alloc_pd(vm); if (IS_ERR(vm->scratch_pd)) { ret = PTR_ERR(vm->scratch_pd); goto free_pt; } - if (USES_FULL_48BIT_PPGTT(dev_priv)) { - vm->scratch_pdp = alloc_pdp(dev_priv); + if (use_4lvl(vm)) { + vm->scratch_pdp = alloc_pdp(vm); if (IS_ERR(vm->scratch_pdp)) { ret = PTR_ERR(vm->scratch_pdp); goto free_pd; @@ -988,29 +985,30 @@ static int gen8_init_scratch(struct i915_address_space *vm) gen8_initialize_pt(vm, vm->scratch_pt); gen8_initialize_pd(vm, vm->scratch_pd); - if (USES_FULL_48BIT_PPGTT(dev_priv)) + if (use_4lvl(vm)) gen8_initialize_pdp(vm, vm->scratch_pdp); return 0; free_pd: - free_pd(dev_priv, vm->scratch_pd); + free_pd(vm, vm->scratch_pd); free_pt: - free_pt(dev_priv, vm->scratch_pt); + free_pt(vm, vm->scratch_pt); free_scratch_page: - cleanup_scratch_page(dev_priv, &vm->scratch_page); + cleanup_scratch_page(vm); return ret; } static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) { + struct i915_address_space *vm = &ppgtt->base; + struct drm_i915_private *dev_priv = vm->i915; enum vgt_g2v_type msg; - struct drm_i915_private *dev_priv = ppgtt->base.i915; int i; - if (USES_FULL_48BIT_PPGTT(dev_priv)) { - u64 daddr = px_dma(&ppgtt->pml4); + if (use_4lvl(vm)) { + const u64 daddr = px_dma(&ppgtt->pml4); I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); @@ -1018,8 +1016,8 @@ static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY); } else { - for (i = 0; i < GEN8_LEGACY_PDPES; i++) { - u64 daddr = i915_page_dir_dma_addr(ppgtt, i); + for (i = 0; i < GEN8_3LVL_PDPES; i++) { + const u64 daddr = i915_page_dir_dma_addr(ppgtt, i); I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr)); I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr)); @@ -1036,44 +1034,42 @@ static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) static void gen8_free_scratch(struct i915_address_space *vm) { - struct drm_i915_private *dev_priv = vm->i915; - - if (USES_FULL_48BIT_PPGTT(dev_priv)) - free_pdp(dev_priv, vm->scratch_pdp); - free_pd(dev_priv, vm->scratch_pd); - free_pt(dev_priv, vm->scratch_pt); - cleanup_scratch_page(dev_priv, &vm->scratch_page); + if (use_4lvl(vm)) + free_pdp(vm, vm->scratch_pdp); + free_pd(vm, vm->scratch_pd); + free_pt(vm, vm->scratch_pt); + cleanup_scratch_page(vm); } -static void gen8_ppgtt_cleanup_3lvl(struct drm_i915_private *dev_priv, +static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp) { + const unsigned int pdpes = i915_pdpes_per_pdp(vm); int i; - for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)) { - if (WARN_ON(!pdp->page_directory[i])) + for (i = 0; i < pdpes; i++) { + if (pdp->page_directory[i] == vm->scratch_pd) continue; - gen8_free_page_tables(dev_priv, pdp->page_directory[i]); - free_pd(dev_priv, pdp->page_directory[i]); + gen8_free_page_tables(vm, pdp->page_directory[i]); + free_pd(vm, pdp->page_directory[i]); } - free_pdp(dev_priv, pdp); + free_pdp(vm, pdp); } static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt) { - struct drm_i915_private *dev_priv = ppgtt->base.i915; int i; - for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { - if (WARN_ON(!ppgtt->pml4.pdps[i])) + for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) { + if (ppgtt->pml4.pdps[i] == ppgtt->base.scratch_pdp) continue; - gen8_ppgtt_cleanup_3lvl(dev_priv, ppgtt->pml4.pdps[i]); + gen8_ppgtt_cleanup_3lvl(&ppgtt->base, ppgtt->pml4.pdps[i]); } - cleanup_px(dev_priv, &ppgtt->pml4); + cleanup_px(&ppgtt->base, &ppgtt->pml4); } static void gen8_ppgtt_cleanup(struct i915_address_space *vm) @@ -1084,414 +1080,162 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) if (intel_vgpu_active(dev_priv)) gen8_ppgtt_notify_vgt(ppgtt, false); - if (!USES_FULL_48BIT_PPGTT(dev_priv)) - gen8_ppgtt_cleanup_3lvl(dev_priv, &ppgtt->pdp); - else + if (use_4lvl(vm)) gen8_ppgtt_cleanup_4lvl(ppgtt); + else + gen8_ppgtt_cleanup_3lvl(&ppgtt->base, &ppgtt->pdp); gen8_free_scratch(vm); } -/** - * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range. - * @vm: Master vm structure. - * @pd: Page directory for this address range. - * @start: Starting virtual address to begin allocations. - * @length: Size of the allocations. - * @new_pts: Bitmap set by function with new allocations. Likely used by the - * caller to free on error. - * - * Allocate the required number of page tables. Extremely similar to - * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by - * the page directory boundary (instead of the page directory pointer). That - * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is - * possible, and likely that the caller will need to use multiple calls of this - * function to achieve the appropriate allocation. - * - * Return: 0 if success; negative error code otherwise. - */ -static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm, - struct i915_page_directory *pd, - uint64_t start, - uint64_t length, - unsigned long *new_pts) +static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, + struct i915_page_directory *pd, + u64 start, u64 length) { - struct drm_i915_private *dev_priv = vm->i915; struct i915_page_table *pt; - uint32_t pde; + u64 from = start; + unsigned int pde; gen8_for_each_pde(pt, pd, start, length, pde) { - /* Don't reallocate page tables */ - if (test_bit(pde, pd->used_pdes)) { - /* Scratch is never allocated this way */ - WARN_ON(pt == vm->scratch_pt); - continue; - } - - pt = alloc_pt(dev_priv); - if (IS_ERR(pt)) - goto unwind_out; - - gen8_initialize_pt(vm, pt); - pd->page_table[pde] = pt; - __set_bit(pde, new_pts); - trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT); - } - - return 0; - -unwind_out: - for_each_set_bit(pde, new_pts, I915_PDES) - free_pt(dev_priv, pd->page_table[pde]); - - return -ENOMEM; -} - -/** - * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range. - * @vm: Master vm structure. - * @pdp: Page directory pointer for this address range. - * @start: Starting virtual address to begin allocations. - * @length: Size of the allocations. - * @new_pds: Bitmap set by function with new allocations. Likely used by the - * caller to free on error. - * - * Allocate the required number of page directories starting at the pde index of - * @start, and ending at the pde index @start + @length. This function will skip - * over already allocated page directories within the range, and only allocate - * new ones, setting the appropriate pointer within the pdp as well as the - * correct position in the bitmap @new_pds. - * - * The function will only allocate the pages within the range for a give page - * directory pointer. In other words, if @start + @length straddles a virtually - * addressed PDP boundary (512GB for 4k pages), there will be more allocations - * required by the caller, This is not currently possible, and the BUG in the - * code will prevent it. - * - * Return: 0 if success; negative error code otherwise. - */ -static int -gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm, - struct i915_page_directory_pointer *pdp, - uint64_t start, - uint64_t length, - unsigned long *new_pds) -{ - struct drm_i915_private *dev_priv = vm->i915; - struct i915_page_directory *pd; - uint32_t pdpe; - uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv); - - WARN_ON(!bitmap_empty(new_pds, pdpes)); - - gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - if (test_bit(pdpe, pdp->used_pdpes)) - continue; - - pd = alloc_pd(dev_priv); - if (IS_ERR(pd)) - goto unwind_out; - - gen8_initialize_pd(vm, pd); - pdp->page_directory[pdpe] = pd; - __set_bit(pdpe, new_pds); - trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT); - } + if (pt == vm->scratch_pt) { + pt = alloc_pt(vm); + if (IS_ERR(pt)) + goto unwind; - return 0; - -unwind_out: - for_each_set_bit(pdpe, new_pds, pdpes) - free_pd(dev_priv, pdp->page_directory[pdpe]); - - return -ENOMEM; -} - -/** - * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range. - * @vm: Master vm structure. - * @pml4: Page map level 4 for this address range. - * @start: Starting virtual address to begin allocations. - * @length: Size of the allocations. - * @new_pdps: Bitmap set by function with new allocations. Likely used by the - * caller to free on error. - * - * Allocate the required number of page directory pointers. Extremely similar to - * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs(). - * The main difference is here we are limited by the pml4 boundary (instead of - * the page directory pointer). - * - * Return: 0 if success; negative error code otherwise. - */ -static int -gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, - struct i915_pml4 *pml4, - uint64_t start, - uint64_t length, - unsigned long *new_pdps) -{ - struct drm_i915_private *dev_priv = vm->i915; - struct i915_page_directory_pointer *pdp; - uint32_t pml4e; - - WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4)); - - gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - if (!test_bit(pml4e, pml4->used_pml4es)) { - pdp = alloc_pdp(dev_priv); - if (IS_ERR(pdp)) - goto unwind_out; + gen8_initialize_pt(vm, pt); - gen8_initialize_pdp(vm, pdp); - pml4->pdps[pml4e] = pdp; - __set_bit(pml4e, new_pdps); - trace_i915_page_directory_pointer_entry_alloc(vm, - pml4e, - start, - GEN8_PML4E_SHIFT); + gen8_ppgtt_set_pde(vm, pd, pt, pde); + pd->used_pdes++; + GEM_BUG_ON(pd->used_pdes > I915_PDES); } - } - - return 0; - -unwind_out: - for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) - free_pdp(dev_priv, pml4->pdps[pml4e]); - - return -ENOMEM; -} - -static void -free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts) -{ - kfree(new_pts); - kfree(new_pds); -} - -/* Fills in the page directory bitmap, and the array of page tables bitmap. Both - * of these are based on the number of PDPEs in the system. - */ -static -int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, - unsigned long **new_pts, - uint32_t pdpes) -{ - unsigned long *pds; - unsigned long *pts; - - pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY); - if (!pds) - return -ENOMEM; - - pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long), - GFP_TEMPORARY); - if (!pts) - goto err_out; - - *new_pds = pds; - *new_pts = pts; + pt->used_ptes += gen8_pte_count(start, length); + } return 0; -err_out: - free_gen8_temp_bitmaps(pds, pts); +unwind: + gen8_ppgtt_clear_pd(vm, pd, from, start - from); return -ENOMEM; } -static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, - struct i915_page_directory_pointer *pdp, - uint64_t start, - uint64_t length) +static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, + struct i915_page_directory_pointer *pdp, + u64 start, u64 length) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - unsigned long *new_page_dirs, *new_page_tables; - struct drm_i915_private *dev_priv = vm->i915; struct i915_page_directory *pd; - const uint64_t orig_start = start; - const uint64_t orig_length = length; - uint32_t pdpe; - uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv); + u64 from = start; + unsigned int pdpe; int ret; - ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); - if (ret) - return ret; - - /* Do the allocations first so we can easily bail out */ - ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length, - new_page_dirs); - if (ret) { - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); - return ret; - } - - /* For every page directory referenced, allocate page tables */ gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length, - new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES)); - if (ret) - goto err_out; - } + if (pd == vm->scratch_pd) { + pd = alloc_pd(vm); + if (IS_ERR(pd)) + goto unwind; - start = orig_start; - length = orig_length; + gen8_initialize_pd(vm, pd); + gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); + pdp->used_pdpes++; + GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm)); - /* Allocations have completed successfully, so set the bitmaps, and do - * the mappings. */ - gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - gen8_pde_t *const page_directory = kmap_px(pd); - struct i915_page_table *pt; - uint64_t pd_len = length; - uint64_t pd_start = start; - uint32_t pde; - - /* Every pd should be allocated, we just did that above. */ - WARN_ON(!pd); - - gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { - /* Same reasoning as pd */ - WARN_ON(!pt); - WARN_ON(!pd_len); - WARN_ON(!gen8_pte_count(pd_start, pd_len)); - - /* Set our used ptes within the page table */ - bitmap_set(pt->used_ptes, - gen8_pte_index(pd_start), - gen8_pte_count(pd_start, pd_len)); - - /* Our pde is now pointing to the pagetable, pt */ - __set_bit(pde, pd->used_pdes); - - /* Map the PDE to the page table */ - page_directory[pde] = gen8_pde_encode(px_dma(pt), - I915_CACHE_LLC); - trace_i915_page_table_entry_map(&ppgtt->base, pde, pt, - gen8_pte_index(start), - gen8_pte_count(start, length), - GEN8_PTES); - - /* NB: We haven't yet mapped ptes to pages. At this - * point we're still relying on insert_entries() */ + mark_tlbs_dirty(i915_vm_to_ppgtt(vm)); } - kunmap_px(ppgtt, page_directory); - __set_bit(pdpe, pdp->used_pdpes); - gen8_setup_pdpe(ppgtt, pdp, pd, pdpe); + ret = gen8_ppgtt_alloc_pd(vm, pd, start, length); + if (unlikely(ret)) + goto unwind_pd; } - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); - mark_tlbs_dirty(ppgtt); return 0; -err_out: - while (pdpe--) { - unsigned long temp; - - for_each_set_bit(temp, new_page_tables + pdpe * - BITS_TO_LONGS(I915_PDES), I915_PDES) - free_pt(dev_priv, - pdp->page_directory[pdpe]->page_table[temp]); +unwind_pd: + if (!pd->used_pdes) { + gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); + GEM_BUG_ON(!pdp->used_pdpes); + pdp->used_pdpes--; + free_pd(vm, pd); } +unwind: + gen8_ppgtt_clear_pdp(vm, pdp, from, start - from); + return -ENOMEM; +} - for_each_set_bit(pdpe, new_page_dirs, pdpes) - free_pd(dev_priv, pdp->page_directory[pdpe]); - - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); - mark_tlbs_dirty(ppgtt); - return ret; +static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm, + u64 start, u64 length) +{ + return gen8_ppgtt_alloc_pdp(vm, + &i915_vm_to_ppgtt(vm)->pdp, start, length); } -static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, - struct i915_pml4 *pml4, - uint64_t start, - uint64_t length) +static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, + u64 start, u64 length) { - DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4); struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + struct i915_pml4 *pml4 = &ppgtt->pml4; struct i915_page_directory_pointer *pdp; - uint64_t pml4e; - int ret = 0; - - /* Do the pml4 allocations first, so we don't need to track the newly - * allocated tables below the pdp */ - bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4); - - /* The pagedirectory and pagetable allocations are done in the shared 3 - * and 4 level code. Just allocate the pdps. - */ - ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length, - new_pdps); - if (ret) - return ret; - - WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2, - "The allocation has spanned more than 512GB. " - "It is highly likely this is incorrect."); + u64 from = start; + u32 pml4e; + int ret; gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - WARN_ON(!pdp); + if (pml4->pdps[pml4e] == vm->scratch_pdp) { + pdp = alloc_pdp(vm); + if (IS_ERR(pdp)) + goto unwind; - ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length); - if (ret) - goto err_out; + gen8_initialize_pdp(vm, pdp); + gen8_ppgtt_set_pml4e(pml4, pdp, pml4e); + } - gen8_setup_pml4e(ppgtt, pml4, pdp, pml4e); + ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length); + if (unlikely(ret)) + goto unwind_pdp; } - bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, - GEN8_PML4ES_PER_PML4); - return 0; -err_out: - for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) - gen8_ppgtt_cleanup_3lvl(vm->i915, pml4->pdps[pml4e]); - - return ret; -} - -static int gen8_alloc_va_range(struct i915_address_space *vm, - uint64_t start, uint64_t length) -{ - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - - if (USES_FULL_48BIT_PPGTT(vm->i915)) - return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length); - else - return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); +unwind_pdp: + if (!pdp->used_pdpes) { + gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); + free_pdp(vm, pdp); + } +unwind: + gen8_ppgtt_clear_4lvl(vm, from, start - from); + return -ENOMEM; } -static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, - uint64_t start, uint64_t length, +static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, + struct i915_page_directory_pointer *pdp, + u64 start, u64 length, gen8_pte_t scratch_pte, struct seq_file *m) { + struct i915_address_space *vm = &ppgtt->base; struct i915_page_directory *pd; - uint32_t pdpe; + u32 pdpe; gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { struct i915_page_table *pt; - uint64_t pd_len = length; - uint64_t pd_start = start; - uint32_t pde; + u64 pd_len = length; + u64 pd_start = start; + u32 pde; - if (!test_bit(pdpe, pdp->used_pdpes)) + if (pdp->page_directory[pdpe] == ppgtt->base.scratch_pd) continue; seq_printf(m, "\tPDPE #%d\n", pdpe); gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { - uint32_t pte; + u32 pte; gen8_pte_t *pt_vaddr; - if (!test_bit(pde, pd->used_pdes)) + if (pd->page_table[pde] == ppgtt->base.scratch_pt) continue; - pt_vaddr = kmap_px(pt); + pt_vaddr = kmap_atomic_px(pt); for (pte = 0; pte < GEN8_PTES; pte += 4) { - uint64_t va = - (pdpe << GEN8_PDPE_SHIFT) | - (pde << GEN8_PDE_SHIFT) | - (pte << GEN8_PTE_SHIFT); + u64 va = (pdpe << GEN8_PDPE_SHIFT | + pde << GEN8_PDE_SHIFT | + pte << GEN8_PTE_SHIFT); int i; bool found = false; @@ -1510,9 +1254,6 @@ static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, } seq_puts(m, "\n"); } - /* don't use kunmap_px, it could trigger - * an unnecessary flush. - */ kunmap_atomic(pt_vaddr); } } @@ -1521,53 +1262,57 @@ static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) { struct i915_address_space *vm = &ppgtt->base; - uint64_t start = ppgtt->base.start; - uint64_t length = ppgtt->base.total; - gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC); + const gen8_pte_t scratch_pte = + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); + u64 start = 0, length = ppgtt->base.total; - if (!USES_FULL_48BIT_PPGTT(vm->i915)) { - gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m); - } else { - uint64_t pml4e; + if (use_4lvl(vm)) { + u64 pml4e; struct i915_pml4 *pml4 = &ppgtt->pml4; struct i915_page_directory_pointer *pdp; gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - if (!test_bit(pml4e, pml4->used_pml4es)) + if (pml4->pdps[pml4e] == ppgtt->base.scratch_pdp) continue; seq_printf(m, " PML4E #%llu\n", pml4e); - gen8_dump_pdp(pdp, start, length, scratch_pte, m); + gen8_dump_pdp(ppgtt, pdp, start, length, scratch_pte, m); } + } else { + gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m); } } -static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) +static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt) { - unsigned long *new_page_dirs, *new_page_tables; - uint32_t pdpes = I915_PDPES_PER_PDP(to_i915(ppgtt->base.dev)); - int ret; + struct i915_address_space *vm = &ppgtt->base; + struct i915_page_directory_pointer *pdp = &ppgtt->pdp; + struct i915_page_directory *pd; + u64 start = 0, length = ppgtt->base.total; + u64 from = start; + unsigned int pdpe; - /* We allocate temp bitmap for page tables for no gain - * but as this is for init only, lets keep the things simple - */ - ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); - if (ret) - return ret; + gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { + pd = alloc_pd(vm); + if (IS_ERR(pd)) + goto unwind; - /* Allocate for all pdps regardless of how the ppgtt - * was defined. - */ - ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp, - 0, 1ULL << 32, - new_page_dirs); - if (!ret) - *ppgtt->pdp.used_pdpes = *new_page_dirs; + gen8_initialize_pd(vm, pd); + gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); + pdp->used_pdpes++; + } - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); + pdp->used_pdpes++; /* never remove */ + return 0; - return ret; +unwind: + start -= from; + gen8_for_each_pdpe(pd, pdp, from, start, pdpe) { + gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); + free_pd(vm, pd); + } + pdp->used_pdpes = 0; + return -ENOMEM; } /* @@ -1579,52 +1324,64 @@ static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) */ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) { - struct drm_i915_private *dev_priv = ppgtt->base.i915; + struct i915_address_space *vm = &ppgtt->base; + struct drm_i915_private *dev_priv = vm->i915; int ret; + ppgtt->base.total = USES_FULL_48BIT_PPGTT(dev_priv) ? + 1ULL << 48 : + 1ULL << 32; + ret = gen8_init_scratch(&ppgtt->base); - if (ret) + if (ret) { + ppgtt->base.total = 0; return ret; + } - ppgtt->base.start = 0; - ppgtt->base.cleanup = gen8_ppgtt_cleanup; - ppgtt->base.allocate_va_range = gen8_alloc_va_range; - ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; - ppgtt->base.clear_range = gen8_ppgtt_clear_range; - ppgtt->base.unbind_vma = ppgtt_unbind_vma; - ppgtt->base.bind_vma = ppgtt_bind_vma; - ppgtt->debug_dump = gen8_dump_ppgtt; + /* There are only few exceptions for gen >=6. chv and bxt. + * And we are not sure about the latter so play safe for now. + */ + if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) + ppgtt->base.pt_kmap_wc = true; - if (USES_FULL_48BIT_PPGTT(dev_priv)) { - ret = setup_px(dev_priv, &ppgtt->pml4); + if (use_4lvl(vm)) { + ret = setup_px(&ppgtt->base, &ppgtt->pml4); if (ret) goto free_scratch; gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4); - ppgtt->base.total = 1ULL << 48; - ppgtt->switch_mm = gen8_48b_mm_switch; + ppgtt->switch_mm = gen8_mm_switch_4lvl; + ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl; + ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl; + ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl; } else { - ret = __pdp_init(dev_priv, &ppgtt->pdp); + ret = __pdp_init(&ppgtt->base, &ppgtt->pdp); if (ret) goto free_scratch; - ppgtt->base.total = 1ULL << 32; - ppgtt->switch_mm = gen8_legacy_mm_switch; - trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, - 0, 0, - GEN8_PML4E_SHIFT); - if (intel_vgpu_active(dev_priv)) { - ret = gen8_preallocate_top_level_pdps(ppgtt); - if (ret) + ret = gen8_preallocate_top_level_pdp(ppgtt); + if (ret) { + __pdp_fini(&ppgtt->pdp); goto free_scratch; + } } + + ppgtt->switch_mm = gen8_mm_switch_3lvl; + ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_3lvl; + ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl; + ppgtt->base.clear_range = gen8_ppgtt_clear_3lvl; } if (intel_vgpu_active(dev_priv)) gen8_ppgtt_notify_vgt(ppgtt, true); + ppgtt->base.cleanup = gen8_ppgtt_cleanup; + ppgtt->base.unbind_vma = ppgtt_unbind_vma; + ppgtt->base.bind_vma = ppgtt_bind_vma; + ppgtt->debug_dump = gen8_dump_ppgtt; + return 0; free_scratch: @@ -1637,9 +1394,8 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) struct i915_address_space *vm = &ppgtt->base; struct i915_page_table *unused; gen6_pte_t scratch_pte; - uint32_t pd_entry; - uint32_t pte, pde; - uint32_t start = ppgtt->base.start, length = ppgtt->base.total; + u32 pd_entry, pte, pde; + u32 start = 0, length = ppgtt->base.total; scratch_pte = vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0); @@ -1658,7 +1414,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) expected); seq_printf(m, "\tPDE: %x\n", pd_entry); - pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]); + pt_vaddr = kmap_atomic_px(ppgtt->pd.page_table[pde]); for (pte = 0; pte < GEN6_PTES; pte+=4) { unsigned long va = @@ -1681,73 +1437,59 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) } seq_puts(m, "\n"); } - kunmap_px(ppgtt, pt_vaddr); + kunmap_atomic(pt_vaddr); } } /* Write pde (index) from the page directory @pd to the page table @pt */ -static void gen6_write_pde(struct i915_page_directory *pd, - const int pde, struct i915_page_table *pt) +static inline void gen6_write_pde(const struct i915_hw_ppgtt *ppgtt, + const unsigned int pde, + const struct i915_page_table *pt) { /* Caller needs to make sure the write completes if necessary */ - struct i915_hw_ppgtt *ppgtt = - container_of(pd, struct i915_hw_ppgtt, pd); - u32 pd_entry; - - pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt)); - pd_entry |= GEN6_PDE_VALID; - - writel(pd_entry, ppgtt->pd_addr + pde); + writel_relaxed(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID, + ppgtt->pd_addr + pde); } /* Write all the page tables found in the ppgtt structure to incrementing page * directories. */ -static void gen6_write_page_range(struct drm_i915_private *dev_priv, - struct i915_page_directory *pd, - uint32_t start, uint32_t length) +static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt, + u32 start, u32 length) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; struct i915_page_table *pt; - uint32_t pde; + unsigned int pde; - gen6_for_each_pde(pt, pd, start, length, pde) - gen6_write_pde(pd, pde, pt); + gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) + gen6_write_pde(ppgtt, pde, pt); - /* Make sure write is complete before other code can use this page - * table. Also require for WC mapped PTEs */ - readl(ggtt->gsm); + mark_tlbs_dirty(ppgtt); + wmb(); } -static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) +static inline u32 get_pd_offset(struct i915_hw_ppgtt *ppgtt) { - BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); - - return (ppgtt->pd.base.ggtt_offset / 64) << 16; + GEM_BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); + return ppgtt->pd.base.ggtt_offset << 10; } static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; - int ret; + u32 *cs; /* NB: TLBs must be flushed and invalidated before a switch */ - ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); - if (ret) - return ret; - - ret = intel_ring_begin(req, 6); - if (ret) - return ret; - - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); - intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); - intel_ring_emit(ring, PP_DIR_DCLV_2G); - intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); - intel_ring_emit(ring, get_pd_offset(ppgtt)); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(2); + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine)); + *cs++ = PP_DIR_DCLV_2G; + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); + *cs++ = get_pd_offset(ppgtt); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1755,33 +1497,21 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; - int ret; + u32 *cs; /* NB: TLBs must be flushed and invalidated before a switch */ - ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); - if (ret) - return ret; - - ret = intel_ring_begin(req, 6); - if (ret) - return ret; - - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); - intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); - intel_ring_emit(ring, PP_DIR_DCLV_2G); - intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); - intel_ring_emit(ring, get_pd_offset(ppgtt)); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); - - /* XXX: RCS is the only one to auto invalidate the TLBs? */ - if (engine->id != RCS) { - ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); - if (ret) - return ret; - } + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(2); + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine)); + *cs++ = PP_DIR_DCLV_2G; + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); + *cs++ = get_pd_offset(ppgtt); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1813,7 +1543,7 @@ static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv) static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; - uint32_t ecochk, ecobits; + u32 ecochk, ecobits; enum intel_engine_id id; ecobits = I915_READ(GAC_ECO_BITS); @@ -1837,7 +1567,7 @@ static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv) static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv) { - uint32_t ecochk, gab_ctl, ecobits; + u32 ecochk, gab_ctl, ecobits; ecobits = I915_READ(GAC_ECO_BITS); I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | @@ -1854,168 +1584,124 @@ static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv) /* PPGTT support for Sandybdrige/Gen6 and later */ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, - uint64_t start, - uint64_t length) + u64 start, u64 length) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - gen6_pte_t *pt_vaddr, scratch_pte; - unsigned first_entry = start >> PAGE_SHIFT; - unsigned num_entries = length >> PAGE_SHIFT; - unsigned act_pt = first_entry / GEN6_PTES; - unsigned first_pte = first_entry % GEN6_PTES; - unsigned last_pte, i; - - scratch_pte = vm->pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC, 0); + unsigned int first_entry = start >> PAGE_SHIFT; + unsigned int pde = first_entry / GEN6_PTES; + unsigned int pte = first_entry % GEN6_PTES; + unsigned int num_entries = length >> PAGE_SHIFT; + gen6_pte_t scratch_pte = + vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0); while (num_entries) { - last_pte = first_pte + num_entries; - if (last_pte > GEN6_PTES) - last_pte = GEN6_PTES; + struct i915_page_table *pt = ppgtt->pd.page_table[pde++]; + unsigned int end = min(pte + num_entries, GEN6_PTES); + gen6_pte_t *vaddr; - pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); + num_entries -= end - pte; - for (i = first_pte; i < last_pte; i++) - pt_vaddr[i] = scratch_pte; + /* Note that the hw doesn't support removing PDE on the fly + * (they are cached inside the context with no means to + * invalidate the cache), so we can only reset the PTE + * entries back to scratch. + */ - kunmap_px(ppgtt, pt_vaddr); + vaddr = kmap_atomic_px(pt); + do { + vaddr[pte++] = scratch_pte; + } while (pte < end); + kunmap_atomic(vaddr); - num_entries -= last_pte - first_pte; - first_pte = 0; - act_pt++; + pte = 0; } } static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, struct sg_table *pages, - uint64_t start, - enum i915_cache_level cache_level, u32 flags) + u64 start, + enum i915_cache_level cache_level, + u32 flags) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); unsigned first_entry = start >> PAGE_SHIFT; unsigned act_pt = first_entry / GEN6_PTES; unsigned act_pte = first_entry % GEN6_PTES; - gen6_pte_t *pt_vaddr = NULL; - struct sgt_iter sgt_iter; - dma_addr_t addr; + const u32 pte_encode = vm->pte_encode(0, cache_level, flags); + struct sgt_dma iter; + gen6_pte_t *vaddr; + + vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]); + iter.sg = pages->sgl; + iter.dma = sg_dma_address(iter.sg); + iter.max = iter.dma + iter.sg->length; + do { + vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma); - for_each_sgt_dma(addr, sgt_iter, pages) { - if (pt_vaddr == NULL) - pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); + iter.dma += PAGE_SIZE; + if (iter.dma == iter.max) { + iter.sg = __sg_next(iter.sg); + if (!iter.sg) + break; - pt_vaddr[act_pte] = - vm->pte_encode(addr, cache_level, flags); + iter.dma = sg_dma_address(iter.sg); + iter.max = iter.dma + iter.sg->length; + } if (++act_pte == GEN6_PTES) { - kunmap_px(ppgtt, pt_vaddr); - pt_vaddr = NULL; - act_pt++; + kunmap_atomic(vaddr); + vaddr = kmap_atomic_px(ppgtt->pd.page_table[++act_pt]); act_pte = 0; } - } - - if (pt_vaddr) - kunmap_px(ppgtt, pt_vaddr); + } while (1); + kunmap_atomic(vaddr); } static int gen6_alloc_va_range(struct i915_address_space *vm, - uint64_t start_in, uint64_t length_in) + u64 start, u64 length) { - DECLARE_BITMAP(new_page_tables, I915_PDES); - struct drm_i915_private *dev_priv = vm->i915; - struct i915_ggtt *ggtt = &dev_priv->ggtt; struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_table *pt; - uint32_t start, length, start_save, length_save; - uint32_t pde; - int ret; + u64 from = start; + unsigned int pde; + bool flush = false; - start = start_save = start_in; - length = length_save = length_in; - - bitmap_zero(new_page_tables, I915_PDES); - - /* The allocation is done in two stages so that we can bail out with - * minimal amount of pain. The first stage finds new page tables that - * need allocation. The second stage marks use ptes within the page - * tables. - */ gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { - if (pt != vm->scratch_pt) { - WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES)); - continue; - } - - /* We've already allocated a page table */ - WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES)); + if (pt == vm->scratch_pt) { + pt = alloc_pt(vm); + if (IS_ERR(pt)) + goto unwind_out; - pt = alloc_pt(dev_priv); - if (IS_ERR(pt)) { - ret = PTR_ERR(pt); - goto unwind_out; + gen6_initialize_pt(vm, pt); + ppgtt->pd.page_table[pde] = pt; + gen6_write_pde(ppgtt, pde, pt); + flush = true; } - - gen6_initialize_pt(vm, pt); - - ppgtt->pd.page_table[pde] = pt; - __set_bit(pde, new_page_tables); - trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT); } - start = start_save; - length = length_save; - - gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { - DECLARE_BITMAP(tmp_bitmap, GEN6_PTES); - - bitmap_zero(tmp_bitmap, GEN6_PTES); - bitmap_set(tmp_bitmap, gen6_pte_index(start), - gen6_pte_count(start, length)); - - if (__test_and_clear_bit(pde, new_page_tables)) - gen6_write_pde(&ppgtt->pd, pde, pt); - - trace_i915_page_table_entry_map(vm, pde, pt, - gen6_pte_index(start), - gen6_pte_count(start, length), - GEN6_PTES); - bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes, - GEN6_PTES); + if (flush) { + mark_tlbs_dirty(ppgtt); + wmb(); } - WARN_ON(!bitmap_empty(new_page_tables, I915_PDES)); - - /* Make sure write is complete before other code can use this page - * table. Also require for WC mapped PTEs */ - readl(ggtt->gsm); - - mark_tlbs_dirty(ppgtt); return 0; unwind_out: - for_each_set_bit(pde, new_page_tables, I915_PDES) { - struct i915_page_table *pt = ppgtt->pd.page_table[pde]; - - ppgtt->pd.page_table[pde] = vm->scratch_pt; - free_pt(dev_priv, pt); - } - - mark_tlbs_dirty(ppgtt); - return ret; + gen6_ppgtt_clear_range(vm, from, start); + return -ENOMEM; } static int gen6_init_scratch(struct i915_address_space *vm) { - struct drm_i915_private *dev_priv = vm->i915; int ret; - ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA); + ret = setup_scratch_page(vm, I915_GFP_DMA); if (ret) return ret; - vm->scratch_pt = alloc_pt(dev_priv); + vm->scratch_pt = alloc_pt(vm); if (IS_ERR(vm->scratch_pt)) { - cleanup_scratch_page(dev_priv, &vm->scratch_page); + cleanup_scratch_page(vm); return PTR_ERR(vm->scratch_pt); } @@ -2026,25 +1712,22 @@ static int gen6_init_scratch(struct i915_address_space *vm) static void gen6_free_scratch(struct i915_address_space *vm) { - struct drm_i915_private *dev_priv = vm->i915; - - free_pt(dev_priv, vm->scratch_pt); - cleanup_scratch_page(dev_priv, &vm->scratch_page); + free_pt(vm, vm->scratch_pt); + cleanup_scratch_page(vm); } static void gen6_ppgtt_cleanup(struct i915_address_space *vm) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_directory *pd = &ppgtt->pd; - struct drm_i915_private *dev_priv = vm->i915; struct i915_page_table *pt; - uint32_t pde; + u32 pde; drm_mm_remove_node(&ppgtt->node); gen6_for_all_pdes(pt, pd, pde) if (pt != vm->scratch_pt) - free_pt(dev_priv, pt); + free_pt(vm, pt); gen6_free_scratch(vm); } @@ -2077,6 +1760,12 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) if (ppgtt->node.start < ggtt->mappable_end) DRM_DEBUG("Forced to use aperture for PDEs\n"); + ppgtt->pd.base.ggtt_offset = + ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); + + ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + + ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t); + return 0; err_out: @@ -2090,10 +1779,10 @@ static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) } static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt, - uint64_t start, uint64_t length) + u64 start, u64 length) { struct i915_page_table *unused; - uint32_t pde; + u32 pde; gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt; @@ -2119,32 +1808,30 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (ret) return ret; - ppgtt->base.allocate_va_range = gen6_alloc_va_range; + ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; + + gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); + gen6_write_page_range(ppgtt, 0, ppgtt->base.total); + + ret = gen6_alloc_va_range(&ppgtt->base, 0, ppgtt->base.total); + if (ret) { + gen6_ppgtt_cleanup(&ppgtt->base); + return ret; + } + ppgtt->base.clear_range = gen6_ppgtt_clear_range; ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; ppgtt->base.unbind_vma = ppgtt_unbind_vma; ppgtt->base.bind_vma = ppgtt_bind_vma; ppgtt->base.cleanup = gen6_ppgtt_cleanup; - ppgtt->base.start = 0; - ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; ppgtt->debug_dump = gen6_dump_ppgtt; - ppgtt->pd.base.ggtt_offset = - ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); - - ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + - ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t); - - gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); - - gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total); - DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n", ppgtt->node.size >> 20, ppgtt->node.start / PAGE_SIZE); - DRM_DEBUG("Adding PPGTT at offset %x\n", - ppgtt->pd.base.ggtt_offset << 10); + DRM_DEBUG_DRIVER("Adding PPGTT at offset %x\n", + ppgtt->pd.base.ggtt_offset << 10); return 0; } @@ -2153,6 +1840,7 @@ static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt, struct drm_i915_private *dev_priv) { ppgtt->base.i915 = dev_priv; + ppgtt->base.dma = &dev_priv->drm.pdev->dev; if (INTEL_INFO(dev_priv)->gen < 8) return gen6_ppgtt_init(ppgtt); @@ -2165,15 +1853,23 @@ static void i915_address_space_init(struct i915_address_space *vm, const char *name) { i915_gem_timeline_init(dev_priv, &vm->timeline, name); - drm_mm_init(&vm->mm, vm->start, vm->total); + + drm_mm_init(&vm->mm, 0, vm->total); + vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; + INIT_LIST_HEAD(&vm->active_list); INIT_LIST_HEAD(&vm->inactive_list); INIT_LIST_HEAD(&vm->unbound_list); + list_add_tail(&vm->global_link, &dev_priv->vm_list); + pagevec_init(&vm->free_pages, false); } static void i915_address_space_fini(struct i915_address_space *vm) { + if (pagevec_count(&vm->free_pages)) + vm_free_pages_release(vm); + i915_gem_timeline_fini(&vm->timeline); drm_mm_takedown(&vm->mm); list_del(&vm->global_link); @@ -2185,34 +1881,17 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv) * called on driver load and after a GPU reset, so you can place * workarounds here even if they get overwritten by GPU reset. */ - /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */ + /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk */ if (IS_BROADWELL(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); else if (IS_CHERRYVIEW(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); - else if (IS_SKYLAKE(dev_priv)) + else if (IS_GEN9_BC(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); - else if (IS_BROXTON(dev_priv)) + else if (IS_GEN9_LP(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); } -static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_private *dev_priv, - struct drm_i915_file_private *file_priv, - const char *name) -{ - int ret; - - ret = __hw_ppgtt_init(ppgtt, dev_priv); - if (ret == 0) { - kref_init(&ppgtt->ref); - i915_address_space_init(&ppgtt->base, dev_priv, name); - ppgtt->base.file = file_priv; - } - - return ret; -} - int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv) { gtt_write_workarounds(dev_priv); @@ -2250,12 +1929,16 @@ i915_ppgtt_create(struct drm_i915_private *dev_priv, if (!ppgtt) return ERR_PTR(-ENOMEM); - ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv, name); + ret = __hw_ppgtt_init(ppgtt, dev_priv); if (ret) { kfree(ppgtt); return ERR_PTR(ret); } + kref_init(&ppgtt->ref); + i915_address_space_init(&ppgtt->base, dev_priv, name); + ppgtt->base.file = fpriv; + trace_i915_ppgtt_create(&ppgtt->base); return ppgtt; @@ -2294,9 +1977,8 @@ void i915_ppgtt_release(struct kref *kref) WARN_ON(!list_empty(&ppgtt->base.inactive_list)); WARN_ON(!list_empty(&ppgtt->base.unbound_list)); - i915_address_space_fini(&ppgtt->base); - ppgtt->base.cleanup(&ppgtt->base); + i915_address_space_fini(&ppgtt->base); kfree(ppgtt); } @@ -2358,7 +2040,7 @@ void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv) i915_check_and_clear_faults(dev_priv); - ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total); + ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total); i915_ggtt_invalidate(dev_priv); } @@ -2395,7 +2077,7 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) static void gen8_ggtt_insert_page(struct i915_address_space *vm, dma_addr_t addr, - uint64_t offset, + u64 offset, enum i915_cache_level level, u32 unused) { @@ -2410,32 +2092,22 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm, static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, - uint64_t start, - enum i915_cache_level level, u32 unused) + u64 start, + enum i915_cache_level level, + u32 unused) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); struct sgt_iter sgt_iter; gen8_pte_t __iomem *gtt_entries; - gen8_pte_t gtt_entry; + const gen8_pte_t pte_encode = gen8_pte_encode(0, level); dma_addr_t addr; - int i = 0; - gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); + gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm; + gtt_entries += start >> PAGE_SHIFT; + for_each_sgt_dma(addr, sgt_iter, st) + gen8_set_pte(gtt_entries++, pte_encode | addr); - for_each_sgt_dma(addr, sgt_iter, st) { - gtt_entry = gen8_pte_encode(addr, level); - gen8_set_pte(>t_entries[i++], gtt_entry); - } - - /* - * XXX: This serves as a posting read to make sure that the PTE has - * actually been updated. There is some concern that even though - * registers and PTEs are within the same BAR that they are potentially - * of NUMA access patterns. Therefore, even with the way we assume - * hardware should work, we must keep this posting read for paranoia. - */ - if (i != 0) - WARN_ON(readq(>t_entries[i-1]) != gtt_entry); + wmb(); /* This next bit makes the above posting read even more important. We * want to flush the TLBs only after we're certain all the PTE updates @@ -2444,35 +2116,9 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, ggtt->invalidate(vm->i915); } -struct insert_entries { - struct i915_address_space *vm; - struct sg_table *st; - uint64_t start; - enum i915_cache_level level; - u32 flags; -}; - -static int gen8_ggtt_insert_entries__cb(void *_arg) -{ - struct insert_entries *arg = _arg; - gen8_ggtt_insert_entries(arg->vm, arg->st, - arg->start, arg->level, arg->flags); - return 0; -} - -static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm, - struct sg_table *st, - uint64_t start, - enum i915_cache_level level, - u32 flags) -{ - struct insert_entries arg = { vm, st, start, level, flags }; - stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL); -} - static void gen6_ggtt_insert_page(struct i915_address_space *vm, dma_addr_t addr, - uint64_t offset, + u64 offset, enum i915_cache_level level, u32 flags) { @@ -2493,31 +2139,18 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm, */ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, - uint64_t start, - enum i915_cache_level level, u32 flags) + u64 start, + enum i915_cache_level level, + u32 flags) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - struct sgt_iter sgt_iter; - gen6_pte_t __iomem *gtt_entries; - gen6_pte_t gtt_entry; + gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm; + unsigned int i = start >> PAGE_SHIFT; + struct sgt_iter iter; dma_addr_t addr; - int i = 0; - - gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); - - for_each_sgt_dma(addr, sgt_iter, st) { - gtt_entry = vm->pte_encode(addr, level, flags); - iowrite32(gtt_entry, >t_entries[i++]); - } - - /* XXX: This serves as a posting read to make sure that the PTE has - * actually been updated. There is some concern that even though - * registers and PTEs are within the same BAR that they are potentially - * of NUMA access patterns. Therefore, even with the way we assume - * hardware should work, we must keep this posting read for paranoia. - */ - if (i != 0) - WARN_ON(readl(>t_entries[i-1]) != gtt_entry); + for_each_sgt_dma(addr, iter, st) + iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]); + wmb(); /* This next bit makes the above posting read even more important. We * want to flush the TLBs only after we're certain all the PTE updates @@ -2527,17 +2160,19 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, } static void nop_clear_range(struct i915_address_space *vm, - uint64_t start, uint64_t length) + u64 start, u64 length) { } static void gen8_ggtt_clear_range(struct i915_address_space *vm, - uint64_t start, uint64_t length) + u64 start, u64 length) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); unsigned first_entry = start >> PAGE_SHIFT; unsigned num_entries = length >> PAGE_SHIFT; - gen8_pte_t scratch_pte, __iomem *gtt_base = + const gen8_pte_t scratch_pte = + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); + gen8_pte_t __iomem *gtt_base = (gen8_pte_t __iomem *)ggtt->gsm + first_entry; const int max_entries = ggtt_total_entries(ggtt) - first_entry; int i; @@ -2547,16 +2182,12 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, first_entry, num_entries, max_entries)) num_entries = max_entries; - scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC); for (i = 0; i < num_entries; i++) gen8_set_pte(>t_base[i], scratch_pte); - readl(gtt_base); } static void gen6_ggtt_clear_range(struct i915_address_space *vm, - uint64_t start, - uint64_t length) + u64 start, u64 length) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); unsigned first_entry = start >> PAGE_SHIFT; @@ -2576,12 +2207,11 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, for (i = 0; i < num_entries; i++) iowrite32(scratch_pte, >t_base[i]); - readl(gtt_base); } static void i915_ggtt_insert_page(struct i915_address_space *vm, dma_addr_t addr, - uint64_t offset, + u64 offset, enum i915_cache_level cache_level, u32 unused) { @@ -2593,19 +2223,18 @@ static void i915_ggtt_insert_page(struct i915_address_space *vm, static void i915_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *pages, - uint64_t start, - enum i915_cache_level cache_level, u32 unused) + u64 start, + enum i915_cache_level cache_level, + u32 unused) { unsigned int flags = (cache_level == I915_CACHE_NONE) ? AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags); - } static void i915_ggtt_clear_range(struct i915_address_space *vm, - uint64_t start, - uint64_t length) + u64 start, u64 length) { intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); } @@ -2616,14 +2245,16 @@ static int ggtt_bind_vma(struct i915_vma *vma, { struct drm_i915_private *i915 = vma->vm->i915; struct drm_i915_gem_object *obj = vma->obj; - u32 pte_flags = 0; - int ret; + u32 pte_flags; - ret = i915_get_ggtt_vma_pages(vma); - if (ret) - return ret; + if (unlikely(!vma->pages)) { + int ret = i915_get_ggtt_vma_pages(vma); + if (ret) + return ret; + } /* Currently applicable only to VLV */ + pte_flags = 0; if (obj->gt_ro) pte_flags |= PTE_READ_ONLY; @@ -2642,6 +2273,15 @@ static int ggtt_bind_vma(struct i915_vma *vma, return 0; } +static void ggtt_unbind_vma(struct i915_vma *vma) +{ + struct drm_i915_private *i915 = vma->vm->i915; + + intel_runtime_pm_get(i915); + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); + intel_runtime_pm_put(i915); +} + static int aliasing_gtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) @@ -2650,15 +2290,32 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, u32 pte_flags; int ret; - ret = i915_get_ggtt_vma_pages(vma); - if (ret) - return ret; + if (unlikely(!vma->pages)) { + ret = i915_get_ggtt_vma_pages(vma); + if (ret) + return ret; + } /* Currently applicable only to VLV */ pte_flags = 0; if (vma->obj->gt_ro) pte_flags |= PTE_READ_ONLY; + if (flags & I915_VMA_LOCAL_BIND) { + struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; + + if (appgtt->base.allocate_va_range) { + ret = appgtt->base.allocate_va_range(&appgtt->base, + vma->node.start, + vma->node.size); + if (ret) + goto err_pages; + } + + appgtt->base.insert_entries(&appgtt->base, + vma->pages, vma->node.start, + cache_level, pte_flags); + } if (flags & I915_VMA_GLOBAL_BIND) { intel_runtime_pm_get(i915); @@ -2668,32 +2325,35 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, intel_runtime_pm_put(i915); } - if (flags & I915_VMA_LOCAL_BIND) { - struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; - appgtt->base.insert_entries(&appgtt->base, - vma->pages, vma->node.start, - cache_level, pte_flags); - } - return 0; + +err_pages: + if (!(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND))) { + if (vma->pages != vma->obj->mm.pages) { + GEM_BUG_ON(!vma->pages); + sg_free_table(vma->pages); + kfree(vma->pages); + } + vma->pages = NULL; + } + return ret; } -static void ggtt_unbind_vma(struct i915_vma *vma) +static void aliasing_gtt_unbind_vma(struct i915_vma *vma) { struct drm_i915_private *i915 = vma->vm->i915; - struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; - const u64 size = min(vma->size, vma->node.size); if (vma->flags & I915_VMA_GLOBAL_BIND) { intel_runtime_pm_get(i915); - vma->vm->clear_range(vma->vm, - vma->node.start, size); + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); intel_runtime_pm_put(i915); } - if (vma->flags & I915_VMA_LOCAL_BIND && appgtt) - appgtt->base.clear_range(&appgtt->base, - vma->node.start, size); + if (vma->flags & I915_VMA_LOCAL_BIND) { + struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->base; + + vm->clear_range(vm, vma->node.start, vma->size); + } } void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, @@ -2719,14 +2379,76 @@ static void i915_gtt_color_adjust(const struct drm_mm_node *node, u64 *start, u64 *end) { - if (node->color != color) + if (node->allocated && node->color != color) *start += I915_GTT_PAGE_SIZE; + /* Also leave a space between the unallocated reserved node after the + * GTT and any objects within the GTT, i.e. we use the color adjustment + * to insert a guard page to prevent prefetches crossing over the + * GTT boundary. + */ node = list_next_entry(node, node_list); - if (node->allocated && node->color != color) + if (node->color != color) *end -= I915_GTT_PAGE_SIZE; } +int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_hw_ppgtt *ppgtt; + int err; + + ppgtt = i915_ppgtt_create(i915, ERR_PTR(-EPERM), "[alias]"); + if (IS_ERR(ppgtt)) + return PTR_ERR(ppgtt); + + if (WARN_ON(ppgtt->base.total < ggtt->base.total)) { + err = -ENODEV; + goto err_ppgtt; + } + + if (ppgtt->base.allocate_va_range) { + /* Note we only pre-allocate as far as the end of the global + * GTT. On 48b / 4-level page-tables, the difference is very, + * very significant! We have to preallocate as GVT/vgpu does + * not like the page directory disappearing. + */ + err = ppgtt->base.allocate_va_range(&ppgtt->base, + 0, ggtt->base.total); + if (err) + goto err_ppgtt; + } + + i915->mm.aliasing_ppgtt = ppgtt; + + WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); + ggtt->base.bind_vma = aliasing_gtt_bind_vma; + + WARN_ON(ggtt->base.unbind_vma != ggtt_unbind_vma); + ggtt->base.unbind_vma = aliasing_gtt_unbind_vma; + + return 0; + +err_ppgtt: + i915_ppgtt_put(ppgtt); + return err; +} + +void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_hw_ppgtt *ppgtt; + + ppgtt = fetch_and_zero(&i915->mm.aliasing_ppgtt); + if (!ppgtt) + return; + + i915_ppgtt_put(ppgtt); + + ggtt->base.bind_vma = ggtt_bind_vma; + ggtt->base.unbind_vma = ggtt_unbind_vma; +} + int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) { /* Let GEM Manage all of the aperture. @@ -2740,7 +2462,6 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) */ struct i915_ggtt *ggtt = &dev_priv->ggtt; unsigned long hole_start, hole_end; - struct i915_hw_ppgtt *ppgtt; struct drm_mm_node *entry; int ret; @@ -2769,38 +2490,13 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) ggtt->base.total - PAGE_SIZE, PAGE_SIZE); if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) { - ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); - if (!ppgtt) { - ret = -ENOMEM; - goto err; - } - - ret = __hw_ppgtt_init(ppgtt, dev_priv); + ret = i915_gem_init_aliasing_ppgtt(dev_priv); if (ret) - goto err_ppgtt; - - if (ppgtt->base.allocate_va_range) { - ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0, - ppgtt->base.total); - if (ret) - goto err_ppgtt_cleanup; - } - - ppgtt->base.clear_range(&ppgtt->base, - ppgtt->base.start, - ppgtt->base.total); - - dev_priv->mm.aliasing_ppgtt = ppgtt; - WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); - ggtt->base.bind_vma = aliasing_gtt_bind_vma; + goto err; } return 0; -err_ppgtt_cleanup: - ppgtt->base.cleanup(&ppgtt->base); -err_ppgtt: - kfree(ppgtt); err: drm_mm_remove_node(&ggtt->error_capture); return ret; @@ -2813,27 +2509,31 @@ err: void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) { struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct i915_vma *vma, *vn; - if (dev_priv->mm.aliasing_ppgtt) { - struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; - ppgtt->base.cleanup(&ppgtt->base); - kfree(ppgtt); - } + ggtt->base.closed = true; + + mutex_lock(&dev_priv->drm.struct_mutex); + WARN_ON(!list_empty(&ggtt->base.active_list)); + list_for_each_entry_safe(vma, vn, &ggtt->base.inactive_list, vm_link) + WARN_ON(i915_vma_unbind(vma)); + mutex_unlock(&dev_priv->drm.struct_mutex); i915_gem_cleanup_stolen(&dev_priv->drm); + mutex_lock(&dev_priv->drm.struct_mutex); + i915_gem_fini_aliasing_ppgtt(dev_priv); + if (drm_mm_node_allocated(&ggtt->error_capture)) drm_mm_remove_node(&ggtt->error_capture); if (drm_mm_initialized(&ggtt->base.mm)) { intel_vgt_deballoon(dev_priv); - - mutex_lock(&dev_priv->drm.struct_mutex); i915_address_space_fini(&ggtt->base); - mutex_unlock(&dev_priv->drm.struct_mutex); } ggtt->base.cleanup(&ggtt->base); + mutex_unlock(&dev_priv->drm.struct_mutex); arch_phys_wc_del(ggtt->mtrr); io_mapping_fini(&ggtt->mappable); @@ -2943,7 +2643,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) return -ENOMEM; } - ret = setup_scratch_page(dev_priv, &ggtt->base.scratch_page, GFP_DMA32); + ret = setup_scratch_page(&ggtt->base, GFP_DMA32); if (ret) { DRM_ERROR("Scratch setup failed\n"); /* iounmap will also get called at remove, but meh */ @@ -2959,7 +2659,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) * writing this data shouldn't be harmful even in those cases. */ static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) { - uint64_t pat; + u64 pat; pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ @@ -2994,7 +2694,7 @@ static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) { - uint64_t pat; + u64 pat; /* * Map WB on BDW to snooped on CHV. @@ -3032,7 +2732,7 @@ static void gen6_gmch_remove(struct i915_address_space *vm) struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); iounmap(ggtt->gsm); - cleanup_scratch_page(vm->i915, &vm->scratch_page); + cleanup_scratch_page(vm); } static int gen8_gmch_probe(struct i915_ggtt *ggtt) @@ -3078,8 +2778,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->base.clear_range = gen8_ggtt_clear_range; ggtt->base.insert_entries = gen8_ggtt_insert_entries; - if (IS_CHERRYVIEW(dev_priv)) - ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL; ggtt->invalidate = gen6_ggtt_invalidate; @@ -3183,6 +2881,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) int ret; ggtt->base.i915 = dev_priv; + ggtt->base.dma = &dev_priv->drm.pdev->dev; if (INTEL_GEN(dev_priv) <= 5) ret = i915_gmch_probe(ggtt); @@ -3242,14 +2941,14 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) INIT_LIST_HEAD(&dev_priv->vm_list); - /* Subtract the guard page before address space initialization to - * shrink the range used by drm_mm. + /* Note that we use page colouring to enforce a guard page at the + * end of the address space. This is required as the CS may prefetch + * beyond the end of the batch buffer, across the page boundary, + * and beyond the end of the GTT if we do not provide a guard. */ mutex_lock(&dev_priv->drm.struct_mutex); - ggtt->base.total -= PAGE_SIZE; i915_address_space_init(&ggtt->base, dev_priv, "[global]"); - ggtt->base.total += PAGE_SIZE; - if (!HAS_LLC(dev_priv)) + if (!HAS_LLC(dev_priv) && !USES_PPGTT(dev_priv)) ggtt->base.mm.color_adjust = i915_gtt_color_adjust; mutex_unlock(&dev_priv->drm.struct_mutex); @@ -3303,7 +3002,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) i915_check_and_clear_faults(dev_priv); /* First fill our portion of the GTT with scratch pages */ - ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total); + ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total); ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */ @@ -3344,8 +3043,6 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) struct i915_address_space *vm; list_for_each_entry(vm, &dev_priv->vm_list, global_link) { - /* TODO: Perhaps it shouldn't be gen6 specific */ - struct i915_hw_ppgtt *ppgtt; if (i915_is_ggtt(vm)) @@ -3353,8 +3050,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) else ppgtt = i915_vm_to_ppgtt(vm); - gen6_write_page_range(dev_priv, &ppgtt->pd, - 0, ppgtt->base.total); + gen6_write_page_range(ppgtt, 0, ppgtt->base.total); } } @@ -3389,11 +3085,11 @@ rotate_pages(const dma_addr_t *in, unsigned int offset, return sg; } -static struct sg_table * -intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info, - struct drm_i915_gem_object *obj) +static noinline struct sg_table * +intel_rotate_pages(struct intel_rotation_info *rot_info, + struct drm_i915_gem_object *obj) { - const size_t n_pages = obj->base.size / PAGE_SIZE; + const unsigned long n_pages = obj->base.size / PAGE_SIZE; unsigned int size = intel_rotation_info_size(rot_info); struct sgt_iter sgt_iter; dma_addr_t dma_addr; @@ -3452,7 +3148,7 @@ err_st_alloc: return ERR_PTR(ret); } -static struct sg_table * +static noinline struct sg_table * intel_partial_pages(const struct i915_ggtt_view *view, struct drm_i915_gem_object *obj) { @@ -3506,7 +3202,7 @@ err_st_alloc: static int i915_get_ggtt_vma_pages(struct i915_vma *vma) { - int ret = 0; + int ret; /* The vma->pages are only valid within the lifespan of the borrowed * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so @@ -3515,32 +3211,33 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) */ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); - if (vma->pages) + switch (vma->ggtt_view.type) { + case I915_GGTT_VIEW_NORMAL: + vma->pages = vma->obj->mm.pages; return 0; - if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) - vma->pages = vma->obj->mm.pages; - else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED) + case I915_GGTT_VIEW_ROTATED: vma->pages = - intel_rotate_fb_obj_pages(&vma->ggtt_view.rotated, - vma->obj); - else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL) + intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj); + break; + + case I915_GGTT_VIEW_PARTIAL: vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj); - else + break; + + default: WARN_ONCE(1, "GGTT view %u not implemented!\n", vma->ggtt_view.type); + return -EINVAL; + } - if (!vma->pages) { - DRM_ERROR("Failed to get pages for GGTT view type %u!\n", - vma->ggtt_view.type); - ret = -EINVAL; - } else if (IS_ERR(vma->pages)) { + ret = 0; + if (unlikely(IS_ERR(vma->pages))) { ret = PTR_ERR(vma->pages); vma->pages = NULL; DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", vma->ggtt_view.type, ret); } - return ret; } @@ -3743,3 +3440,8 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, size, alignment, color, start, end, DRM_MM_INSERT_EVICT); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_gtt.c" +#include "selftests/i915_gem_gtt.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 3c5ef5358cef..fb15684c1d83 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -36,9 +36,11 @@ #include <linux/io-mapping.h> #include <linux/mm.h> +#include <linux/pagevec.h> #include "i915_gem_timeline.h" #include "i915_gem_request.h" +#include "i915_selftest.h" #define I915_GTT_PAGE_SIZE 4096UL #define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE @@ -51,11 +53,11 @@ struct drm_i915_file_private; struct drm_i915_fence_reg; -typedef uint32_t gen6_pte_t; -typedef uint64_t gen8_pte_t; -typedef uint64_t gen8_pde_t; -typedef uint64_t gen8_ppgtt_pdpe_t; -typedef uint64_t gen8_ppgtt_pml4e_t; +typedef u32 gen6_pte_t; +typedef u64 gen8_pte_t; +typedef u64 gen8_pde_t; +typedef u64 gen8_ppgtt_pdpe_t; +typedef u64 gen8_ppgtt_pml4e_t; #define ggtt_total_entries(ggtt) ((ggtt)->base.total >> PAGE_SHIFT) @@ -67,7 +69,7 @@ typedef uint64_t gen8_ppgtt_pml4e_t; #define GEN6_PTE_UNCACHED (1 << 1) #define GEN6_PTE_VALID (1 << 0) -#define I915_PTES(pte_len) (PAGE_SIZE / (pte_len)) +#define I915_PTES(pte_len) ((unsigned int)(PAGE_SIZE / (pte_len))) #define I915_PTE_MASK(pte_len) (I915_PTES(pte_len) - 1) #define I915_PDES 512 #define I915_PDE_MASK (I915_PDES - 1) @@ -99,13 +101,20 @@ typedef uint64_t gen8_ppgtt_pml4e_t; #define HSW_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0x7f0)) #define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr) -/* GEN8 legacy style address is defined as a 3 level page table: +/* GEN8 32b style address is defined as a 3 level page table: * 31:30 | 29:21 | 20:12 | 11:0 * PDPE | PDE | PTE | offset * The difference as compared to normal x86 3 level page table is the PDPEs are * programmed via register. - * - * GEN8 48b legacy style address is defined as a 4 level page table: + */ +#define GEN8_3LVL_PDPES 4 +#define GEN8_PDE_SHIFT 21 +#define GEN8_PDE_MASK 0x1ff +#define GEN8_PTE_SHIFT 12 +#define GEN8_PTE_MASK 0x1ff +#define GEN8_PTES I915_PTES(sizeof(gen8_pte_t)) + +/* GEN8 48b style address is defined as a 4 level page table: * 47:39 | 38:30 | 29:21 | 20:12 | 11:0 * PML4E | PDPE | PDE | PTE | offset */ @@ -116,15 +125,6 @@ typedef uint64_t gen8_ppgtt_pml4e_t; /* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page * tables */ #define GEN8_PDPE_MASK 0x1ff -#define GEN8_PDE_SHIFT 21 -#define GEN8_PDE_MASK 0x1ff -#define GEN8_PTE_SHIFT 12 -#define GEN8_PTE_MASK 0x1ff -#define GEN8_LEGACY_PDPES 4 -#define GEN8_PTES I915_PTES(sizeof(gen8_pte_t)) - -#define I915_PDPES_PER_PDP(dev_priv) (USES_FULL_48BIT_PPGTT(dev_priv) ?\ - GEN8_PML4ES_PER_PML4 : GEN8_LEGACY_PDPES) #define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD) #define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */ @@ -141,7 +141,7 @@ typedef uint64_t gen8_ppgtt_pml4e_t; #define GEN8_PPAT_WC (1<<0) #define GEN8_PPAT_UC (0<<0) #define GEN8_PPAT_ELLC_OVERRIDE (0<<2) -#define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8)) +#define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8)) struct sg_table; @@ -208,7 +208,7 @@ struct i915_page_dma { /* For gen6/gen7 only. This is the offset in the GGTT * where the page directory entries for PPGTT begin */ - uint32_t ggtt_offset; + u32 ggtt_offset; }; }; @@ -218,28 +218,24 @@ struct i915_page_dma { struct i915_page_table { struct i915_page_dma base; - - unsigned long *used_ptes; + unsigned int used_ptes; }; struct i915_page_directory { struct i915_page_dma base; - unsigned long *used_pdes; struct i915_page_table *page_table[I915_PDES]; /* PDEs */ + unsigned int used_pdes; }; struct i915_page_directory_pointer { struct i915_page_dma base; - - unsigned long *used_pdpes; struct i915_page_directory **page_directory; + unsigned int used_pdpes; }; struct i915_pml4 { struct i915_page_dma base; - - DECLARE_BITMAP(used_pml4es, GEN8_PML4ES_PER_PML4); struct i915_page_directory_pointer *pdps[GEN8_PML4ES_PER_PML4]; }; @@ -247,6 +243,7 @@ struct i915_address_space { struct drm_mm mm; struct i915_gem_timeline timeline; struct drm_i915_private *i915; + struct device *dma; /* Every address space belongs to a struct file - except for the global * GTT that is owned by the driver (and so @file is set to NULL). In * principle, no information should leak from one context to another @@ -257,7 +254,6 @@ struct i915_address_space { */ struct drm_i915_file_private *file; struct list_head global_link; - u64 start; /* Start offset always 0 for dri2 */ u64 total; /* size addr space maps (ex. 2GB for ggtt) */ bool closed; @@ -297,6 +293,9 @@ struct i915_address_space { */ struct list_head unbound_list; + struct pagevec free_pages; + bool pt_kmap_wc; + /* FIXME: Need a more generic return type */ gen6_pte_t (*pte_encode)(dma_addr_t addr, enum i915_cache_level level, @@ -304,20 +303,19 @@ struct i915_address_space { /* flags for pte_encode */ #define PTE_READ_ONLY (1<<0) int (*allocate_va_range)(struct i915_address_space *vm, - uint64_t start, - uint64_t length); + u64 start, u64 length); void (*clear_range)(struct i915_address_space *vm, - uint64_t start, - uint64_t length); + u64 start, u64 length); void (*insert_page)(struct i915_address_space *vm, dma_addr_t addr, - uint64_t offset, + u64 offset, enum i915_cache_level cache_level, u32 flags); void (*insert_entries)(struct i915_address_space *vm, struct sg_table *st, - uint64_t start, - enum i915_cache_level cache_level, u32 flags); + u64 start, + enum i915_cache_level cache_level, + u32 flags); void (*cleanup)(struct i915_address_space *vm); /** Unmap an object from an address space. This usually consists of * setting the valid PTE entries to a reserved scratch page. */ @@ -326,10 +324,18 @@ struct i915_address_space { int (*bind_vma)(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); + + I915_SELFTEST_DECLARE(struct fault_attr fault_attr); }; #define i915_is_ggtt(V) (!(V)->file) +static inline bool +i915_vm_is_48bit(const struct i915_address_space *vm) +{ + return (vm->total - 1) >> 32; +} + /* The Graphics Translation Table is the way in which GEN hardware translates a * Graphics Virtual Address into a Physical Address. In addition to the normal * collateral associated with any va->pa translations GEN hardware also has a @@ -381,7 +387,6 @@ struct i915_hw_ppgtt { gen6_pte_t __iomem *pd_addr; - int (*enable)(struct i915_hw_ppgtt *ppgtt); int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req); void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m); @@ -409,9 +414,9 @@ struct i915_hw_ppgtt { (pt = (pd)->page_table[iter], true); \ ++iter) -static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift) +static inline u32 i915_pte_index(u64 address, unsigned int pde_shift) { - const uint32_t mask = NUM_PTE(pde_shift) - 1; + const u32 mask = NUM_PTE(pde_shift) - 1; return (address >> PAGE_SHIFT) & mask; } @@ -420,11 +425,10 @@ static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift) * does not cross a page table boundary, so the max value would be * GEN6_PTES for GEN6, and GEN8_PTES for GEN8. */ -static inline uint32_t i915_pte_count(uint64_t addr, size_t length, - uint32_t pde_shift) +static inline u32 i915_pte_count(u64 addr, u64 length, unsigned int pde_shift) { - const uint64_t mask = ~((1ULL << pde_shift) - 1); - uint64_t end; + const u64 mask = ~((1ULL << pde_shift) - 1); + u64 end; WARN_ON(length == 0); WARN_ON(offset_in_page(addr|length)); @@ -437,26 +441,35 @@ static inline uint32_t i915_pte_count(uint64_t addr, size_t length, return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift); } -static inline uint32_t i915_pde_index(uint64_t addr, uint32_t shift) +static inline u32 i915_pde_index(u64 addr, u32 shift) { return (addr >> shift) & I915_PDE_MASK; } -static inline uint32_t gen6_pte_index(uint32_t addr) +static inline u32 gen6_pte_index(u32 addr) { return i915_pte_index(addr, GEN6_PDE_SHIFT); } -static inline size_t gen6_pte_count(uint32_t addr, uint32_t length) +static inline u32 gen6_pte_count(u32 addr, u32 length) { return i915_pte_count(addr, length, GEN6_PDE_SHIFT); } -static inline uint32_t gen6_pde_index(uint32_t addr) +static inline u32 gen6_pde_index(u32 addr) { return i915_pde_index(addr, GEN6_PDE_SHIFT); } +static inline unsigned int +i915_pdpes_per_pdp(const struct i915_address_space *vm) +{ + if (i915_vm_is_48bit(vm)) + return GEN8_PML4ES_PER_PML4; + + return GEN8_3LVL_PDPES; +} + /* Equivalent to the gen6 version, For each pde iterates over every pde * between from start until start + length. On gen8+ it simply iterates * over every page directory entry in a page directory. @@ -471,7 +484,7 @@ static inline uint32_t gen6_pde_index(uint32_t addr) #define gen8_for_each_pdpe(pd, pdp, start, length, iter) \ for (iter = gen8_pdpe_index(start); \ - length > 0 && iter < I915_PDPES_PER_PDP(dev) && \ + length > 0 && iter < i915_pdpes_per_pdp(vm) && \ (pd = (pdp)->page_directory[iter], true); \ ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT); \ temp = min(temp - start, length); \ @@ -485,27 +498,27 @@ static inline uint32_t gen6_pde_index(uint32_t addr) temp = min(temp - start, length); \ start += temp, length -= temp; }), ++iter) -static inline uint32_t gen8_pte_index(uint64_t address) +static inline u32 gen8_pte_index(u64 address) { return i915_pte_index(address, GEN8_PDE_SHIFT); } -static inline uint32_t gen8_pde_index(uint64_t address) +static inline u32 gen8_pde_index(u64 address) { return i915_pde_index(address, GEN8_PDE_SHIFT); } -static inline uint32_t gen8_pdpe_index(uint64_t address) +static inline u32 gen8_pdpe_index(u64 address) { return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK; } -static inline uint32_t gen8_pml4e_index(uint64_t address) +static inline u32 gen8_pml4e_index(u64 address) { return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK; } -static inline size_t gen8_pte_count(uint64_t address, uint64_t length) +static inline u64 gen8_pte_count(u64 address, u64 length) { return i915_pte_count(address, length, GEN8_PDE_SHIFT); } @@ -513,9 +526,7 @@ static inline size_t gen8_pte_count(uint64_t address, uint64_t length) static inline dma_addr_t i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n) { - return test_bit(n, ppgtt->pdp.used_pdpes) ? - px_dma(ppgtt->pdp.page_directory[n]) : - px_dma(ppgtt->base.scratch_pd); + return px_dma(ppgtt->pdp.page_directory[n]); } static inline struct i915_ggtt * @@ -525,6 +536,9 @@ i915_vm_to_ggtt(struct i915_address_space *vm) return container_of(vm, struct i915_ggtt, base); } +int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915); +void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915); + int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv); int i915_ggtt_init_hw(struct drm_i915_private *dev_priv); int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index 933019e1b206..fc950abbe400 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -35,8 +35,10 @@ static void internal_free_pages(struct sg_table *st) { struct scatterlist *sg; - for (sg = st->sgl; sg; sg = __sg_next(sg)) - __free_pages(sg_page(sg), get_order(sg->length)); + for (sg = st->sgl; sg; sg = __sg_next(sg)) { + if (sg_page(sg)) + __free_pages(sg_page(sg), get_order(sg->length)); + } sg_free_table(st); kfree(st); @@ -133,6 +135,7 @@ create_st: return st; err: + sg_set_page(sg, NULL, 0, 0); sg_mark_end(sg); internal_free_pages(st); return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index bf90b07163d1..33b0dc4782a9 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -33,6 +33,8 @@ #include <drm/i915_drm.h> +#include "i915_selftest.h" + struct drm_i915_gem_object_ops { unsigned int flags; #define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1 @@ -84,6 +86,7 @@ struct drm_i915_gem_object { struct list_head obj_exec_link; struct list_head batch_pool_link; + I915_SELFTEST_DECLARE(struct list_head st_link); unsigned long flags; @@ -162,19 +165,23 @@ struct drm_i915_gem_object { struct reservation_object *resv; /** References from framebuffers, locks out tiling changes. */ - unsigned long framebuffer_references; + unsigned int framebuffer_references; /** Record of address bit 17 of each page at last unbind. */ unsigned long *bit_17; - struct i915_gem_userptr { - uintptr_t ptr; - unsigned read_only :1; + union { + struct i915_gem_userptr { + uintptr_t ptr; + unsigned read_only :1; + + struct i915_mm_struct *mm; + struct i915_mmu_object *mmu_object; + struct work_struct *work; + } userptr; - struct i915_mm_struct *mm; - struct i915_mmu_object *mmu_object; - struct work_struct *work; - } userptr; + unsigned long scratch; + }; /** for phys allocated objects */ struct drm_dma_handle *phys_handle; @@ -253,6 +260,16 @@ extern void drm_gem_object_unreference(struct drm_gem_object *); __deprecated extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *); +static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj) +{ + reservation_object_lock(obj->resv, NULL); +} + +static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj) +{ + reservation_object_unlock(obj->resv); +} + static inline bool i915_gem_object_is_dead(const struct drm_i915_gem_object *obj) { @@ -299,6 +316,12 @@ i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj) void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj); +static inline bool +i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj) +{ + return READ_ONCE(obj->framebuffer_references); +} + static inline unsigned int i915_gem_object_get_tiling(struct drm_i915_gem_object *obj) { @@ -357,5 +380,7 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) return engine; } +void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj); + #endif diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index e7c3c0318ff6..1e1d9f2072cd 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -72,7 +72,6 @@ static void i915_fence_release(struct dma_fence *fence) * caught trying to reuse dead objects. */ i915_sw_fence_fini(&req->submit); - i915_sw_fence_fini(&req->execute); kmem_cache_free(req->i915->requests, req); } @@ -86,42 +85,20 @@ const struct dma_fence_ops i915_fence_ops = { .release = i915_fence_release, }; -int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, - struct drm_file *file) -{ - struct drm_i915_private *dev_private; - struct drm_i915_file_private *file_priv; - - WARN_ON(!req || !file || req->file_priv); - - if (!req || !file) - return -EINVAL; - - if (req->file_priv) - return -EINVAL; - - dev_private = req->i915; - file_priv = file->driver_priv; - - spin_lock(&file_priv->mm.lock); - req->file_priv = file_priv; - list_add_tail(&req->client_list, &file_priv->mm.request_list); - spin_unlock(&file_priv->mm.lock); - - return 0; -} - static inline void i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) { - struct drm_i915_file_private *file_priv = request->file_priv; + struct drm_i915_file_private *file_priv; + file_priv = request->file_priv; if (!file_priv) return; spin_lock(&file_priv->mm.lock); - list_del(&request->client_list); - request->file_priv = NULL; + if (request->file_priv) { + list_del(&request->client_link); + request->file_priv = NULL; + } spin_unlock(&file_priv->mm.lock); } @@ -201,6 +178,92 @@ i915_priotree_init(struct i915_priotree *pt) pt->priority = INT_MIN; } +static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) +{ + struct i915_gem_timeline *timeline = &i915->gt.global_timeline; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int ret; + + /* Carefully retire all requests without writing to the rings */ + ret = i915_gem_wait_for_idle(i915, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED); + if (ret) + return ret; + + i915_gem_retire_requests(i915); + GEM_BUG_ON(i915->gt.active_requests > 1); + + /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ + for_each_engine(engine, i915, id) { + struct intel_timeline *tl = &timeline->engine[id]; + + if (wait_for(intel_engine_is_idle(engine), 50)) + return -EBUSY; + + if (!i915_seqno_passed(seqno, tl->seqno)) { + /* spin until threads are complete */ + while (intel_breadcrumbs_busy(engine)) + cond_resched(); + } + + /* Finally reset hw state */ + tl->seqno = seqno; + intel_engine_init_global_seqno(engine, seqno); + } + + list_for_each_entry(timeline, &i915->gt.timelines, link) { + for_each_engine(engine, i915, id) { + struct intel_timeline *tl = &timeline->engine[id]; + + memset(tl->sync_seqno, 0, sizeof(tl->sync_seqno)); + } + } + + return 0; +} + +int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + if (seqno == 0) + return -EINVAL; + + /* HWS page needs to be set less than what we + * will inject to ring + */ + return reset_all_global_seqno(dev_priv, seqno - 1); +} + +static int reserve_seqno(struct intel_engine_cs *engine) +{ + u32 active = ++engine->timeline->inflight_seqnos; + u32 seqno = engine->timeline->seqno; + int ret; + + /* Reservation is fine until we need to wrap around */ + if (likely(!add_overflows(seqno, active))) + return 0; + + ret = reset_all_global_seqno(engine->i915, 0); + if (ret) { + engine->timeline->inflight_seqnos--; + return ret; + } + + return 0; +} + +static void unreserve_seqno(struct intel_engine_cs *engine) +{ + GEM_BUG_ON(!engine->timeline->inflight_seqnos); + engine->timeline->inflight_seqnos--; +} + void i915_gem_retire_noop(struct i915_gem_active *active, struct drm_i915_gem_request *request) { @@ -214,7 +277,6 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) lockdep_assert_held(&request->i915->drm.struct_mutex); GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); - GEM_BUG_ON(!i915_sw_fence_signaled(&request->execute)); GEM_BUG_ON(!i915_gem_request_completed(request)); GEM_BUG_ON(!request->i915->gt.active_requests); @@ -240,6 +302,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) &request->i915->gt.idle_work, msecs_to_jiffies(100)); } + unreserve_seqno(request->engine); /* Walk through the active list, calling retire on each. This allows * objects to track their GPU activity and mark themselves as idle @@ -310,88 +373,9 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) } while (tmp != req); } -static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno) -{ - struct i915_gem_timeline *timeline = &i915->gt.global_timeline; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int ret; - - /* Carefully retire all requests without writing to the rings */ - ret = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED); - if (ret) - return ret; - - i915_gem_retire_requests(i915); - GEM_BUG_ON(i915->gt.active_requests > 1); - - /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ - if (!i915_seqno_passed(seqno, atomic_read(&timeline->seqno))) { - while (intel_breadcrumbs_busy(i915)) - cond_resched(); /* spin until threads are complete */ - } - atomic_set(&timeline->seqno, seqno); - - /* Finally reset hw state */ - for_each_engine(engine, i915, id) - intel_engine_init_global_seqno(engine, seqno); - - list_for_each_entry(timeline, &i915->gt.timelines, link) { - for_each_engine(engine, i915, id) { - struct intel_timeline *tl = &timeline->engine[id]; - - memset(tl->sync_seqno, 0, sizeof(tl->sync_seqno)); - } - } - - return 0; -} - -int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) +static u32 timeline_get_seqno(struct intel_timeline *tl) { - struct drm_i915_private *dev_priv = to_i915(dev); - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - if (seqno == 0) - return -EINVAL; - - /* HWS page needs to be set less than what we - * will inject to ring - */ - return i915_gem_init_global_seqno(dev_priv, seqno - 1); -} - -static int reserve_global_seqno(struct drm_i915_private *i915) -{ - u32 active_requests = ++i915->gt.active_requests; - u32 seqno = atomic_read(&i915->gt.global_timeline.seqno); - int ret; - - /* Reservation is fine until we need to wrap around */ - if (likely(seqno + active_requests > seqno)) - return 0; - - ret = i915_gem_init_global_seqno(i915, 0); - if (ret) { - i915->gt.active_requests--; - return ret; - } - - return 0; -} - -static u32 __timeline_get_seqno(struct i915_gem_timeline *tl) -{ - /* seqno only incremented under a mutex */ - return ++tl->seqno.counter; -} - -static u32 timeline_get_seqno(struct i915_gem_timeline *tl) -{ - return atomic_inc_return(&tl->seqno); + return ++tl->seqno; } void __i915_gem_request_submit(struct drm_i915_gem_request *request) @@ -400,19 +384,19 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) struct intel_timeline *timeline; u32 seqno; + GEM_BUG_ON(!irqs_disabled()); + lockdep_assert_held(&engine->timeline->lock); + + trace_i915_gem_request_execute(request); + /* Transfer from per-context onto the global per-engine timeline */ timeline = engine->timeline; GEM_BUG_ON(timeline == request->timeline); - assert_spin_locked(&timeline->lock); - seqno = timeline_get_seqno(timeline->common); + seqno = timeline_get_seqno(timeline); GEM_BUG_ON(!seqno); GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno)); - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, seqno)); - request->previous_seqno = timeline->last_submitted_seqno; - timeline->last_submitted_seqno = seqno; - /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); request->global_seqno = seqno; @@ -420,7 +404,6 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) intel_engine_enable_signaling(request); spin_unlock(&request->lock); - GEM_BUG_ON(!request->global_seqno); engine->emit_breadcrumb(request, request->ring->vaddr + request->postfix); @@ -428,7 +411,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) list_move_tail(&request->link, &timeline->requests); spin_unlock(&request->timeline->lock); - i915_sw_fence_commit(&request->execute); + wake_up_all(&request->execute); } void i915_gem_request_submit(struct drm_i915_gem_request *request) @@ -444,33 +427,66 @@ void i915_gem_request_submit(struct drm_i915_gem_request *request) spin_unlock_irqrestore(&engine->timeline->lock, flags); } -static int __i915_sw_fence_call -submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request) { - struct drm_i915_gem_request *request = - container_of(fence, typeof(*request), submit); + struct intel_engine_cs *engine = request->engine; + struct intel_timeline *timeline; - switch (state) { - case FENCE_COMPLETE: - request->engine->submit_request(request); - break; + GEM_BUG_ON(!irqs_disabled()); + lockdep_assert_held(&engine->timeline->lock); - case FENCE_FREE: - i915_gem_request_put(request); - break; - } + /* Only unwind in reverse order, required so that the per-context list + * is kept in seqno/ring order. + */ + GEM_BUG_ON(request->global_seqno != engine->timeline->seqno); + engine->timeline->seqno--; - return NOTIFY_DONE; + /* We may be recursing from the signal callback of another i915 fence */ + spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + request->global_seqno = 0; + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) + intel_engine_cancel_signaling(request); + spin_unlock(&request->lock); + + /* Transfer back from the global per-engine timeline to per-context */ + timeline = request->timeline; + GEM_BUG_ON(timeline == engine->timeline); + + spin_lock(&timeline->lock); + list_move(&request->link, &timeline->requests); + spin_unlock(&timeline->lock); + + /* We don't need to wake_up any waiters on request->execute, they + * will get woken by any other event or us re-adding this request + * to the engine timeline (__i915_gem_request_submit()). The waiters + * should be quite adapt at finding that the request now has a new + * global_seqno to the one they went to sleep on. + */ +} + +void i915_gem_request_unsubmit(struct drm_i915_gem_request *request) +{ + struct intel_engine_cs *engine = request->engine; + unsigned long flags; + + /* Will be called from irq-context when using foreign fences. */ + spin_lock_irqsave(&engine->timeline->lock, flags); + + __i915_gem_request_unsubmit(request); + + spin_unlock_irqrestore(&engine->timeline->lock, flags); } static int __i915_sw_fence_call -execute_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { struct drm_i915_gem_request *request = - container_of(fence, typeof(*request), execute); + container_of(fence, typeof(*request), submit); switch (state) { case FENCE_COMPLETE: + trace_i915_gem_request_submit(request); + request->engine->submit_request(request); break; case FENCE_FREE: @@ -517,14 +533,14 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, if (ret) return ERR_PTR(ret); - ret = reserve_global_seqno(dev_priv); + ret = reserve_seqno(engine); if (ret) goto err_unpin; /* Move the oldest request to the slab-cache (if not in use!) */ req = list_first_entry_or_null(&engine->timeline->requests, typeof(*req), link); - if (req && __i915_gem_request_completed(req)) + if (req && i915_gem_request_completed(req)) i915_gem_request_retire(req); /* Beware: Dragons be flying overhead. @@ -569,17 +585,11 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, &i915_fence_ops, &req->lock, req->timeline->fence_context, - __timeline_get_seqno(req->timeline->common)); + timeline_get_seqno(req->timeline)); /* We bump the ref for the fence chain */ i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify); - i915_sw_fence_init(&i915_gem_request_get(req)->execute, execute_notify); - - /* Ensure that the execute fence completes after the submit fence - - * as we complete the execute fence from within the submit fence - * callback, its completion would otherwise be visible first. - */ - i915_sw_fence_await_sw_fence(&req->execute, &req->submit, &req->execq); + init_waitqueue_head(&req->execute); i915_priotree_init(&req->priotree); @@ -614,6 +624,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, */ req->head = req->ring->tail; + /* Check that we didn't interrupt ourselves with a new request */ + GEM_BUG_ON(req->timeline->seqno != req->fence.seqno); return req; err_ctx: @@ -624,7 +636,7 @@ err_ctx: kmem_cache_free(dev_priv->requests, req); err_unreserve: - dev_priv->gt.active_requests--; + unreserve_seqno(engine); err_unpin: engine->context_unpin(engine, ctx); return ERR_PTR(ret); @@ -634,6 +646,7 @@ static int i915_gem_request_await_request(struct drm_i915_gem_request *to, struct drm_i915_gem_request *from) { + u32 seqno; int ret; GEM_BUG_ON(to == from); @@ -656,14 +669,15 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, return ret < 0 ? ret : 0; } - if (!from->global_seqno) { + seqno = i915_gem_request_global_seqno(from); + if (!seqno) { ret = i915_sw_fence_await_dma_fence(&to->submit, &from->fence, 0, GFP_KERNEL); return ret < 0 ? ret : 0; } - if (from->global_seqno <= to->timeline->sync_seqno[from->engine->id]) + if (seqno <= to->timeline->sync_seqno[from->engine->id]) return 0; trace_i915_gem_ring_sync_to(to, from); @@ -681,7 +695,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, return ret; } - to->timeline->sync_seqno[from->engine->id] = from->global_seqno; + to->timeline->sync_seqno[from->engine->id] = seqno; return 0; } @@ -827,6 +841,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) struct intel_ring *ring = request->ring; struct intel_timeline *timeline = request->timeline; struct drm_i915_gem_request *prev; + u32 *cs; int err; lockdep_assert_held(&request->i915->drm.struct_mutex); @@ -836,8 +851,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) * our i915_gem_request_alloc() and called __i915_add_request() before * us, the timeline will hold its seqno which is later than ours. */ - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, - request->fence.seqno)); + GEM_BUG_ON(timeline->seqno != request->fence.seqno); /* * To ensure that this call will not fail, space for its emissions @@ -865,10 +879,9 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) * GPU processing the request, we never over-estimate the * position of the ring's HEAD. */ - err = intel_ring_begin(request, engine->emit_breadcrumb_sz); - GEM_BUG_ON(err); - request->postfix = ring->tail; - ring->tail += engine->emit_breadcrumb_sz * sizeof(u32); + cs = intel_ring_begin(request, engine->emit_breadcrumb_sz); + GEM_BUG_ON(IS_ERR(cs)); + request->postfix = intel_ring_offset(request, cs); /* Seal the request and mark it as pending execution. Note that * we may inspect this state, without holding any locks, during @@ -892,16 +905,14 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) list_add_tail(&request->link, &timeline->requests); spin_unlock_irq(&timeline->lock); - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, - request->fence.seqno)); - - timeline->last_submitted_seqno = request->fence.seqno; + GEM_BUG_ON(timeline->seqno != request->fence.seqno); i915_gem_active_set(&timeline->last_request, request); list_add_tail(&request->ring_link, &ring->request_list); request->emitted_jiffies = jiffies; - i915_gem_mark_busy(engine); + if (!request->i915->gt.active_requests++) + i915_gem_mark_busy(engine); /* Let the backend know a new request has arrived that may need * to adjust the existing execution schedule due to a high priority @@ -921,16 +932,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ } -static void reset_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) -{ - unsigned long flags; - - spin_lock_irqsave(&q->lock, flags); - if (list_empty(&wait->task_list)) - __add_wait_queue(q, wait); - spin_unlock_irqrestore(&q->lock, flags); -} - static unsigned long local_clock_us(unsigned int *cpu) { unsigned long t; @@ -964,9 +965,10 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu) } bool __i915_spin_request(const struct drm_i915_gem_request *req, - int state, unsigned long timeout_us) + u32 seqno, int state, unsigned long timeout_us) { - unsigned int cpu; + struct intel_engine_cs *engine = req->engine; + unsigned int irq, cpu; /* When waiting for high frequency requests, e.g. during synchronous * rendering split between the CPU and GPU, the finite amount of time @@ -978,11 +980,24 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req, * takes to sleep on a request, on the order of a microsecond. */ + irq = atomic_read(&engine->irq_count); timeout_us += local_clock_us(&cpu); do { - if (__i915_gem_request_completed(req)) + if (seqno != i915_gem_request_global_seqno(req)) + break; + + if (i915_seqno_passed(intel_engine_get_seqno(req->engine), + seqno)) return true; + /* Seqno are meant to be ordered *before* the interrupt. If + * we see an interrupt without a corresponding seqno advance, + * assume we won't see one in the near future but require + * the engine->seqno_barrier() to fixup coherency. + */ + if (atomic_read(&engine->irq_count) != irq) + break; + if (signal_pending_state(state, current)) break; @@ -995,52 +1010,14 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req, return false; } -static long -__i915_request_wait_for_execute(struct drm_i915_gem_request *request, - unsigned int flags, - long timeout) +static bool __i915_wait_request_check_and_reset(struct drm_i915_gem_request *request) { - const int state = flags & I915_WAIT_INTERRUPTIBLE ? - TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; - wait_queue_head_t *q = &request->i915->gpu_error.wait_queue; - DEFINE_WAIT(reset); - DEFINE_WAIT(wait); - - if (flags & I915_WAIT_LOCKED) - add_wait_queue(q, &reset); - - do { - prepare_to_wait(&request->execute.wait, &wait, state); - - if (i915_sw_fence_done(&request->execute)) - break; - - if (flags & I915_WAIT_LOCKED && - i915_reset_in_progress(&request->i915->gpu_error)) { - __set_current_state(TASK_RUNNING); - i915_reset(request->i915); - reset_wait_queue(q, &reset); - continue; - } - - if (signal_pending_state(state, current)) { - timeout = -ERESTARTSYS; - break; - } - - if (!timeout) { - timeout = -ETIME; - break; - } - - timeout = io_schedule_timeout(timeout); - } while (1); - finish_wait(&request->execute.wait, &wait); - - if (flags & I915_WAIT_LOCKED) - remove_wait_queue(q, &reset); + if (likely(!i915_reset_in_progress(&request->i915->gpu_error))) + return false; - return timeout; + __set_current_state(TASK_RUNNING); + i915_reset(request->i915); + return true; } /** @@ -1068,7 +1045,9 @@ long i915_wait_request(struct drm_i915_gem_request *req, { const int state = flags & I915_WAIT_INTERRUPTIBLE ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; - DEFINE_WAIT(reset); + wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue; + DEFINE_WAIT_FUNC(reset, default_wake_function); + DEFINE_WAIT_FUNC(exec, default_wake_function); struct intel_wait wait; might_sleep(); @@ -1085,27 +1064,45 @@ long i915_wait_request(struct drm_i915_gem_request *req, if (!timeout) return -ETIME; - trace_i915_gem_request_wait_begin(req); + trace_i915_gem_request_wait_begin(req, flags); + + add_wait_queue(&req->execute, &exec); + if (flags & I915_WAIT_LOCKED) + add_wait_queue(errq, &reset); + + intel_wait_init(&wait, req); + +restart: + do { + set_current_state(state); + if (intel_wait_update_request(&wait, req)) + break; + + if (flags & I915_WAIT_LOCKED && + __i915_wait_request_check_and_reset(req)) + continue; - if (!i915_sw_fence_done(&req->execute)) { - timeout = __i915_request_wait_for_execute(req, flags, timeout); - if (timeout < 0) + if (signal_pending_state(state, current)) { + timeout = -ERESTARTSYS; goto complete; + } - GEM_BUG_ON(!i915_sw_fence_done(&req->execute)); - } - GEM_BUG_ON(!i915_sw_fence_done(&req->submit)); - GEM_BUG_ON(!req->global_seqno); + if (!timeout) { + timeout = -ETIME; + goto complete; + } + + timeout = io_schedule_timeout(timeout); + } while (1); + + GEM_BUG_ON(!intel_wait_has_seqno(&wait)); + GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit)); /* Optimistic short spin before touching IRQs */ if (i915_spin_request(req, state, 5)) goto complete; set_current_state(state); - if (flags & I915_WAIT_LOCKED) - add_wait_queue(&req->i915->gpu_error.wait_queue, &reset); - - intel_wait_init(&wait, req->global_seqno); if (intel_engine_add_wait(req->engine, &wait)) /* In order to check that we haven't missed the interrupt * as we enabled it, we need to kick ourselves to do a @@ -1113,6 +1110,9 @@ long i915_wait_request(struct drm_i915_gem_request *req, */ goto wakeup; + if (flags & I915_WAIT_LOCKED) + __i915_wait_request_check_and_reset(req); + for (;;) { if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; @@ -1126,7 +1126,8 @@ long i915_wait_request(struct drm_i915_gem_request *req, timeout = io_schedule_timeout(timeout); - if (intel_wait_complete(&wait)) + if (intel_wait_complete(&wait) && + intel_wait_check_request(&wait, req)) break; set_current_state(state); @@ -1151,25 +1152,25 @@ wakeup: * itself, or indirectly by recovering the GPU). */ if (flags & I915_WAIT_LOCKED && - i915_reset_in_progress(&req->i915->gpu_error)) { - __set_current_state(TASK_RUNNING); - i915_reset(req->i915); - reset_wait_queue(&req->i915->gpu_error.wait_queue, - &reset); + __i915_wait_request_check_and_reset(req)) continue; - } /* Only spin if we know the GPU is processing this request */ if (i915_spin_request(req, state, 2)) break; + + if (!intel_wait_check_request(&wait, req)) { + intel_engine_remove_wait(req->engine, &wait); + goto restart; + } } intel_engine_remove_wait(req->engine, &wait); - if (flags & I915_WAIT_LOCKED) - remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset); - __set_current_state(TASK_RUNNING); - complete: + __set_current_state(TASK_RUNNING); + if (flags & I915_WAIT_LOCKED) + remove_wait_queue(errq, &reset); + remove_wait_queue(&req->execute, &exec); trace_i915_gem_request_wait_end(req); return timeout; @@ -1178,14 +1179,21 @@ complete: static void engine_retire_requests(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request, *next; + u32 seqno = intel_engine_get_seqno(engine); + LIST_HEAD(retire); + spin_lock_irq(&engine->timeline->lock); list_for_each_entry_safe(request, next, &engine->timeline->requests, link) { - if (!__i915_gem_request_completed(request)) - return; + if (!i915_seqno_passed(seqno, request->global_seqno)) + break; - i915_gem_request_retire(request); + list_move_tail(&request->link, &retire); } + spin_unlock_irq(&engine->timeline->lock); + + list_for_each_entry_safe(request, next, &retire, link) + i915_gem_request_retire(request); } void i915_gem_retire_requests(struct drm_i915_private *dev_priv) @@ -1201,3 +1209,8 @@ void i915_gem_retire_requests(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) engine_retire_requests(engine); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_request.c" +#include "selftests/i915_gem_request.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index ea511f06efaf..5018e55922f0 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -32,10 +32,12 @@ struct drm_file; struct drm_i915_gem_object; +struct drm_i915_gem_request; struct intel_wait { struct rb_node node; struct task_struct *tsk; + struct drm_i915_gem_request *request; u32 seqno; }; @@ -119,18 +121,10 @@ struct drm_i915_gem_request { * The submit fence is used to await upon all of the request's * dependencies. When it is signaled, the request is ready to run. * It is used by the driver to then queue the request for execution. - * - * The execute fence is used to signal when the request has been - * sent to hardware. - * - * It is illegal for the submit fence of one request to wait upon the - * execute fence of an earlier request. It should be sufficient to - * wait upon the submit fence of the earlier request. */ struct i915_sw_fence submit; - struct i915_sw_fence execute; wait_queue_t submitq; - wait_queue_t execq; + wait_queue_head_t execute; /* A list of everyone we wait upon, and everyone who waits upon us. * Even though we will not be submitted to the hardware before the @@ -143,13 +137,12 @@ struct drm_i915_gem_request { struct i915_priotree priotree; struct i915_dependency dep; - u32 global_seqno; - - /** GEM sequence number associated with the previous request, - * when the HWS breadcrumb is equal to this the GPU is processing - * this request. + /** GEM sequence number associated with this request on the + * global execution timeline. It is zero when the request is not + * on the HW queue (i.e. not on the engine timeline list). + * Its value is guarded by the timeline spinlock. */ - u32 previous_seqno; + u32 global_seqno; /** Position in the ring of the start of the request */ u32 head; @@ -187,7 +180,7 @@ struct drm_i915_gem_request { struct drm_i915_file_private *file_priv; /** file_priv list entry for this request */ - struct list_head client_list; + struct list_head client_link; }; extern const struct dma_fence_ops i915_fence_ops; @@ -200,8 +193,6 @@ static inline bool dma_fence_is_i915(const struct dma_fence *fence) struct drm_i915_gem_request * __must_check i915_gem_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx); -int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, - struct drm_file *file); void i915_gem_request_retire_upto(struct drm_i915_gem_request *req); static inline struct drm_i915_gem_request * @@ -243,6 +234,30 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, *pdst = src; } +/** + * i915_gem_request_global_seqno - report the current global seqno + * @request - the request + * + * A request is assigned a global seqno only when it is on the hardware + * execution queue. The global seqno can be used to maintain a list of + * requests on the same engine in retirement order, for example for + * constructing a priority queue for waiting. Prior to its execution, or + * if it is subsequently removed in the event of preemption, its global + * seqno is zero. As both insertion and removal from the execution queue + * may operate in IRQ context, it is not guarded by the usual struct_mutex + * BKL. Instead those relying on the global seqno must be prepared for its + * value to change between reads. Only when the request is complete can + * the global seqno be stable (due to the memory barriers on submitting + * the commands to the hardware to write the breadcrumb, if the HWS shows + * that it has passed the global seqno and the global seqno is unchanged + * after the read, it is indeed complete). + */ +static u32 +i915_gem_request_global_seqno(const struct drm_i915_gem_request *request) +{ + return READ_ONCE(request->global_seqno); +} + int i915_gem_request_await_object(struct drm_i915_gem_request *to, struct drm_i915_gem_object *obj, @@ -259,6 +274,9 @@ void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches); void __i915_gem_request_submit(struct drm_i915_gem_request *request); void i915_gem_request_submit(struct drm_i915_gem_request *request); +void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request); +void i915_gem_request_unsubmit(struct drm_i915_gem_request *request); + struct intel_rps_client; #define NO_WAITBOOST ERR_PTR(-1) #define IS_RPS_CLIENT(p) (!IS_ERR(p)) @@ -283,46 +301,55 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2) } static inline bool -__i915_gem_request_started(const struct drm_i915_gem_request *req) +__i915_gem_request_started(const struct drm_i915_gem_request *req, u32 seqno) { - GEM_BUG_ON(!req->global_seqno); + GEM_BUG_ON(!seqno); return i915_seqno_passed(intel_engine_get_seqno(req->engine), - req->previous_seqno); + seqno - 1); } static inline bool i915_gem_request_started(const struct drm_i915_gem_request *req) { - if (!req->global_seqno) + u32 seqno; + + seqno = i915_gem_request_global_seqno(req); + if (!seqno) return false; - return __i915_gem_request_started(req); + return __i915_gem_request_started(req, seqno); } static inline bool -__i915_gem_request_completed(const struct drm_i915_gem_request *req) +__i915_gem_request_completed(const struct drm_i915_gem_request *req, u32 seqno) { - GEM_BUG_ON(!req->global_seqno); - return i915_seqno_passed(intel_engine_get_seqno(req->engine), - req->global_seqno); + GEM_BUG_ON(!seqno); + return i915_seqno_passed(intel_engine_get_seqno(req->engine), seqno) && + seqno == i915_gem_request_global_seqno(req); } static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req) { - if (!req->global_seqno) + u32 seqno; + + seqno = i915_gem_request_global_seqno(req); + if (!seqno) return false; - return __i915_gem_request_completed(req); + return __i915_gem_request_completed(req, seqno); } bool __i915_spin_request(const struct drm_i915_gem_request *request, - int state, unsigned long timeout_us); + u32 seqno, int state, unsigned long timeout_us); static inline bool i915_spin_request(const struct drm_i915_gem_request *request, int state, unsigned long timeout_us) { - return (__i915_gem_request_started(request) && - __i915_spin_request(request, state, timeout_us)); + u32 seqno; + + seqno = i915_gem_request_global_seqno(request); + return (__i915_gem_request_started(request, seqno) && + __i915_spin_request(request, seqno, state, timeout_us)); } /* We treat requests as fences. This is not be to confused with our diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 401006b4c6a3..006a8b908f77 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -207,7 +207,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, if (!(flags & I915_SHRINK_ACTIVE) && (i915_gem_object_is_active(obj) || - obj->framebuffer_references)) + i915_gem_object_is_framebuffer(obj))) continue; if (!can_release_pages(obj)) @@ -259,10 +259,13 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) { unsigned long freed; + intel_runtime_pm_get(dev_priv); freed = i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_ACTIVE); + intel_runtime_pm_put(dev_priv); + rcu_barrier(); /* wait until our RCU delayed slab frees are completed */ return freed; @@ -380,9 +383,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) if (!i915_gem_shrinker_lock_uninterruptible(dev_priv, &slu, 5000)) return NOTIFY_DONE; - intel_runtime_pm_get(dev_priv); freed_pages = i915_gem_shrink_all(dev_priv); - intel_runtime_pm_put(dev_priv); /* Because we may be allocating inside our own driver, we cannot * assert that there are no objects with pinned pages that are not diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 9673bcc3b6ad..f3abdc27c5dd 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -79,12 +79,12 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, mutex_unlock(&dev_priv->mm.stolen_lock); } -static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv) +static dma_addr_t i915_stolen_to_dma(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; struct i915_ggtt *ggtt = &dev_priv->ggtt; struct resource *r; - u32 base; + dma_addr_t base; /* Almost universally we can find the Graphics Base of Stolen Memory * at register BSM (0x5c) in the igfx configuration space. On a few @@ -189,14 +189,14 @@ static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv) base = tom - tseg_size - ggtt->stolen_size; } - if (base == 0) + if (base == 0 || add_overflows(base, ggtt->stolen_size)) return 0; /* make sure we don't clobber the GTT if it's within stolen memory */ if (INTEL_GEN(dev_priv) <= 4 && !IS_G33(dev_priv) && !IS_PINEVIEW(dev_priv) && !IS_G4X(dev_priv)) { struct { - u32 start, end; + dma_addr_t start, end; } stolen[2] = { { .start = base, .end = base + ggtt->stolen_size, }, { .start = base, .end = base + ggtt->stolen_size, }, @@ -228,11 +228,13 @@ static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv) if (stolen[0].start != stolen[1].start || stolen[0].end != stolen[1].end) { + dma_addr_t end = base + ggtt->stolen_size - 1; + DRM_DEBUG_KMS("GTT within stolen memory at 0x%llx-0x%llx\n", (unsigned long long)ggtt_start, (unsigned long long)ggtt_end - 1); - DRM_DEBUG_KMS("Stolen memory adjusted to 0x%x-0x%x\n", - base, base + (u32)ggtt->stolen_size - 1); + DRM_DEBUG_KMS("Stolen memory adjusted to %pad - %pad\n", + &base, &end); } } @@ -261,8 +263,10 @@ static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv) * range. Apparently this works. */ if (r == NULL && !IS_GEN3(dev_priv)) { - DRM_ERROR("conflict detected with stolen region: [0x%08x - 0x%08x]\n", - base, base + (uint32_t)ggtt->stolen_size); + dma_addr_t end = base + ggtt->stolen_size; + + DRM_ERROR("conflict detected with stolen region: [%pad - %pad]\n", + &base, &end); base = 0; } } @@ -281,13 +285,13 @@ void i915_gem_cleanup_stolen(struct drm_device *dev) } static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { struct i915_ggtt *ggtt = &dev_priv->ggtt; uint32_t reg_val = I915_READ(IS_GM45(dev_priv) ? CTG_STOLEN_RESERVED : ELK_STOLEN_RESERVED); - phys_addr_t stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; + dma_addr_t stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; *base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16; @@ -304,7 +308,7 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); @@ -330,7 +334,7 @@ static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); @@ -350,7 +354,7 @@ static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); @@ -376,11 +380,11 @@ static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { struct i915_ggtt *ggtt = &dev_priv->ggtt; uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); - phys_addr_t stolen_top; + dma_addr_t stolen_top; stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; @@ -399,7 +403,7 @@ static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, int i915_gem_init_stolen(struct drm_i915_private *dev_priv) { struct i915_ggtt *ggtt = &dev_priv->ggtt; - phys_addr_t reserved_base, stolen_top; + dma_addr_t reserved_base, stolen_top; u32 reserved_total, reserved_size; u32 stolen_usable_start; @@ -420,7 +424,7 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) if (ggtt->stolen_size == 0) return 0; - dev_priv->mm.stolen_base = i915_stolen_to_physical(dev_priv); + dev_priv->mm.stolen_base = i915_stolen_to_dma(dev_priv); if (dev_priv->mm.stolen_base == 0) return 0; @@ -469,8 +473,8 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) if (reserved_base < dev_priv->mm.stolen_base || reserved_base + reserved_size > stolen_top) { - phys_addr_t reserved_top = reserved_base + reserved_size; - DRM_DEBUG_KMS("Stolen reserved area [%pa - %pa] outside stolen memory [%pa - %pa]\n", + dma_addr_t reserved_top = reserved_base + reserved_size; + DRM_DEBUG_KMS("Stolen reserved area [%pad - %pad] outside stolen memory [%pad - %pad]\n", &reserved_base, &reserved_top, &dev_priv->mm.stolen_base, &stolen_top); return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 974ac08df473..a0d6d4317a49 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -158,13 +158,8 @@ i915_tiling_ok(struct drm_i915_gem_object *obj, if (stride > 8192) return false; - if (IS_GEN3(i915)) { - if (obj->base.size > I830_FENCE_MAX_SIZE_VAL << 20) - return false; - } else { - if (obj->base.size > I830_FENCE_MAX_SIZE_VAL << 19) - return false; - } + if (!is_power_of_2(stride)) + return false; } if (IS_GEN2(i915) || @@ -176,12 +171,7 @@ i915_tiling_ok(struct drm_i915_gem_object *obj, if (!stride || !IS_ALIGNED(stride, tile_width)) return false; - /* 965+ just needs multiples of tile width */ - if (INTEL_GEN(i915) >= 4) - return true; - - /* Pre-965 needs power of two tile widths */ - return is_power_of_2(stride); + return true; } static bool i915_vma_fence_prepare(struct i915_vma *vma, @@ -248,7 +238,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, if ((tiling | stride) == obj->tiling_and_stride) return 0; - if (obj->framebuffer_references) + if (i915_gem_object_is_framebuffer(obj)) return -EBUSY; /* We need to rebind the object if its current allocation @@ -268,6 +258,12 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, if (err) return err; + i915_gem_object_lock(obj); + if (i915_gem_object_is_framebuffer(obj)) { + i915_gem_object_unlock(obj); + return -EBUSY; + } + /* If the memory has unknown (i.e. varying) swizzling, we pin the * pages to prevent them being swapped out and causing corruption * due to the change in swizzling. @@ -304,6 +300,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, } obj->tiling_and_stride = tiling | stride; + i915_gem_object_unlock(obj); /* Force the fence to be reacquired for GTT access */ i915_gem_release_mmap(obj); diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h index f2e51f42cc2f..6c53e14cab2a 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h @@ -33,7 +33,13 @@ struct i915_gem_timeline; struct intel_timeline { u64 fence_context; - u32 last_submitted_seqno; + u32 seqno; + + /** + * Count of outstanding requests, from the time they are constructed + * to the moment they are retired. Loosely coupled to hardware. + */ + u32 inflight_seqnos; spinlock_t lock; @@ -56,7 +62,6 @@ struct intel_timeline { struct i915_gem_timeline { struct list_head link; - atomic_t seqno; struct drm_i915_private *i915; const char *name; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 9cd22cda17af..8effc59f5cb5 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -342,7 +342,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, } static void error_print_instdone(struct drm_i915_error_state_buf *m, - struct drm_i915_error_engine *ee) + const struct drm_i915_error_engine *ee) { int slice; int subslice; @@ -372,7 +372,7 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, static void error_print_request(struct drm_i915_error_state_buf *m, const char *prefix, - struct drm_i915_error_request *erq) + const struct drm_i915_error_request *erq) { if (!erq->seqno) return; @@ -384,8 +384,17 @@ static void error_print_request(struct drm_i915_error_state_buf *m, erq->head, erq->tail); } +static void error_print_context(struct drm_i915_error_state_buf *m, + const char *header, + const struct drm_i915_error_context *ctx) +{ + err_printf(m, "%s%s[%d] user_handle %d hw_id %d, ban score %d guilty %d active %d\n", + header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id, + ctx->ban_score, ctx->guilty, ctx->active); +} + static void error_print_engine(struct drm_i915_error_state_buf *m, - struct drm_i915_error_engine *ee) + const struct drm_i915_error_engine *ee) { err_printf(m, "%s command stream:\n", engine_str(ee->engine_id)); err_printf(m, " START: 0x%08x\n", ee->start); @@ -457,6 +466,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, error_print_request(m, " ELSP[0]: ", &ee->execlist[0]); error_print_request(m, " ELSP[1]: ", &ee->execlist[1]); + error_print_context(m, " Active context: ", &ee->context); } void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) @@ -536,21 +546,57 @@ static void err_print_capabilities(struct drm_i915_error_state_buf *m, #undef PRINT_FLAG } +static __always_inline void err_print_param(struct drm_i915_error_state_buf *m, + const char *name, + const char *type, + const void *x) +{ + if (!__builtin_strcmp(type, "bool")) + err_printf(m, "i915.%s=%s\n", name, yesno(*(const bool *)x)); + else if (!__builtin_strcmp(type, "int")) + err_printf(m, "i915.%s=%d\n", name, *(const int *)x); + else if (!__builtin_strcmp(type, "unsigned int")) + err_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x); + else if (!__builtin_strcmp(type, "char *")) + err_printf(m, "i915.%s=%s\n", name, *(const char **)x); + else + BUILD_BUG(); +} + +static void err_print_params(struct drm_i915_error_state_buf *m, + const struct i915_params *p) +{ +#define PRINT(T, x) err_print_param(m, #x, #T, &p->x); + I915_PARAMS_FOR_EACH(PRINT); +#undef PRINT +} + +static void err_print_pciid(struct drm_i915_error_state_buf *m, + struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + + err_printf(m, "PCI ID: 0x%04x\n", pdev->device); + err_printf(m, "PCI Revision: 0x%02x\n", pdev->revision); + err_printf(m, "PCI Subsystem: %04x:%04x\n", + pdev->subsystem_vendor, + pdev->subsystem_device); +} + int i915_error_state_to_str(struct drm_i915_error_state_buf *m, - const struct i915_error_state_file_priv *error_priv) + const struct i915_gpu_state *error) { - struct drm_i915_private *dev_priv = error_priv->i915; - struct pci_dev *pdev = dev_priv->drm.pdev; - struct drm_i915_error_state *error = error_priv->error; + struct drm_i915_private *dev_priv = m->i915; struct drm_i915_error_object *obj; int i, j; if (!error) { - err_printf(m, "no error state collected\n"); - goto out; + err_printf(m, "No error state collected\n"); + return 0; } - err_printf(m, "%s\n", error->error_msg); + if (*error->error_msg) + err_printf(m, "%s\n", error->error_msg); err_printf(m, "Kernel: " UTS_RELEASE "\n"); err_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec, error->time.tv_usec); @@ -558,26 +604,22 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, error->boottime.tv_sec, error->boottime.tv_usec); err_printf(m, "Uptime: %ld s %ld us\n", error->uptime.tv_sec, error->uptime.tv_usec); - err_print_capabilities(m, &error->device_info); for (i = 0; i < ARRAY_SIZE(error->engine); i++) { if (error->engine[i].hangcheck_stalled && - error->engine[i].pid != -1) { - err_printf(m, "Active process (on ring %s): %s [%d], context bans %d\n", + error->engine[i].context.pid) { + err_printf(m, "Active process (on ring %s): %s [%d], score %d\n", engine_str(i), - error->engine[i].comm, - error->engine[i].pid, - error->engine[i].context_bans); + error->engine[i].context.comm, + error->engine[i].context.pid, + error->engine[i].context.ban_score); } } err_printf(m, "Reset count: %u\n", error->reset_count); err_printf(m, "Suspend count: %u\n", error->suspend_count); err_printf(m, "Platform: %s\n", intel_platform_name(error->device_info.platform)); - err_printf(m, "PCI ID: 0x%04x\n", pdev->device); - err_printf(m, "PCI Revision: 0x%02x\n", pdev->revision); - err_printf(m, "PCI Subsystem: %04x:%04x\n", - pdev->subsystem_vendor, - pdev->subsystem_device); + err_print_pciid(m, error->i915); + err_printf(m, "IOMMU enabled?: %d\n", error->iommu); if (HAS_CSR(dev_priv)) { @@ -590,21 +632,20 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, CSR_VERSION_MINOR(csr->version)); } + err_printf(m, "GT awake: %s\n", yesno(error->awake)); + err_printf(m, "RPM wakelock: %s\n", yesno(error->wakelock)); + err_printf(m, "PM suspended: %s\n", yesno(error->suspended)); err_printf(m, "EIR: 0x%08x\n", error->eir); err_printf(m, "IER: 0x%08x\n", error->ier); - if (INTEL_GEN(dev_priv) >= 8) { - for (i = 0; i < 4; i++) - err_printf(m, "GTIER gt %d: 0x%08x\n", i, - error->gtier[i]); - } else if (HAS_PCH_SPLIT(dev_priv) || IS_VALLEYVIEW(dev_priv)) - err_printf(m, "GTIER: 0x%08x\n", error->gtier[0]); + for (i = 0; i < error->ngtier; i++) + err_printf(m, "GTIER[%d]: 0x%08x\n", i, error->gtier[i]); err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er); err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake); err_printf(m, "DERRMR: 0x%08x\n", error->derrmr); err_printf(m, "CCID: 0x%08x\n", error->ccid); err_printf(m, "Missed interrupts: 0x%08lx\n", dev_priv->gpu_error.missed_irq_rings); - for (i = 0; i < dev_priv->num_fence_regs; i++) + for (i = 0; i < error->nfence; i++) err_printf(m, " fence[%d] = %08llx\n", i, error->fence[i]); if (INTEL_GEN(dev_priv) >= 6) { @@ -653,16 +694,18 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, error->pinned_bo_count); for (i = 0; i < ARRAY_SIZE(error->engine); i++) { - struct drm_i915_error_engine *ee = &error->engine[i]; + const struct drm_i915_error_engine *ee = &error->engine[i]; obj = ee->batchbuffer; if (obj) { err_puts(m, dev_priv->engine[i]->name); - if (ee->pid != -1) - err_printf(m, " (submitted by %s [%d], bans %d)", - ee->comm, - ee->pid, - ee->context_bans); + if (ee->context.pid) + err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d)", + ee->context.comm, + ee->context.pid, + ee->context.handle, + ee->context.hw_id, + ee->context.ban_score); err_printf(m, " --- gtt_offset = 0x%08x %08x\n", upper_32_bits(obj->gtt_offset), lower_32_bits(obj->gtt_offset)); @@ -716,9 +759,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, intel_overlay_print_error_state(m, error->overlay); if (error->display) - intel_display_print_error_state(m, dev_priv, error->display); + intel_display_print_error_state(m, error->display); + + err_print_capabilities(m, &error->device_info); + err_print_params(m, &error->params); -out: if (m->bytes == 0 && m->err) return m->err; @@ -770,10 +815,16 @@ static void i915_error_object_free(struct drm_i915_error_object *obj) kfree(obj); } -static void i915_error_state_free(struct kref *error_ref) +static __always_inline void free_param(const char *type, void *x) +{ + if (!__builtin_strcmp(type, "char *")) + kfree(*(void **)x); +} + +void __i915_gpu_state_free(struct kref *error_ref) { - struct drm_i915_error_state *error = container_of(error_ref, - typeof(*error), ref); + struct i915_gpu_state *error = + container_of(error_ref, typeof(*error), ref); int i; for (i = 0; i < ARRAY_SIZE(error->engine); i++) { @@ -800,6 +851,11 @@ static void i915_error_state_free(struct kref *error_ref) kfree(error->overlay); kfree(error->display); + +#define FREE(T, x) free_param(#T, &error->params.x); + I915_PARAMS_FOR_EACH(FREE); +#undef FREE + kfree(error); } @@ -938,7 +994,7 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err, * It's only a small step better than a random number in its current form. */ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error, + struct i915_gpu_state *error, int *engine_id) { uint32_t error_code = 0; @@ -963,20 +1019,21 @@ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, } static void i915_gem_record_fences(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { int i; - if (IS_GEN3(dev_priv) || IS_GEN2(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 6) { for (i = 0; i < dev_priv->num_fence_regs; i++) - error->fence[i] = I915_READ(FENCE_REG(i)); - } else if (IS_GEN5(dev_priv) || IS_GEN4(dev_priv)) { + error->fence[i] = I915_READ64(FENCE_REG_GEN6_LO(i)); + } else if (INTEL_GEN(dev_priv) >= 4) { for (i = 0; i < dev_priv->num_fence_regs; i++) error->fence[i] = I915_READ64(FENCE_REG_965_LO(i)); - } else if (INTEL_GEN(dev_priv) >= 6) { + } else { for (i = 0; i < dev_priv->num_fence_regs; i++) - error->fence[i] = I915_READ64(FENCE_REG_GEN6_LO(i)); + error->fence[i] = I915_READ(FENCE_REG(i)); } + error->nfence = i; } static inline u32 @@ -1000,7 +1057,7 @@ gen8_engine_sync_index(struct intel_engine_cs *engine, return idx; } -static void gen8_record_semaphore_state(struct drm_i915_error_state *error, +static void gen8_record_semaphore_state(struct i915_gpu_state *error, struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) { @@ -1054,7 +1111,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, if (RB_EMPTY_ROOT(&b->waiters)) return; - if (!spin_trylock_irq(&b->lock)) { + if (!spin_trylock_irq(&b->rb_lock)) { ee->waiters = ERR_PTR(-EDEADLK); return; } @@ -1062,7 +1119,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, count = 0; for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb)) count++; - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); waiter = NULL; if (count) @@ -1072,7 +1129,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, if (!waiter) return; - if (!spin_trylock_irq(&b->lock)) { + if (!spin_trylock_irq(&b->rb_lock)) { kfree(waiter); ee->waiters = ERR_PTR(-EDEADLK); return; @@ -1080,7 +1137,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, ee->waiters = waiter; for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = container_of(rb, typeof(*w), node); + struct intel_wait *w = rb_entry(rb, typeof(*w), node); strcpy(waiter->comm, w->tsk->comm); waiter->pid = w->tsk->pid; @@ -1090,10 +1147,10 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, if (++ee->num_waiters == count) break; } - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); } -static void error_record_engine_registers(struct drm_i915_error_state *error, +static void error_record_engine_registers(struct i915_gpu_state *error, struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) { @@ -1267,8 +1324,30 @@ static void error_record_engine_execlists(struct intel_engine_cs *engine, &ee->execlist[n]); } +static void record_context(struct drm_i915_error_context *e, + struct i915_gem_context *ctx) +{ + if (ctx->pid) { + struct task_struct *task; + + rcu_read_lock(); + task = pid_task(ctx->pid, PIDTYPE_PID); + if (task) { + strcpy(e->comm, task->comm); + e->pid = task->pid; + } + rcu_read_unlock(); + } + + e->handle = ctx->user_handle; + e->hw_id = ctx->hw_id; + e->ban_score = ctx->ban_score; + e->guilty = ctx->guilty_count; + e->active = ctx->active_count; +} + static void i915_gem_record_rings(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { struct i915_ggtt *ggtt = &dev_priv->ggtt; int i; @@ -1281,7 +1360,6 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, struct drm_i915_error_engine *ee = &error->engine[i]; struct drm_i915_gem_request *request; - ee->pid = -1; ee->engine_id = -1; if (!engine) @@ -1296,11 +1374,12 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, request = i915_gem_find_active_request(engine); if (request) { struct intel_ring *ring; - struct pid *pid; ee->vm = request->ctx->ppgtt ? &request->ctx->ppgtt->base : &ggtt->base; + record_context(&ee->context, request->ctx); + /* We need to copy these to an anonymous buffer * as the simplest method to avoid being overwritten * by userspace. @@ -1318,19 +1397,6 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, i915_error_object_create(dev_priv, request->ctx->engine[i].state); - pid = request->ctx->pid; - if (pid) { - struct task_struct *task; - - rcu_read_lock(); - task = pid_task(pid, PIDTYPE_PID); - if (task) { - strcpy(ee->comm, task->comm); - ee->pid = task->pid; - } - rcu_read_unlock(); - } - error->simulated |= i915_gem_context_no_error_capture(request->ctx); @@ -1357,7 +1423,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, } static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error, + struct i915_gpu_state *error, struct i915_address_space *vm, int idx) { @@ -1383,7 +1449,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, } static void i915_capture_active_buffers(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { int cnt = 0, i, j; @@ -1408,7 +1474,7 @@ static void i915_capture_active_buffers(struct drm_i915_private *dev_priv, } static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { struct i915_address_space *vm = &dev_priv->ggtt.base; struct drm_i915_error_buffer *bo; @@ -1439,7 +1505,7 @@ static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv, } static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { /* Capturing log buf contents won't be useful if logging was disabled */ if (!dev_priv->guc.log.vma || (i915.guc_log_level < 0)) @@ -1451,7 +1517,7 @@ static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv, /* Capture all registers which don't fit into another category. */ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { int i; @@ -1508,9 +1574,11 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, error->ier = I915_READ(GEN8_DE_MISC_IER); for (i = 0; i < 4; i++) error->gtier[i] = I915_READ(GEN8_GT_IER(i)); + error->ngtier = 4; } else if (HAS_PCH_SPLIT(dev_priv)) { error->ier = I915_READ(DEIER); error->gtier[0] = I915_READ(GTIER); + error->ngtier = 1; } else if (IS_GEN2(dev_priv)) { error->ier = I915_READ16(IER); } else if (!IS_VALLEYVIEW(dev_priv)) { @@ -1521,7 +1589,7 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, } static void i915_error_capture_msg(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error, + struct i915_gpu_state *error, u32 engine_mask, const char *error_msg) { @@ -1534,12 +1602,12 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv, "GPU HANG: ecode %d:%d:0x%08x", INTEL_GEN(dev_priv), engine_id, ecode); - if (engine_id != -1 && error->engine[engine_id].pid != -1) + if (engine_id != -1 && error->engine[engine_id].context.pid) len += scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, ", in %s [%d]", - error->engine[engine_id].comm, - error->engine[engine_id].pid); + error->engine[engine_id].context.comm, + error->engine[engine_id].context.pid); scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, ", reason: %s, action: %s", @@ -1548,8 +1616,12 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv, } static void i915_capture_gen_state(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { + error->awake = dev_priv->gt.awake; + error->wakelock = atomic_read(&dev_priv->pm.wakeref_count); + error->suspended = dev_priv->pm.suspended; + error->iommu = -1; #ifdef CONFIG_INTEL_IOMMU error->iommu = intel_iommu_gfx_mapped; @@ -1562,9 +1634,26 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv, sizeof(error->device_info)); } +static __always_inline void dup_param(const char *type, void *x) +{ + if (!__builtin_strcmp(type, "char *")) + *(void **)x = kstrdup(*(void **)x, GFP_ATOMIC); +} + static int capture(void *data) { - struct drm_i915_error_state *error = data; + struct i915_gpu_state *error = data; + + do_gettimeofday(&error->time); + error->boottime = ktime_to_timeval(ktime_get_boottime()); + error->uptime = + ktime_to_timeval(ktime_sub(ktime_get(), + error->i915->gt.last_init_time)); + + error->params = i915; +#define DUP(T, x) dup_param(#T, &error->params.x); + I915_PARAMS_FOR_EACH(DUP); +#undef DUP i915_capture_gen_state(error->i915, error); i915_capture_reg_state(error->i915, error); @@ -1574,12 +1663,6 @@ static int capture(void *data) i915_capture_pinned_buffers(error->i915, error); i915_gem_capture_guc_log_buffer(error->i915, error); - do_gettimeofday(&error->time); - error->boottime = ktime_to_timeval(ktime_get_boottime()); - error->uptime = - ktime_to_timeval(ktime_sub(ktime_get(), - error->i915->gt.last_init_time)); - error->overlay = intel_overlay_capture_error_state(error->i915); error->display = intel_display_capture_error_state(error->i915); @@ -1588,6 +1671,23 @@ static int capture(void *data) #define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x)) +struct i915_gpu_state * +i915_capture_gpu_state(struct drm_i915_private *i915) +{ + struct i915_gpu_state *error; + + error = kzalloc(sizeof(*error), GFP_ATOMIC); + if (!error) + return NULL; + + kref_init(&error->ref); + error->i915 = i915; + + stop_machine(capture, error, NULL); + + return error; +} + /** * i915_capture_error_state - capture an error record for later analysis * @dev: drm device @@ -1602,7 +1702,7 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, const char *error_msg) { static bool warned; - struct drm_i915_error_state *error; + struct i915_gpu_state *error; unsigned long flags; if (!i915.error_capture) @@ -1611,18 +1711,12 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, if (READ_ONCE(dev_priv->gpu_error.first_error)) return; - /* Account for pipe specific data like PIPE*STAT */ - error = kzalloc(sizeof(*error), GFP_ATOMIC); + error = i915_capture_gpu_state(dev_priv); if (!error) { DRM_DEBUG_DRIVER("out of memory, not capturing error state\n"); return; } - kref_init(&error->ref); - error->i915 = dev_priv; - - stop_machine(capture, error, NULL); - i915_error_capture_msg(dev_priv, error, engine_mask, error_msg); DRM_INFO("%s\n", error->error_msg); @@ -1636,7 +1730,7 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, } if (error) { - i915_error_state_free(&error->ref); + __i915_gpu_state_free(&error->ref); return; } @@ -1652,33 +1746,28 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, } } -void i915_error_state_get(struct drm_device *dev, - struct i915_error_state_file_priv *error_priv) +struct i915_gpu_state * +i915_first_error_state(struct drm_i915_private *i915) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct i915_gpu_state *error; - spin_lock_irq(&dev_priv->gpu_error.lock); - error_priv->error = dev_priv->gpu_error.first_error; - if (error_priv->error) - kref_get(&error_priv->error->ref); - spin_unlock_irq(&dev_priv->gpu_error.lock); -} + spin_lock_irq(&i915->gpu_error.lock); + error = i915->gpu_error.first_error; + if (error) + i915_gpu_state_get(error); + spin_unlock_irq(&i915->gpu_error.lock); -void i915_error_state_put(struct i915_error_state_file_priv *error_priv) -{ - if (error_priv->error) - kref_put(&error_priv->error->ref, i915_error_state_free); + return error; } -void i915_destroy_error_state(struct drm_i915_private *dev_priv) +void i915_reset_error_state(struct drm_i915_private *i915) { - struct drm_i915_error_state *error; + struct i915_gpu_state *error; - spin_lock_irq(&dev_priv->gpu_error.lock); - error = dev_priv->gpu_error.first_error; - dev_priv->gpu_error.first_error = NULL; - spin_unlock_irq(&dev_priv->gpu_error.lock); + spin_lock_irq(&i915->gpu_error.lock); + error = i915->gpu_error.first_error; + i915->gpu_error.first_error = NULL; + spin_unlock_irq(&i915->gpu_error.lock); - if (error) - kref_put(&error->ref, i915_error_state_free); + i915_gpu_state_put(error); } diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 8ced9e26f075..beb38e30d0e9 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -348,7 +348,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request) u32 freespace; int ret; - spin_lock(&client->wq_lock); + spin_lock_irq(&client->wq_lock); freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size); freespace -= client->wq_rsvd; if (likely(freespace >= wqi_size)) { @@ -358,21 +358,27 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request) client->no_wq_space++; ret = -EAGAIN; } - spin_unlock(&client->wq_lock); + spin_unlock_irq(&client->wq_lock); return ret; } +static void guc_client_update_wq_rsvd(struct i915_guc_client *client, int size) +{ + unsigned long flags; + + spin_lock_irqsave(&client->wq_lock, flags); + client->wq_rsvd += size; + spin_unlock_irqrestore(&client->wq_lock, flags); +} + void i915_guc_wq_unreserve(struct drm_i915_gem_request *request) { - const size_t wqi_size = sizeof(struct guc_wq_item); + const int wqi_size = sizeof(struct guc_wq_item); struct i915_guc_client *client = request->i915->guc.execbuf_client; GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size); - - spin_lock(&client->wq_lock); - client->wq_rsvd -= wqi_size; - spin_unlock(&client->wq_lock); + guc_client_update_wq_rsvd(client, -wqi_size); } /* Construct a Work Item and append it to the GuC's Work Queue */ @@ -509,15 +515,18 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) unsigned int engine_id = engine->id; struct intel_guc *guc = &rq->i915->guc; struct i915_guc_client *client = guc->execbuf_client; + unsigned long flags; int b_ret; - spin_lock(&client->wq_lock); - guc_wq_item_append(client, rq); - /* WA to flush out the pending GMADR writes to ring buffer. */ if (i915_vma_is_map_and_fenceable(rq->ring->vma)) POSTING_READ_FW(GUC_STATUS); + trace_i915_gem_request_in(rq, 0); + + spin_lock_irqsave(&client->wq_lock, flags); + + guc_wq_item_append(client, rq); b_ret = guc_ring_doorbell(client); client->submissions[engine_id] += 1; @@ -527,7 +536,8 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) guc->submissions[engine_id] += 1; guc->last_seqno[engine_id] = rq->global_seqno; - spin_unlock(&client->wq_lock); + + spin_unlock_irqrestore(&client->wq_lock, flags); } static void i915_guc_submit(struct drm_i915_gem_request *rq) @@ -943,16 +953,19 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) /* Take over from manual control of ELSP (execlists) */ for_each_engine(engine, dev_priv, id) { + const int wqi_size = sizeof(struct guc_wq_item); struct drm_i915_gem_request *rq; engine->submit_request = i915_guc_submit; engine->schedule = NULL; /* Replay the current set of previously submitted requests */ + spin_lock_irq(&engine->timeline->lock); list_for_each_entry(rq, &engine->timeline->requests, link) { - client->wq_rsvd += sizeof(struct guc_wq_item); + guc_client_update_wq_rsvd(client, wqi_size); __i915_guc_submit(rq); } + spin_unlock_irq(&engine->timeline->lock); } return 0; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index a62feb686895..df95733cf112 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -180,7 +180,7 @@ i915_hotplug_interrupt_update_locked(struct drm_i915_private *dev_priv, { uint32_t val; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(bits & ~mask); val = I915_READ(PORT_HOTPLUG_EN); @@ -222,7 +222,7 @@ void ilk_update_display_irq(struct drm_i915_private *dev_priv, { uint32_t new_val; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(enabled_irq_mask & ~interrupt_mask); @@ -250,7 +250,7 @@ static void ilk_update_gt_irq(struct drm_i915_private *dev_priv, uint32_t interrupt_mask, uint32_t enabled_irq_mask) { - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(enabled_irq_mask & ~interrupt_mask); @@ -302,7 +302,7 @@ static void snb_update_pm_irq(struct drm_i915_private *dev_priv, WARN_ON(enabled_irq_mask & ~interrupt_mask); - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); new_val = dev_priv->pm_imr; new_val &= ~interrupt_mask; @@ -340,7 +340,7 @@ void gen6_reset_pm_iir(struct drm_i915_private *dev_priv, u32 reset_mask) { i915_reg_t reg = gen6_pm_iir(dev_priv); - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); I915_WRITE(reg, reset_mask); I915_WRITE(reg, reset_mask); @@ -349,7 +349,7 @@ void gen6_reset_pm_iir(struct drm_i915_private *dev_priv, u32 reset_mask) void gen6_enable_pm_irq(struct drm_i915_private *dev_priv, u32 enable_mask) { - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); dev_priv->pm_ier |= enable_mask; I915_WRITE(gen6_pm_ier(dev_priv), dev_priv->pm_ier); @@ -359,7 +359,7 @@ void gen6_enable_pm_irq(struct drm_i915_private *dev_priv, u32 enable_mask) void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, u32 disable_mask) { - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); dev_priv->pm_ier &= ~disable_mask; __gen6_mask_pm_irq(dev_priv, disable_mask); @@ -463,7 +463,7 @@ static void bdw_update_port_irq(struct drm_i915_private *dev_priv, uint32_t new_val; uint32_t old_val; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(enabled_irq_mask & ~interrupt_mask); @@ -496,7 +496,7 @@ void bdw_update_pipe_irq(struct drm_i915_private *dev_priv, { uint32_t new_val; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(enabled_irq_mask & ~interrupt_mask); @@ -530,7 +530,7 @@ void ibx_display_interrupt_update(struct drm_i915_private *dev_priv, WARN_ON(enabled_irq_mask & ~interrupt_mask); - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if (WARN_ON(!intel_irqs_enabled(dev_priv))) return; @@ -546,7 +546,7 @@ __i915_enable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe, i915_reg_t reg = PIPESTAT(pipe); u32 pipestat = I915_READ(reg) & PIPESTAT_INT_ENABLE_MASK; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(!intel_irqs_enabled(dev_priv)); if (WARN_ONCE(enable_mask & ~PIPESTAT_INT_ENABLE_MASK || @@ -573,7 +573,7 @@ __i915_disable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe, i915_reg_t reg = PIPESTAT(pipe); u32 pipestat = I915_READ(reg) & PIPESTAT_INT_ENABLE_MASK; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(!intel_irqs_enabled(dev_priv)); if (WARN_ONCE(enable_mask & ~PIPESTAT_INT_ENABLE_MASK || @@ -783,6 +783,9 @@ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) enum pipe pipe = crtc->pipe; int position, vtotal; + if (!crtc->active) + return -1; + vtotal = mode->crtc_vtotal; if (mode->flags & DRM_MODE_FLAG_INTERLACE) vtotal /= 2; @@ -1033,9 +1036,42 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) static void notify_ring(struct intel_engine_cs *engine) { - smp_store_mb(engine->breadcrumbs.irq_posted, true); - if (intel_engine_wakeup(engine)) - trace_i915_gem_request_notify(engine); + struct drm_i915_gem_request *rq = NULL; + struct intel_wait *wait; + + atomic_inc(&engine->irq_count); + set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); + + spin_lock(&engine->breadcrumbs.irq_lock); + wait = engine->breadcrumbs.irq_wait; + if (wait) { + /* We use a callback from the dma-fence to submit + * requests after waiting on our own requests. To + * ensure minimum delay in queuing the next request to + * hardware, signal the fence now rather than wait for + * the signaler to be woken up. We still wake up the + * waiter in order to handle the irq-seqno coherency + * issues (we may receive the interrupt before the + * seqno is written, see __i915_request_irq_complete()) + * and to handle coalescing of multiple seqno updates + * and many waiters. + */ + if (i915_seqno_passed(intel_engine_get_seqno(engine), + wait->seqno)) + rq = i915_gem_request_get(wait->request); + + wake_up_process(wait->tsk); + } else { + __intel_engine_disarm_breadcrumbs(engine); + } + spin_unlock(&engine->breadcrumbs.irq_lock); + + if (rq) { + dma_fence_signal(&rq->fence); + i915_gem_request_put(rq); + } + + trace_intel_engine_notify(engine, wait); } static void vlv_c0_read(struct drm_i915_private *dev_priv, @@ -1173,20 +1209,12 @@ static void gen6_pm_rps_work(struct work_struct *work) if (new_delay >= dev_priv->rps.max_freq_softlimit) adj = 0; - /* - * For better performance, jump directly - * to RPe if we're below it. - */ - if (new_delay < dev_priv->rps.efficient_freq - adj) { - new_delay = dev_priv->rps.efficient_freq; - adj = 0; - } } else if (client_boost || any_waiters(dev_priv)) { adj = 0; } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq) new_delay = dev_priv->rps.efficient_freq; - else + else if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit) new_delay = dev_priv->rps.min_freq_softlimit; adj = 0; } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { @@ -1209,7 +1237,10 @@ static void gen6_pm_rps_work(struct work_struct *work) new_delay += adj; new_delay = clamp_t(int, new_delay, min, max); - intel_set_rps(dev_priv, new_delay); + if (intel_set_rps(dev_priv, new_delay)) { + DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); + dev_priv->rps.last_adj = 0; + } mutex_unlock(&dev_priv->rps.hw_lock); } @@ -1349,8 +1380,11 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) { if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) notify_ring(engine); - if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) - tasklet_schedule(&engine->irq_tasklet); + + if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) { + set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + tasklet_hi_schedule(&engine->irq_tasklet); + } } static irqreturn_t gen8_gt_irq_ack(struct drm_i915_private *dev_priv, @@ -3106,19 +3140,9 @@ static u32 intel_hpd_enabled_irqs(struct drm_i915_private *dev_priv, return enabled_irqs; } -static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) +static void ibx_hpd_detection_setup(struct drm_i915_private *dev_priv) { - u32 hotplug_irqs, hotplug, enabled_irqs; - - if (HAS_PCH_IBX(dev_priv)) { - hotplug_irqs = SDE_HOTPLUG_MASK; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_ibx); - } else { - hotplug_irqs = SDE_HOTPLUG_MASK_CPT; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_cpt); - } - - ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); + u32 hotplug; /* * Enable digital hotplug on the PCH, and configure the DP short pulse @@ -3126,10 +3150,12 @@ static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) * The pulse duration bits are reserved on LPT+. */ hotplug = I915_READ(PCH_PORT_HOTPLUG); - hotplug &= ~(PORTD_PULSE_DURATION_MASK|PORTC_PULSE_DURATION_MASK|PORTB_PULSE_DURATION_MASK); - hotplug |= PORTD_HOTPLUG_ENABLE | PORTD_PULSE_DURATION_2ms; - hotplug |= PORTC_HOTPLUG_ENABLE | PORTC_PULSE_DURATION_2ms; + hotplug &= ~(PORTB_PULSE_DURATION_MASK | + PORTC_PULSE_DURATION_MASK | + PORTD_PULSE_DURATION_MASK); hotplug |= PORTB_HOTPLUG_ENABLE | PORTB_PULSE_DURATION_2ms; + hotplug |= PORTC_HOTPLUG_ENABLE | PORTC_PULSE_DURATION_2ms; + hotplug |= PORTD_HOTPLUG_ENABLE | PORTD_PULSE_DURATION_2ms; /* * When CPU and PCH are on the same package, port A * HPD must be enabled in both north and south. @@ -3139,6 +3165,23 @@ static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) I915_WRITE(PCH_PORT_HOTPLUG, hotplug); } +static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) +{ + u32 hotplug_irqs, enabled_irqs; + + if (HAS_PCH_IBX(dev_priv)) { + hotplug_irqs = SDE_HOTPLUG_MASK; + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_ibx); + } else { + hotplug_irqs = SDE_HOTPLUG_MASK_CPT; + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_cpt); + } + + ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); + + ibx_hpd_detection_setup(dev_priv); +} + static void spt_hpd_detection_setup(struct drm_i915_private *dev_priv) { u32 hotplug; @@ -3168,9 +3211,25 @@ static void spt_hpd_irq_setup(struct drm_i915_private *dev_priv) spt_hpd_detection_setup(dev_priv); } +static void ilk_hpd_detection_setup(struct drm_i915_private *dev_priv) +{ + u32 hotplug; + + /* + * Enable digital hotplug on the CPU, and configure the DP short pulse + * duration to 2ms (which is the minimum in the Display Port spec) + * The pulse duration bits are reserved on HSW+. + */ + hotplug = I915_READ(DIGITAL_PORT_HOTPLUG_CNTRL); + hotplug &= ~DIGITAL_PORTA_PULSE_DURATION_MASK; + hotplug |= DIGITAL_PORTA_HOTPLUG_ENABLE | + DIGITAL_PORTA_PULSE_DURATION_2ms; + I915_WRITE(DIGITAL_PORT_HOTPLUG_CNTRL, hotplug); +} + static void ilk_hpd_irq_setup(struct drm_i915_private *dev_priv) { - u32 hotplug_irqs, hotplug, enabled_irqs; + u32 hotplug_irqs, enabled_irqs; if (INTEL_GEN(dev_priv) >= 8) { hotplug_irqs = GEN8_PORT_DP_A_HOTPLUG; @@ -3189,15 +3248,7 @@ static void ilk_hpd_irq_setup(struct drm_i915_private *dev_priv) ilk_update_display_irq(dev_priv, hotplug_irqs, enabled_irqs); } - /* - * Enable digital hotplug on the CPU, and configure the DP short pulse - * duration to 2ms (which is the minimum in the Display Port spec) - * The pulse duration bits are reserved on HSW+. - */ - hotplug = I915_READ(DIGITAL_PORT_HOTPLUG_CNTRL); - hotplug &= ~DIGITAL_PORTA_PULSE_DURATION_MASK; - hotplug |= DIGITAL_PORTA_HOTPLUG_ENABLE | DIGITAL_PORTA_PULSE_DURATION_2ms; - I915_WRITE(DIGITAL_PORT_HOTPLUG_CNTRL, hotplug); + ilk_hpd_detection_setup(dev_priv); ibx_hpd_irq_setup(dev_priv); } @@ -3268,7 +3319,7 @@ static void ibx_irq_postinstall(struct drm_device *dev) if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) || HAS_PCH_LPT(dev_priv)) - ; /* TODO: Enable HPD detection on older PCH platforms too */ + ibx_hpd_detection_setup(dev_priv); else spt_hpd_detection_setup(dev_priv); } @@ -3345,6 +3396,8 @@ static int ironlake_irq_postinstall(struct drm_device *dev) gen5_gt_irq_postinstall(dev); + ilk_hpd_detection_setup(dev_priv); + ibx_irq_postinstall(dev); if (IS_IRONLAKE_M(dev_priv)) { @@ -3363,7 +3416,7 @@ static int ironlake_irq_postinstall(struct drm_device *dev) void valleyview_enable_display_irqs(struct drm_i915_private *dev_priv) { - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if (dev_priv->display_irqs_enabled) return; @@ -3378,7 +3431,7 @@ void valleyview_enable_display_irqs(struct drm_i915_private *dev_priv) void valleyview_disable_display_irqs(struct drm_i915_private *dev_priv) { - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if (!dev_priv->display_irqs_enabled) return; @@ -3485,6 +3538,8 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) if (IS_GEN9_LP(dev_priv)) bxt_hpd_detection_setup(dev_priv); + else if (IS_BROADWELL(dev_priv)) + ilk_hpd_detection_setup(dev_priv); } static int gen8_irq_postinstall(struct drm_device *dev) @@ -4052,7 +4107,7 @@ static void i915_hpd_irq_setup(struct drm_i915_private *dev_priv) { u32 hotplug_en; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); /* Note HDMI and DP share hotplug bits */ /* enable bits are the same for all generations */ @@ -4265,6 +4320,18 @@ void intel_irq_init(struct drm_i915_private *dev_priv) if (!IS_GEN2(dev_priv)) dev->vblank_disable_immediate = true; + /* Most platforms treat the display irq block as an always-on + * power domain. vlv/chv can disable it at runtime and need + * special care to avoid writing any of the display block registers + * outside of the power domain. We defer setting up the display irqs + * in this case to the runtime pm. + */ + dev_priv->display_irqs_enabled = true; + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + dev_priv->display_irqs_enabled = false; + + dev_priv->hotplug.hpd_storm_threshold = HPD_STORM_DEFAULT_THRESHOLD; + dev->driver->get_vblank_timestamp = i915_get_vblank_timestamp; dev->driver->get_scanout_position = i915_get_crtc_scanoutpos; diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 0e280fbd52f1..2e9645e6555a 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -145,7 +145,7 @@ MODULE_PARM_DESC(enable_psr, "Enable PSR " "(0=disabled, 1=enabled - link mode chosen per-platform, 2=force link-standby mode, 3=force link-off mode) " "Default: -1 (use per-chip default)"); -module_param_named_unsafe(alpha_support, i915.alpha_support, int, 0400); +module_param_named_unsafe(alpha_support, i915.alpha_support, bool, 0400); MODULE_PARM_DESC(alpha_support, "Enable alpha quality driver support for latest hardware. " "See also CONFIG_DRM_I915_ALPHA_SUPPORT."); @@ -205,9 +205,9 @@ module_param_named(verbose_state_checks, i915.verbose_state_checks, bool, 0600); MODULE_PARM_DESC(verbose_state_checks, "Enable verbose logs (ie. WARN_ON()) in case of unexpected hw state conditions."); -module_param_named_unsafe(nuclear_pageflip, i915.nuclear_pageflip, bool, 0600); +module_param_named_unsafe(nuclear_pageflip, i915.nuclear_pageflip, bool, 0400); MODULE_PARM_DESC(nuclear_pageflip, - "Force atomic modeset functionality; asynchronous mode is not yet supported. (default: false)."); + "Force enable atomic functionality on platforms that don't have full support yet."); /* WA to get away with the default setting in VBT for early platforms.Will be removed */ module_param_named_unsafe(edp_vswing, i915.edp_vswing, int, 0400); diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 8e433de04679..55d47eea172e 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -27,46 +27,51 @@ #include <linux/cache.h> /* for __read_mostly */ +#define I915_PARAMS_FOR_EACH(func) \ + func(int, modeset); \ + func(int, panel_ignore_lid); \ + func(int, semaphores); \ + func(int, lvds_channel_mode); \ + func(int, panel_use_ssc); \ + func(int, vbt_sdvo_panel_type); \ + func(int, enable_rc6); \ + func(int, enable_dc); \ + func(int, enable_fbc); \ + func(int, enable_ppgtt); \ + func(int, enable_execlists); \ + func(int, enable_psr); \ + func(int, disable_power_well); \ + func(int, enable_ips); \ + func(int, invert_brightness); \ + func(int, enable_guc_loading); \ + func(int, enable_guc_submission); \ + func(int, guc_log_level); \ + func(int, use_mmio_flip); \ + func(int, mmio_debug); \ + func(int, edp_vswing); \ + func(unsigned int, inject_load_failure); \ + /* leave bools at the end to not create holes */ \ + func(bool, alpha_support); \ + func(bool, enable_cmd_parser); \ + func(bool, enable_hangcheck); \ + func(bool, fastboot); \ + func(bool, prefault_disable); \ + func(bool, load_detect_test); \ + func(bool, force_reset_modeset_test); \ + func(bool, reset); \ + func(bool, error_capture); \ + func(bool, disable_display); \ + func(bool, verbose_state_checks); \ + func(bool, nuclear_pageflip); \ + func(bool, enable_dp_mst); \ + func(bool, enable_dpcd_backlight); \ + func(bool, enable_gvt) + +#define MEMBER(T, member) T member struct i915_params { - int modeset; - int panel_ignore_lid; - int semaphores; - int lvds_channel_mode; - int panel_use_ssc; - int vbt_sdvo_panel_type; - int enable_rc6; - int enable_dc; - int enable_fbc; - int enable_ppgtt; - int enable_execlists; - int enable_psr; - unsigned int alpha_support; - int disable_power_well; - int enable_ips; - int invert_brightness; - int enable_guc_loading; - int enable_guc_submission; - int guc_log_level; - int use_mmio_flip; - int mmio_debug; - int edp_vswing; - unsigned int inject_load_failure; - /* leave bools at the end to not create holes */ - bool enable_cmd_parser; - bool enable_hangcheck; - bool fastboot; - bool prefault_disable; - bool load_detect_test; - bool force_reset_modeset_test; - bool reset; - bool error_capture; - bool disable_display; - bool verbose_state_checks; - bool nuclear_pageflip; - bool enable_dp_mst; - bool enable_dpcd_backlight; - bool enable_gvt; + I915_PARAMS_FOR_EACH(MEMBER); }; +#undef MEMBER extern struct i915_params i915 __read_mostly; diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index ecb487b5356f..732101ed57fb 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -27,6 +27,7 @@ #include <linux/vga_switcheroo.h> #include "i915_drv.h" +#include "i915_selftest.h" #define GEN_DEFAULT_PIPEOFFSETS \ .pipe_offsets = { PIPE_A_OFFSET, PIPE_B_OFFSET, \ @@ -403,6 +404,7 @@ static const struct intel_device_info intel_geminilake_info = { .platform = INTEL_GEMINILAKE, .is_alpha_support = 1, .ddb_size = 1024, + .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } }; static const struct intel_device_info intel_kabylake_info = { @@ -472,10 +474,19 @@ static const struct pci_device_id pciidlist[] = { }; MODULE_DEVICE_TABLE(pci, pciidlist); +static void i915_pci_remove(struct pci_dev *pdev) +{ + struct drm_device *dev = pci_get_drvdata(pdev); + + i915_driver_unload(dev); + drm_dev_unref(dev); +} + static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct intel_device_info *intel_info = (struct intel_device_info *) ent->driver_data; + int err; if (IS_ALPHA_SUPPORT(intel_info) && !i915.alpha_support) { DRM_INFO("The driver support for your hardware in this kernel version is alpha quality\n" @@ -499,15 +510,17 @@ static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (vga_switcheroo_client_probe_defer(pdev)) return -EPROBE_DEFER; - return i915_driver_load(pdev, ent); -} + err = i915_driver_load(pdev, ent); + if (err) + return err; -static void i915_pci_remove(struct pci_dev *pdev) -{ - struct drm_device *dev = pci_get_drvdata(pdev); + err = i915_live_selftests(pdev); + if (err) { + i915_pci_remove(pdev); + return err > 0 ? -ENOTTY : err; + } - i915_driver_unload(dev); - drm_dev_unref(dev); + return 0; } static struct pci_driver i915_pci_driver = { @@ -521,6 +534,11 @@ static struct pci_driver i915_pci_driver = { static int __init i915_init(void) { bool use_kms = true; + int err; + + err = i915_mock_selftests(); + if (err) + return err > 0 ? 0 : err; /* * Enable KMS by default, unless explicitly overriden by diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index a1b7eec58be2..8c121187ff39 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1008,7 +1008,7 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv) static void gen7_update_oacontrol_locked(struct drm_i915_private *dev_priv) { - assert_spin_locked(&dev_priv->perf.hook_lock); + lockdep_assert_held(&dev_priv->perf.hook_lock); if (dev_priv->perf.oa.exclusive_stream->enabled) { struct i915_gem_context *ctx = diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 1c8f5b9a7fcd..cc843f96576f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -48,6 +48,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) return !i915_mmio_reg_equal(reg, INVALID_MMIO_REG); } +#define _PICK(__index, ...) (((const u32 []){ __VA_ARGS__ })[__index]) + #define _PIPE(pipe, a, b) ((a) + (pipe)*((b)-(a))) #define _MMIO_PIPE(pipe, a, b) _MMIO(_PIPE(pipe, a, b)) #define _PLANE(plane, a, b) _PIPE(plane, a, b) @@ -56,14 +58,11 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define _MMIO_TRANS(tran, a, b) _MMIO(_TRANS(tran, a, b)) #define _PORT(port, a, b) ((a) + (port)*((b)-(a))) #define _MMIO_PORT(port, a, b) _MMIO(_PORT(port, a, b)) -#define _PIPE3(pipe, a, b, c) ((pipe) == PIPE_A ? (a) : \ - (pipe) == PIPE_B ? (b) : (c)) +#define _PIPE3(pipe, ...) _PICK(pipe, __VA_ARGS__) #define _MMIO_PIPE3(pipe, a, b, c) _MMIO(_PIPE3(pipe, a, b, c)) -#define _PORT3(port, a, b, c) ((port) == PORT_A ? (a) : \ - (port) == PORT_B ? (b) : (c)) +#define _PORT3(port, ...) _PICK(port, __VA_ARGS__) #define _MMIO_PORT3(pipe, a, b, c) _MMIO(_PORT3(pipe, a, b, c)) -#define _PHY3(phy, a, b, c) ((phy) == DPIO_PHY0 ? (a) : \ - (phy) == DPIO_PHY1 ? (b) : (c)) +#define _PHY3(phy, ...) _PICK(phy, __VA_ARGS__) #define _MMIO_PHY3(phy, a, b, c) _MMIO(_PHY3(phy, a, b, c)) #define _MASKED_FIELD(mask, value) ({ \ @@ -78,7 +77,13 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define _MASKED_BIT_ENABLE(a) ({ typeof(a) _a = (a); _MASKED_FIELD(_a, _a); }) #define _MASKED_BIT_DISABLE(a) (_MASKED_FIELD((a), 0)) +/* Engine ID */ +#define RCS_HW 0 +#define VCS_HW 1 +#define BCS_HW 2 +#define VECS_HW 3 +#define VCS2_HW 4 /* PCI config space */ @@ -120,7 +125,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GCFGC 0xf0 /* 915+ only */ #define GC_LOW_FREQUENCY_ENABLE (1 << 7) #define GC_DISPLAY_CLOCK_190_200_MHZ (0 << 4) -#define GC_DISPLAY_CLOCK_333_MHZ (4 << 4) +#define GC_DISPLAY_CLOCK_333_320_MHZ (4 << 4) #define GC_DISPLAY_CLOCK_267_MHZ_PNV (0 << 4) #define GC_DISPLAY_CLOCK_333_MHZ_PNV (1 << 4) #define GC_DISPLAY_CLOCK_444_MHZ_PNV (2 << 4) @@ -1553,6 +1558,7 @@ enum skl_disp_power_wells { _MMIO(_BXT_PHY_CH(phy, ch, reg_ch0, reg_ch1)) #define BXT_P_CR_GT_DISP_PWRON _MMIO(0x138090) +#define MIPIO_RST_CTRL (1 << 2) #define _BXT_PHY_CTL_DDI_A 0x64C00 #define _BXT_PHY_CTL_DDI_B 0x64C10 @@ -3376,10 +3382,22 @@ enum { INTEL_LEGACY_64B_CONTEXT }; +enum { + FAULT_AND_HANG = 0, + FAULT_AND_HALT, /* Debug only */ + FAULT_AND_STREAM, + FAULT_AND_CONTINUE /* Unsupported */ +}; + +#define GEN8_CTX_VALID (1<<0) +#define GEN8_CTX_FORCE_PD_RESTORE (1<<1) +#define GEN8_CTX_FORCE_RESTORE (1<<2) +#define GEN8_CTX_L3LLC_COHERENT (1<<5) +#define GEN8_CTX_PRIVILEGE (1<<8) #define GEN8_CTX_ADDRESSING_MODE_SHIFT 3 -#define GEN8_CTX_ADDRESSING_MODE(dev_priv) (USES_FULL_48BIT_PPGTT(dev_priv) ?\ - INTEL_LEGACY_64B_CONTEXT : \ - INTEL_LEGACY_32B_CONTEXT) + +#define GEN8_CTX_ID_SHIFT 32 +#define GEN8_CTX_ID_WIDTH 21 #define CHV_CLK_CTL1 _MMIO(0x101100) #define VLV_CLK_CTL2 _MMIO(0x101104) @@ -5887,11 +5905,18 @@ enum { #define _PLANE_KEYMSK_2_A 0x70298 #define _PLANE_KEYMAX_1_A 0x701a0 #define _PLANE_KEYMAX_2_A 0x702a0 +#define _PLANE_COLOR_CTL_1_A 0x701CC /* GLK+ */ +#define _PLANE_COLOR_CTL_2_A 0x702CC /* GLK+ */ +#define _PLANE_COLOR_CTL_3_A 0x703CC /* GLK+ */ +#define PLANE_COLOR_PIPE_GAMMA_ENABLE (1 << 30) +#define PLANE_COLOR_PIPE_CSC_ENABLE (1 << 23) +#define PLANE_COLOR_PLANE_GAMMA_DISABLE (1 << 13) #define _PLANE_BUF_CFG_1_A 0x7027c #define _PLANE_BUF_CFG_2_A 0x7037c #define _PLANE_NV12_BUF_CFG_1_A 0x70278 #define _PLANE_NV12_BUF_CFG_2_A 0x70378 + #define _PLANE_CTL_1_B 0x71180 #define _PLANE_CTL_2_B 0x71280 #define _PLANE_CTL_3_B 0x71380 @@ -5986,7 +6011,17 @@ enum { #define PLANE_NV12_BUF_CFG(pipe, plane) \ _MMIO_PLANE(plane, _PLANE_NV12_BUF_CFG_1(pipe), _PLANE_NV12_BUF_CFG_2(pipe)) -/* SKL new cursor registers */ +#define _PLANE_COLOR_CTL_1_B 0x711CC +#define _PLANE_COLOR_CTL_2_B 0x712CC +#define _PLANE_COLOR_CTL_3_B 0x713CC +#define _PLANE_COLOR_CTL_1(pipe) \ + _PIPE(pipe, _PLANE_COLOR_CTL_1_A, _PLANE_COLOR_CTL_1_B) +#define _PLANE_COLOR_CTL_2(pipe) \ + _PIPE(pipe, _PLANE_COLOR_CTL_2_A, _PLANE_COLOR_CTL_2_B) +#define PLANE_COLOR_CTL(pipe, plane) \ + _MMIO_PLANE(plane, _PLANE_COLOR_CTL_1(pipe), _PLANE_COLOR_CTL_2(pipe)) + +#/* SKL new cursor registers */ #define _CUR_BUF_CFG_A 0x7017c #define _CUR_BUF_CFG_B 0x7117c #define CUR_BUF_CFG(pipe) _MMIO_PIPE(pipe, _CUR_BUF_CFG_A, _CUR_BUF_CFG_B) @@ -6466,6 +6501,11 @@ enum { #define CHICKEN_PAR2_1 _MMIO(0x42090) #define KVM_CONFIG_CHANGE_NOTIFICATION_SELECT (1 << 14) +#define CHICKEN_MISC_2 _MMIO(0x42084) +#define GLK_CL0_PWR_DOWN (1 << 10) +#define GLK_CL1_PWR_DOWN (1 << 11) +#define GLK_CL2_PWR_DOWN (1 << 12) + #define _CHICKEN_PIPESL_1_A 0x420b0 #define _CHICKEN_PIPESL_1_B 0x420b4 #define HSW_FBCQ_DIS (1 << 22) @@ -8167,6 +8207,7 @@ enum { #define PAL_PREC_10_12_BIT (0 << 31) #define PAL_PREC_SPLIT_MODE (1 << 31) #define PAL_PREC_AUTO_INCREMENT (1 << 15) +#define PAL_PREC_INDEX_VALUE_MASK (0x3ff << 0) #define _PAL_PREC_DATA_A 0x4A404 #define _PAL_PREC_DATA_B 0x4AC04 #define _PAL_PREC_DATA_C 0x4B404 @@ -8176,12 +8217,26 @@ enum { #define _PAL_PREC_EXT_GC_MAX_A 0x4A420 #define _PAL_PREC_EXT_GC_MAX_B 0x4AC20 #define _PAL_PREC_EXT_GC_MAX_C 0x4B420 +#define _PAL_PREC_EXT2_GC_MAX_A 0x4A430 +#define _PAL_PREC_EXT2_GC_MAX_B 0x4AC30 +#define _PAL_PREC_EXT2_GC_MAX_C 0x4B430 #define PREC_PAL_INDEX(pipe) _MMIO_PIPE(pipe, _PAL_PREC_INDEX_A, _PAL_PREC_INDEX_B) #define PREC_PAL_DATA(pipe) _MMIO_PIPE(pipe, _PAL_PREC_DATA_A, _PAL_PREC_DATA_B) #define PREC_PAL_GC_MAX(pipe, i) _MMIO(_PIPE(pipe, _PAL_PREC_GC_MAX_A, _PAL_PREC_GC_MAX_B) + (i) * 4) #define PREC_PAL_EXT_GC_MAX(pipe, i) _MMIO(_PIPE(pipe, _PAL_PREC_EXT_GC_MAX_A, _PAL_PREC_EXT_GC_MAX_B) + (i) * 4) +#define _PRE_CSC_GAMC_INDEX_A 0x4A484 +#define _PRE_CSC_GAMC_INDEX_B 0x4AC84 +#define _PRE_CSC_GAMC_INDEX_C 0x4B484 +#define PRE_CSC_GAMC_AUTO_INCREMENT (1 << 10) +#define _PRE_CSC_GAMC_DATA_A 0x4A488 +#define _PRE_CSC_GAMC_DATA_B 0x4AC88 +#define _PRE_CSC_GAMC_DATA_C 0x4B488 + +#define PRE_CSC_GAMC_INDEX(pipe) _MMIO_PIPE(pipe, _PRE_CSC_GAMC_INDEX_A, _PRE_CSC_GAMC_INDEX_B) +#define PRE_CSC_GAMC_DATA(pipe) _MMIO_PIPE(pipe, _PRE_CSC_GAMC_DATA_A, _PRE_CSC_GAMC_DATA_B) + /* pipe CSC & degamma/gamma LUTs on CHV */ #define _CGM_PIPE_A_CSC_COEFF01 (VLV_DISPLAY_BASE + 0x67900) #define _CGM_PIPE_A_CSC_COEFF23 (VLV_DISPLAY_BASE + 0x67904) @@ -8215,9 +8270,14 @@ enum { /* MIPI DSI registers */ -#define _MIPI_PORT(port, a, c) _PORT3(port, a, 0, c) /* ports A and C only */ +#define _MIPI_PORT(port, a, c) ((port) ? c : a) /* ports A and C only */ #define _MMIO_MIPI(port, a, c) _MMIO(_MIPI_PORT(port, a, c)) +#define MIPIO_TXESC_CLK_DIV1 _MMIO(0x160004) +#define GLK_TX_ESC_CLK_DIV1_MASK 0x3FF +#define MIPIO_TXESC_CLK_DIV2 _MMIO(0x160008) +#define GLK_TX_ESC_CLK_DIV2_MASK 0x3FF + /* BXT MIPI clock controls */ #define BXT_MAX_VAR_OUTPUT_KHZ 39500 @@ -8304,10 +8364,12 @@ enum { #define BXT_DSI_PLL_PVD_RATIO_SHIFT 16 #define BXT_DSI_PLL_PVD_RATIO_MASK (3 << BXT_DSI_PLL_PVD_RATIO_SHIFT) #define BXT_DSI_PLL_PVD_RATIO_1 (1 << BXT_DSI_PLL_PVD_RATIO_SHIFT) +#define BXT_DSIC_16X_BY1 (0 << 10) #define BXT_DSIC_16X_BY2 (1 << 10) #define BXT_DSIC_16X_BY3 (2 << 10) #define BXT_DSIC_16X_BY4 (3 << 10) #define BXT_DSIC_16X_MASK (3 << 10) +#define BXT_DSIA_16X_BY1 (0 << 8) #define BXT_DSIA_16X_BY2 (1 << 8) #define BXT_DSIA_16X_BY3 (2 << 8) #define BXT_DSIA_16X_BY4 (3 << 8) @@ -8317,6 +8379,8 @@ enum { #define BXT_DSI_PLL_RATIO_MAX 0x7D #define BXT_DSI_PLL_RATIO_MIN 0x22 +#define GLK_DSI_PLL_RATIO_MAX 0x6F +#define GLK_DSI_PLL_RATIO_MIN 0x22 #define BXT_DSI_PLL_RATIO_MASK 0xFF #define BXT_REF_CLOCK_KHZ 19200 @@ -8333,6 +8397,12 @@ enum { #define _BXT_MIPIC_PORT_CTRL 0x6B8C0 #define BXT_MIPI_PORT_CTRL(tc) _MMIO_MIPI(tc, _BXT_MIPIA_PORT_CTRL, _BXT_MIPIC_PORT_CTRL) +#define BXT_P_DSI_REGULATOR_CFG _MMIO(0x160020) +#define STAP_SELECT (1 << 0) + +#define BXT_P_DSI_REGULATOR_TX_CTRL _MMIO(0x160054) +#define HS_IO_CTRL_SELECT (1 << 0) + #define DPI_ENABLE (1 << 31) /* A + C */ #define MIPIA_MIPI4DPHY_DELAY_COUNT_SHIFT 27 #define MIPIA_MIPI4DPHY_DELAY_COUNT_MASK (0xf << 27) @@ -8586,6 +8656,14 @@ enum { #define LP_BYTECLK_SHIFT 0 #define LP_BYTECLK_MASK (0xffff << 0) +#define _MIPIA_TLPX_TIME_COUNT (dev_priv->mipi_mmio_base + 0xb0a4) +#define _MIPIC_TLPX_TIME_COUNT (dev_priv->mipi_mmio_base + 0xb8a4) +#define MIPI_TLPX_TIME_COUNT(port) _MMIO_MIPI(port, _MIPIA_TLPX_TIME_COUNT, _MIPIC_TLPX_TIME_COUNT) + +#define _MIPIA_CLK_LANE_TIMING (dev_priv->mipi_mmio_base + 0xb098) +#define _MIPIC_CLK_LANE_TIMING (dev_priv->mipi_mmio_base + 0xb898) +#define MIPI_CLK_LANE_TIMING(port) _MMIO_MIPI(port, _MIPIA_CLK_LANE_TIMING, _MIPIC_CLK_LANE_TIMING) + /* bits 31:0 */ #define _MIPIA_LP_GEN_DATA (dev_priv->mipi_mmio_base + 0xb064) #define _MIPIC_LP_GEN_DATA (dev_priv->mipi_mmio_base + 0xb864) diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h new file mode 100644 index 000000000000..9d7d86f1733d --- /dev/null +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -0,0 +1,106 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __I915_SELFTEST_H__ +#define __I915_SELFTEST_H__ + +struct pci_dev; +struct drm_i915_private; + +struct i915_selftest { + unsigned long timeout_jiffies; + unsigned int timeout_ms; + unsigned int random_seed; + int mock; + int live; +}; + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include <linux/fault-inject.h> + +extern struct i915_selftest i915_selftest; + +int i915_mock_selftests(void); +int i915_live_selftests(struct pci_dev *pdev); + +/* We extract the function declarations from i915_mock_selftests.h and + * i915_live_selftests.h Add your unit test declarations there! + * + * Mock unit tests are run very early upon module load, before the driver + * is probed. All hardware interactions, as well as other subsystems, must + * be "mocked". + * + * Live unit tests are run after the driver is loaded - all hardware + * interactions are real. + */ +#define selftest(name, func) int func(void); +#include "selftests/i915_mock_selftests.h" +#undef selftest +#define selftest(name, func) int func(struct drm_i915_private *i915); +#include "selftests/i915_live_selftests.h" +#undef selftest + +struct i915_subtest { + int (*func)(void *data); + const char *name; +}; + +int __i915_subtests(const char *caller, + const struct i915_subtest *st, + unsigned int count, + void *data); +#define i915_subtests(T, data) \ + __i915_subtests(__func__, T, ARRAY_SIZE(T), data) + +#define SUBTEST(x) { x, #x } + +#define I915_SELFTEST_DECLARE(x) x +#define I915_SELFTEST_ONLY(x) unlikely(x) + +#else /* !IS_ENABLED(CONFIG_DRM_I915_SELFTEST) */ + +static inline int i915_mock_selftests(void) { return 0; } +static inline int i915_live_selftests(struct pci_dev *pdev) { return 0; } + +#define I915_SELFTEST_DECLARE(x) +#define I915_SELFTEST_ONLY(x) 0 + +#endif + +/* Using the i915_selftest_ prefix becomes a little unwieldy with the helpers. + * Instead we use the igt_ shorthand, in reference to the intel-gpu-tools + * suite of uabi test cases (which includes a test runner for our selftests). + */ + +#define IGT_TIMEOUT(name__) \ + unsigned long name__ = jiffies + i915_selftest.timeout_jiffies + +__printf(2, 3) +bool __igt_timeout(unsigned long timeout, const char *fmt, ...); + +#define igt_timeout(t, fmt, ...) \ + __igt_timeout((t), KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) + +#define igt_can_mi_store_dword_imm(D) (INTEL_GEN(D) > 2) + +#endif /* !__I915_SELFTEST_H__ */ diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 376ac957cd1c..af0ac9f261fd 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -395,13 +395,13 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, /* We still need *_set_rps to process the new max_delay and * update the interrupt limits and PMINTRMSK even though * frequency request may be unchanged. */ - intel_set_rps(dev_priv, val); + ret = intel_set_rps(dev_priv, val); mutex_unlock(&dev_priv->rps.hw_lock); intel_runtime_pm_put(dev_priv); - return count; + return ret ?: count; } static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) @@ -448,14 +448,13 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, /* We still need *_set_rps to process the new min_delay and * update the interrupt limits and PMINTRMSK even though * frequency request may be unchanged. */ - intel_set_rps(dev_priv, val); + ret = intel_set_rps(dev_priv, val); mutex_unlock(&dev_priv->rps.hw_lock); intel_runtime_pm_put(dev_priv); - return count; - + return ret ?: count; } static DEVICE_ATTR(gt_act_freq_mhz, S_IRUGO, gt_act_freq_mhz_show, NULL); @@ -523,33 +522,27 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj, struct device *kdev = kobj_to_dev(kobj); struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct drm_device *dev = &dev_priv->drm; - struct i915_error_state_file_priv error_priv; struct drm_i915_error_state_buf error_str; - ssize_t ret_count = 0; - int ret; - - memset(&error_priv, 0, sizeof(error_priv)); + struct i915_gpu_state *gpu; + ssize_t ret; - ret = i915_error_state_buf_init(&error_str, to_i915(dev), count, off); + ret = i915_error_state_buf_init(&error_str, dev_priv, count, off); if (ret) return ret; - error_priv.i915 = dev_priv; - i915_error_state_get(dev, &error_priv); - - ret = i915_error_state_to_str(&error_str, &error_priv); + gpu = i915_first_error_state(dev_priv); + ret = i915_error_state_to_str(&error_str, gpu); if (ret) goto out; - ret_count = count < error_str.bytes ? count : error_str.bytes; + ret = count < error_str.bytes ? count : error_str.bytes; + memcpy(buf, error_str.buf, ret); - memcpy(buf, error_str.buf, ret_count); out: - i915_error_state_put(&error_priv); + i915_gpu_state_put(gpu); i915_error_state_buf_release(&error_str); - return ret ?: ret_count; + return ret; } static ssize_t error_state_write(struct file *file, struct kobject *kobj, @@ -560,7 +553,7 @@ static ssize_t error_state_write(struct file *file, struct kobject *kobj, struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); DRM_DEBUG_DRIVER("Resetting error state\n"); - i915_destroy_error_state(dev_priv); + i915_reset_error_state(dev_priv); return count; } diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 4461df5a94fe..5503f5ab1e98 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -14,6 +14,206 @@ #define TRACE_SYSTEM i915 #define TRACE_INCLUDE_FILE i915_trace +/* watermark/fifo updates */ + +TRACE_EVENT(intel_cpu_fifo_underrun, + TP_PROTO(struct drm_i915_private *dev_priv, enum pipe pipe), + TP_ARGS(dev_priv, pipe), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + ), + + TP_fast_assign( + __entry->pipe = pipe; + __entry->frame = dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, pipe); + __entry->scanline = intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, pipe)); + ), + + TP_printk("pipe %c, frame=%u, scanline=%u", + pipe_name(__entry->pipe), + __entry->frame, __entry->scanline) +); + +TRACE_EVENT(intel_pch_fifo_underrun, + TP_PROTO(struct drm_i915_private *dev_priv, enum transcoder pch_transcoder), + TP_ARGS(dev_priv, pch_transcoder), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + ), + + TP_fast_assign( + enum pipe pipe = (enum pipe)pch_transcoder; + __entry->pipe = pipe; + __entry->frame = dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, pipe); + __entry->scanline = intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, pipe)); + ), + + TP_printk("pch transcoder %c, frame=%u, scanline=%u", + pipe_name(__entry->pipe), + __entry->frame, __entry->scanline) +); + +TRACE_EVENT(intel_memory_cxsr, + TP_PROTO(struct drm_i915_private *dev_priv, bool old, bool new), + TP_ARGS(dev_priv, old, new), + + TP_STRUCT__entry( + __array(u32, frame, 3) + __array(u32, scanline, 3) + __field(bool, old) + __field(bool, new) + ), + + TP_fast_assign( + enum pipe pipe; + for_each_pipe(dev_priv, pipe) { + __entry->frame[pipe] = + dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, pipe); + __entry->scanline[pipe] = + intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, pipe)); + } + __entry->old = old; + __entry->new = new; + ), + + TP_printk("%s->%s, pipe A: frame=%u, scanline=%u, pipe B: frame=%u, scanline=%u, pipe C: frame=%u, scanline=%u", + onoff(__entry->old), onoff(__entry->new), + __entry->frame[PIPE_A], __entry->scanline[PIPE_A], + __entry->frame[PIPE_B], __entry->scanline[PIPE_B], + __entry->frame[PIPE_C], __entry->scanline[PIPE_C]) +); + +TRACE_EVENT(vlv_wm, + TP_PROTO(struct intel_crtc *crtc, const struct vlv_wm_values *wm), + TP_ARGS(crtc, wm), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + __field(u32, level) + __field(u32, cxsr) + __field(u32, primary) + __field(u32, sprite0) + __field(u32, sprite1) + __field(u32, cursor) + __field(u32, sr_plane) + __field(u32, sr_cursor) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, + crtc->pipe); + __entry->scanline = intel_get_crtc_scanline(crtc); + __entry->level = wm->level; + __entry->cxsr = wm->cxsr; + __entry->primary = wm->pipe[crtc->pipe].plane[PLANE_PRIMARY]; + __entry->sprite0 = wm->pipe[crtc->pipe].plane[PLANE_SPRITE0]; + __entry->sprite1 = wm->pipe[crtc->pipe].plane[PLANE_SPRITE1]; + __entry->cursor = wm->pipe[crtc->pipe].plane[PLANE_CURSOR]; + __entry->sr_plane = wm->sr.plane; + __entry->sr_cursor = wm->sr.cursor; + ), + + TP_printk("pipe %c, frame=%u, scanline=%u, level=%d, cxsr=%d, wm %d/%d/%d/%d, sr %d/%d", + pipe_name(__entry->pipe), __entry->frame, + __entry->scanline, __entry->level, __entry->cxsr, + __entry->primary, __entry->sprite0, __entry->sprite1, __entry->cursor, + __entry->sr_plane, __entry->sr_cursor) +); + +TRACE_EVENT(vlv_fifo_size, + TP_PROTO(struct intel_crtc *crtc, u32 sprite0_start, u32 sprite1_start, u32 fifo_size), + TP_ARGS(crtc, sprite0_start, sprite1_start, fifo_size), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + __field(u32, sprite0_start) + __field(u32, sprite1_start) + __field(u32, fifo_size) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, + crtc->pipe); + __entry->scanline = intel_get_crtc_scanline(crtc); + __entry->sprite0_start = sprite0_start; + __entry->sprite1_start = sprite1_start; + __entry->fifo_size = fifo_size; + ), + + TP_printk("pipe %c, frame=%u, scanline=%u, %d/%d/%d", + pipe_name(__entry->pipe), __entry->frame, + __entry->scanline, __entry->sprite0_start, + __entry->sprite1_start, __entry->fifo_size) +); + +/* plane updates */ + +TRACE_EVENT(intel_update_plane, + TP_PROTO(struct drm_plane *plane, struct intel_crtc *crtc), + TP_ARGS(plane, crtc), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(const char *, name) + __field(u32, frame) + __field(u32, scanline) + __array(int, src, 4) + __array(int, dst, 4) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->name = plane->name; + __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, + crtc->pipe); + __entry->scanline = intel_get_crtc_scanline(crtc); + memcpy(__entry->src, &plane->state->src, sizeof(__entry->src)); + memcpy(__entry->dst, &plane->state->dst, sizeof(__entry->dst)); + ), + + TP_printk("pipe %c, plane %s, frame=%u, scanline=%u, " DRM_RECT_FP_FMT " -> " DRM_RECT_FMT, + pipe_name(__entry->pipe), __entry->name, + __entry->frame, __entry->scanline, + DRM_RECT_FP_ARG((const struct drm_rect *)__entry->src), + DRM_RECT_ARG((const struct drm_rect *)__entry->dst)) +); + +TRACE_EVENT(intel_disable_plane, + TP_PROTO(struct drm_plane *plane, struct intel_crtc *crtc), + TP_ARGS(plane, crtc), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(const char *, name) + __field(u32, frame) + __field(u32, scanline) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->name = plane->name; + __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, + crtc->pipe); + __entry->scanline = intel_get_crtc_scanline(crtc); + ), + + TP_printk("pipe %c, plane %s, frame=%u, scanline=%u", + pipe_name(__entry->pipe), __entry->name, + __entry->frame, __entry->scanline) +); + /* pipe updates */ TRACE_EVENT(i915_pipe_update_start, @@ -175,134 +375,6 @@ TRACE_EVENT(i915_vma_unbind, __entry->obj, __entry->offset, __entry->size, __entry->vm) ); -TRACE_EVENT(i915_va_alloc, - TP_PROTO(struct i915_vma *vma), - TP_ARGS(vma), - - TP_STRUCT__entry( - __field(struct i915_address_space *, vm) - __field(u64, start) - __field(u64, end) - ), - - TP_fast_assign( - __entry->vm = vma->vm; - __entry->start = vma->node.start; - __entry->end = vma->node.start + vma->node.size - 1; - ), - - TP_printk("vm=%p (%c), 0x%llx-0x%llx", - __entry->vm, i915_is_ggtt(__entry->vm) ? 'G' : 'P', __entry->start, __entry->end) -); - -DECLARE_EVENT_CLASS(i915_px_entry, - TP_PROTO(struct i915_address_space *vm, u32 px, u64 start, u64 px_shift), - TP_ARGS(vm, px, start, px_shift), - - TP_STRUCT__entry( - __field(struct i915_address_space *, vm) - __field(u32, px) - __field(u64, start) - __field(u64, end) - ), - - TP_fast_assign( - __entry->vm = vm; - __entry->px = px; - __entry->start = start; - __entry->end = ((start + (1ULL << px_shift)) & ~((1ULL << px_shift)-1)) - 1; - ), - - TP_printk("vm=%p, pde=%d (0x%llx-0x%llx)", - __entry->vm, __entry->px, __entry->start, __entry->end) -); - -DEFINE_EVENT(i915_px_entry, i915_page_table_entry_alloc, - TP_PROTO(struct i915_address_space *vm, u32 pde, u64 start, u64 pde_shift), - TP_ARGS(vm, pde, start, pde_shift) -); - -DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_entry_alloc, - TP_PROTO(struct i915_address_space *vm, u32 pdpe, u64 start, u64 pdpe_shift), - TP_ARGS(vm, pdpe, start, pdpe_shift), - - TP_printk("vm=%p, pdpe=%d (0x%llx-0x%llx)", - __entry->vm, __entry->px, __entry->start, __entry->end) -); - -DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_pointer_entry_alloc, - TP_PROTO(struct i915_address_space *vm, u32 pml4e, u64 start, u64 pml4e_shift), - TP_ARGS(vm, pml4e, start, pml4e_shift), - - TP_printk("vm=%p, pml4e=%d (0x%llx-0x%llx)", - __entry->vm, __entry->px, __entry->start, __entry->end) -); - -/* Avoid extra math because we only support two sizes. The format is defined by - * bitmap_scnprintf. Each 32 bits is 8 HEX digits followed by comma */ -#define TRACE_PT_SIZE(bits) \ - ((((bits) == 1024) ? 288 : 144) + 1) - -DECLARE_EVENT_CLASS(i915_page_table_entry_update, - TP_PROTO(struct i915_address_space *vm, u32 pde, - struct i915_page_table *pt, u32 first, u32 count, u32 bits), - TP_ARGS(vm, pde, pt, first, count, bits), - - TP_STRUCT__entry( - __field(struct i915_address_space *, vm) - __field(u32, pde) - __field(u32, first) - __field(u32, last) - __dynamic_array(char, cur_ptes, TRACE_PT_SIZE(bits)) - ), - - TP_fast_assign( - __entry->vm = vm; - __entry->pde = pde; - __entry->first = first; - __entry->last = first + count - 1; - scnprintf(__get_str(cur_ptes), - TRACE_PT_SIZE(bits), - "%*pb", - bits, - pt->used_ptes); - ), - - TP_printk("vm=%p, pde=%d, updating %u:%u\t%s", - __entry->vm, __entry->pde, __entry->last, __entry->first, - __get_str(cur_ptes)) -); - -DEFINE_EVENT(i915_page_table_entry_update, i915_page_table_entry_map, - TP_PROTO(struct i915_address_space *vm, u32 pde, - struct i915_page_table *pt, u32 first, u32 count, u32 bits), - TP_ARGS(vm, pde, pt, first, count, bits) -); - -TRACE_EVENT(i915_gem_object_change_domain, - TP_PROTO(struct drm_i915_gem_object *obj, u32 old_read, u32 old_write), - TP_ARGS(obj, old_read, old_write), - - TP_STRUCT__entry( - __field(struct drm_i915_gem_object *, obj) - __field(u32, read_domains) - __field(u32, write_domain) - ), - - TP_fast_assign( - __entry->obj = obj; - __entry->read_domains = obj->base.read_domains | (old_read << 16); - __entry->write_domain = obj->base.write_domain | (old_write << 16); - ), - - TP_printk("obj=%p, read=%02x=>%02x, write=%02x=>%02x", - __entry->obj, - __entry->read_domains >> 16, - __entry->read_domains & 0xffff, - __entry->write_domain >> 16, - __entry->write_domain & 0xffff) -); - TRACE_EVENT(i915_gem_object_pwrite, TP_PROTO(struct drm_i915_gem_object *obj, u32 offset, u32 len), TP_ARGS(obj, offset, len), @@ -503,13 +575,14 @@ TRACE_EVENT(i915_gem_ring_sync_to, __entry->seqno) ); -TRACE_EVENT(i915_gem_ring_dispatch, +TRACE_EVENT(i915_gem_request_queue, TP_PROTO(struct drm_i915_gem_request *req, u32 flags), TP_ARGS(req, flags), TP_STRUCT__entry( __field(u32, dev) __field(u32, ring) + __field(u32, ctx) __field(u32, seqno) __field(u32, flags) ), @@ -517,13 +590,14 @@ TRACE_EVENT(i915_gem_ring_dispatch, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->global_seqno; + __entry->ctx = req->ctx->hw_id; + __entry->seqno = req->fence.seqno; __entry->flags = flags; - dma_fence_enable_sw_signaling(&req->fence); ), - TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x", - __entry->dev, __entry->ring, __entry->seqno, __entry->flags) + TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, flags=0x%x", + __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, + __entry->flags) ); TRACE_EVENT(i915_gem_ring_flush, @@ -555,18 +629,23 @@ DECLARE_EVENT_CLASS(i915_gem_request, TP_STRUCT__entry( __field(u32, dev) + __field(u32, ctx) __field(u32, ring) __field(u32, seqno) + __field(u32, global) ), TP_fast_assign( __entry->dev = req->i915->drm.primary->index; + __entry->ctx = req->ctx->hw_id; __entry->ring = req->engine->id; - __entry->seqno = req->global_seqno; + __entry->seqno = req->fence.seqno; + __entry->global = req->global_seqno; ), - TP_printk("dev=%u, ring=%u, seqno=%u", - __entry->dev, __entry->ring, __entry->seqno) + TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u", + __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, + __entry->global) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_add, @@ -574,24 +653,100 @@ DEFINE_EVENT(i915_gem_request, i915_gem_request_add, TP_ARGS(req) ); -TRACE_EVENT(i915_gem_request_notify, - TP_PROTO(struct intel_engine_cs *engine), - TP_ARGS(engine), +#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) +DEFINE_EVENT(i915_gem_request, i915_gem_request_submit, + TP_PROTO(struct drm_i915_gem_request *req), + TP_ARGS(req) +); + +DEFINE_EVENT(i915_gem_request, i915_gem_request_execute, + TP_PROTO(struct drm_i915_gem_request *req), + TP_ARGS(req) +); + +DECLARE_EVENT_CLASS(i915_gem_request_hw, + TP_PROTO(struct drm_i915_gem_request *req, + unsigned int port), + TP_ARGS(req, port), + + TP_STRUCT__entry( + __field(u32, dev) + __field(u32, ring) + __field(u32, seqno) + __field(u32, global_seqno) + __field(u32, ctx) + __field(u32, port) + ), + + TP_fast_assign( + __entry->dev = req->i915->drm.primary->index; + __entry->ring = req->engine->id; + __entry->ctx = req->ctx->hw_id; + __entry->seqno = req->fence.seqno; + __entry->global_seqno = req->global_seqno; + __entry->port = port; + ), + + TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u", + __entry->dev, __entry->ring, __entry->ctx, + __entry->seqno, __entry->global_seqno, + __entry->port) +); + +DEFINE_EVENT(i915_gem_request_hw, i915_gem_request_in, + TP_PROTO(struct drm_i915_gem_request *req, unsigned int port), + TP_ARGS(req, port) +); + +DEFINE_EVENT(i915_gem_request, i915_gem_request_out, + TP_PROTO(struct drm_i915_gem_request *req), + TP_ARGS(req) +); +#else +#if !defined(TRACE_HEADER_MULTI_READ) +static inline void +trace_i915_gem_request_submit(struct drm_i915_gem_request *req) +{ +} + +static inline void +trace_i915_gem_request_execute(struct drm_i915_gem_request *req) +{ +} + +static inline void +trace_i915_gem_request_in(struct drm_i915_gem_request *req, unsigned int port) +{ +} + +static inline void +trace_i915_gem_request_out(struct drm_i915_gem_request *req) +{ +} +#endif +#endif + +TRACE_EVENT(intel_engine_notify, + TP_PROTO(struct intel_engine_cs *engine, bool waiters), + TP_ARGS(engine, waiters), TP_STRUCT__entry( __field(u32, dev) __field(u32, ring) __field(u32, seqno) + __field(bool, waiters) ), TP_fast_assign( __entry->dev = engine->i915->drm.primary->index; __entry->ring = engine->id; __entry->seqno = intel_engine_get_seqno(engine); + __entry->waiters = waiters; ), - TP_printk("dev=%u, ring=%u, seqno=%u", - __entry->dev, __entry->ring, __entry->seqno) + TP_printk("dev=%u, ring=%u, seqno=%u, waiters=%u", + __entry->dev, __entry->ring, __entry->seqno, + __entry->waiters) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_retire, @@ -599,20 +754,17 @@ DEFINE_EVENT(i915_gem_request, i915_gem_request_retire, TP_ARGS(req) ); -DEFINE_EVENT(i915_gem_request, i915_gem_request_complete, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req) -); - TRACE_EVENT(i915_gem_request_wait_begin, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req), + TP_PROTO(struct drm_i915_gem_request *req, unsigned int flags), + TP_ARGS(req, flags), TP_STRUCT__entry( __field(u32, dev) __field(u32, ring) + __field(u32, ctx) __field(u32, seqno) - __field(bool, blocking) + __field(u32, global) + __field(unsigned int, flags) ), /* NB: the blocking information is racy since mutex_is_locked @@ -624,14 +776,16 @@ TRACE_EVENT(i915_gem_request_wait_begin, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->global_seqno; - __entry->blocking = - mutex_is_locked(&req->i915->drm.struct_mutex); + __entry->ctx = req->ctx->hw_id; + __entry->seqno = req->fence.seqno; + __entry->global = req->global_seqno; + __entry->flags = flags; ), - TP_printk("dev=%u, ring=%u, seqno=%u, blocking=%s", - __entry->dev, __entry->ring, - __entry->seqno, __entry->blocking ? "yes (NB)" : "no") + TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u, blocking=%u, flags=0x%x", + __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, + __entry->global, !!(__entry->flags & I915_WAIT_LOCKED), + __entry->flags) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_wait_end, @@ -769,17 +923,19 @@ DECLARE_EVENT_CLASS(i915_context, TP_STRUCT__entry( __field(u32, dev) __field(struct i915_gem_context *, ctx) + __field(u32, hw_id) __field(struct i915_address_space *, vm) ), TP_fast_assign( + __entry->dev = ctx->i915->drm.primary->index; __entry->ctx = ctx; + __entry->hw_id = ctx->hw_id; __entry->vm = ctx->ppgtt ? &ctx->ppgtt->base : NULL; - __entry->dev = ctx->i915->drm.primary->index; ), - TP_printk("dev=%u, ctx=%p, ctx_vm=%p", - __entry->dev, __entry->ctx, __entry->vm) + TP_printk("dev=%u, ctx=%p, ctx_vm=%p, hw_id=%u", + __entry->dev, __entry->ctx, __entry->vm, __entry->hw_id) ) DEFINE_EVENT(i915_context, i915_context_create, diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 34020873e1f6..b8ba0f2f92af 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -25,6 +25,17 @@ #ifndef __I915_UTILS_H #define __I915_UTILS_H +#if GCC_VERSION >= 70000 +#define add_overflows(A, B) \ + __builtin_add_overflow_p((A), (B), (typeof((A) + (B)))0) +#else +#define add_overflows(A, B) ({ \ + typeof(A) a = (A); \ + typeof(B) b = (B); \ + a + b < a; \ +}) +#endif + #define range_overflows(start, size, max) ({ \ typeof(start) start__ = (start); \ typeof(size) size__ = (size); \ diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index d0abfd08a01c..14014068dfcf 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -179,7 +179,7 @@ static int vgt_balloon_space(struct i915_ggtt *ggtt, int intel_vgt_balloon(struct drm_i915_private *dev_priv) { struct i915_ggtt *ggtt = &dev_priv->ggtt; - unsigned long ggtt_end = ggtt->base.start + ggtt->base.total; + unsigned long ggtt_end = ggtt->base.total; unsigned long mappable_base, mappable_size, mappable_end; unsigned long unmappable_base, unmappable_size, unmappable_end; @@ -202,8 +202,7 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) DRM_INFO("Unmappable graphic memory: base 0x%lx size %ldKiB\n", unmappable_base, unmappable_size / 1024); - if (mappable_base < ggtt->base.start || - mappable_end > ggtt->mappable_end || + if (mappable_end > ggtt->mappable_end || unmappable_base < ggtt->mappable_end || unmappable_end > ggtt_end) { DRM_ERROR("Invalid ballooning configuration!\n"); @@ -231,9 +230,9 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) } /* Mappable graphic memory ballooning */ - if (mappable_base > ggtt->base.start) { + if (mappable_base) { ret = vgt_balloon_space(ggtt, &bl_info.space[0], - ggtt->base.start, mappable_base); + 0, mappable_base); if (ret) goto err; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 155906e84812..1aba47024656 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -78,6 +78,9 @@ vma_create(struct drm_i915_gem_object *obj, struct rb_node *rb, **p; int i; + /* The aliasing_ppgtt should never be used directly! */ + GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base); + vma = kmem_cache_zalloc(vm->i915->vmas, GFP_KERNEL); if (vma == NULL) return ERR_PTR(-ENOMEM); @@ -238,7 +241,15 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, u32 vma_flags; int ret; - if (WARN_ON(flags == 0)) + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + GEM_BUG_ON(vma->size > vma->node.size); + + if (GEM_WARN_ON(range_overflows(vma->node.start, + vma->node.size, + vma->vm->total))) + return -ENODEV; + + if (GEM_WARN_ON(!flags)) return -EINVAL; bind_flags = 0; @@ -255,20 +266,6 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, if (bind_flags == 0) return 0; - if (GEM_WARN_ON(range_overflows(vma->node.start, - vma->node.size, - vma->vm->total))) - return -ENODEV; - - if (vma_flags == 0 && vma->vm->allocate_va_range) { - trace_i915_va_alloc(vma); - ret = vma->vm->allocate_va_range(vma->vm, - vma->node.start, - vma->node.size); - if (ret) - return ret; - } - trace_i915_vma_bind(vma, bind_flags); ret = vma->vm->bind_vma(vma, cache_level, bind_flags); if (ret) @@ -324,8 +321,8 @@ void i915_vma_unpin_and_release(struct i915_vma **p_vma) __i915_gem_object_release_unless_active(obj); } -bool -i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +bool i915_vma_misplaced(const struct i915_vma *vma, + u64 size, u64 alignment, u64 flags) { if (!drm_mm_node_allocated(&vma->node)) return false; @@ -512,10 +509,36 @@ err_unpin: return ret; } +static void +i915_vma_remove(struct i915_vma *vma) +{ + struct drm_i915_gem_object *obj = vma->obj; + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); + + drm_mm_remove_node(&vma->node); + list_move_tail(&vma->vm_link, &vma->vm->unbound_list); + + /* Since the unbound list is global, only move to that list if + * no more VMAs exist. + */ + if (--obj->bind_count == 0) + list_move_tail(&obj->global_link, + &to_i915(obj->base.dev)->mm.unbound_list); + + /* And finally now the object is completely decoupled from this vma, + * we can drop its hold on the backing storage and allow it to be + * reaped by the shrinker. + */ + i915_gem_object_unpin_pages(obj); + GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); +} + int __i915_vma_do_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { - unsigned int bound = vma->flags; + const unsigned int bound = vma->flags; int ret; lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); @@ -524,18 +547,18 @@ int __i915_vma_do_pin(struct i915_vma *vma, if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { ret = -EBUSY; - goto err; + goto err_unpin; } if ((bound & I915_VMA_BIND_MASK) == 0) { ret = i915_vma_insert(vma, size, alignment, flags); if (ret) - goto err; + goto err_unpin; } ret = i915_vma_bind(vma, vma->obj->cache_level, flags); if (ret) - goto err; + goto err_remove; if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) __i915_vma_set_map_and_fenceable(vma); @@ -544,7 +567,12 @@ int __i915_vma_do_pin(struct i915_vma *vma, GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); return 0; -err: +err_remove: + if ((bound & I915_VMA_BIND_MASK) == 0) { + GEM_BUG_ON(vma->pages); + i915_vma_remove(vma); + } +err_unpin: __i915_vma_unpin(vma); return ret; } @@ -657,9 +685,6 @@ int i915_vma_unbind(struct i915_vma *vma) } vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); - drm_mm_remove_node(&vma->node); - list_move_tail(&vma->vm_link, &vma->vm->unbound_list); - if (vma->pages != obj->mm.pages) { GEM_BUG_ON(!vma->pages); sg_free_table(vma->pages); @@ -667,18 +692,7 @@ int i915_vma_unbind(struct i915_vma *vma) } vma->pages = NULL; - /* Since the unbound list is global, only move to that list if - * no more VMAs exist. */ - if (--obj->bind_count == 0) - list_move_tail(&obj->global_link, - &to_i915(obj->base.dev)->mm.unbound_list); - - /* And finally now the object is completely decoupled from this vma, - * we can drop its hold on the backing storage and allow it to be - * reaped by the shrinker. - */ - i915_gem_object_unpin_pages(obj); - GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); + i915_vma_remove(vma); destroy: if (unlikely(i915_vma_is_closed(vma))) @@ -687,3 +701,6 @@ destroy: return 0; } +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_vma.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index e39d922cfb6f..2e03f81dddbe 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -228,8 +228,8 @@ i915_vma_compare(struct i915_vma *vma, int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level); -bool -i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); +bool i915_vma_misplaced(const struct i915_vma *vma, + u64 size, u64 alignment, u64 flags); void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c index aa9160e7f1d8..50fb1f76cc5f 100644 --- a/drivers/gpu/drm/i915/intel_atomic.c +++ b/drivers/gpu/drm/i915/intel_atomic.c @@ -99,6 +99,7 @@ intel_crtc_duplicate_state(struct drm_crtc *crtc) crtc_state->update_wm_pre = false; crtc_state->update_wm_post = false; crtc_state->fb_changed = false; + crtc_state->fifo_changed = false; crtc_state->wm.need_postvbl_update = false; crtc_state->fb_bits = 0; @@ -121,7 +122,7 @@ intel_crtc_destroy_state(struct drm_crtc *crtc, /** * intel_atomic_setup_scalers() - setup scalers for crtc per staged requests - * @dev: DRM device + * @dev_priv: i915 device * @crtc: intel crtc * @crtc_state: incoming crtc_state to validate and setup scalers * @@ -136,9 +137,9 @@ intel_crtc_destroy_state(struct drm_crtc *crtc, * 0 - scalers were setup succesfully * error code - otherwise */ -int intel_atomic_setup_scalers(struct drm_device *dev, - struct intel_crtc *intel_crtc, - struct intel_crtc_state *crtc_state) +int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, + struct intel_crtc *intel_crtc, + struct intel_crtc_state *crtc_state) { struct drm_plane *plane = NULL; struct intel_plane *intel_plane; @@ -199,7 +200,7 @@ int intel_atomic_setup_scalers(struct drm_device *dev, */ if (!plane) { struct drm_plane_state *state; - plane = drm_plane_from_index(dev, i); + plane = drm_plane_from_index(&dev_priv->drm, i); state = drm_atomic_get_plane_state(drm_state, plane); if (IS_ERR(state)) { DRM_DEBUG_KMS("Failed to add [PLANE:%d] to drm_state\n", @@ -247,7 +248,9 @@ int intel_atomic_setup_scalers(struct drm_device *dev, } /* set scaler mode */ - if (num_scalers_need == 1 && intel_crtc->pipe != PIPE_C) { + if (IS_GEMINILAKE(dev_priv)) { + scaler_state->scalers[*scaler_id].mode = 0; + } else if (num_scalers_need == 1 && intel_crtc->pipe != PIPE_C) { /* * when only 1 scaler is in use on either pipe A or B, * scaler 0 operates in high quality (HQ) mode. diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index 41fd94e62d3c..cfb47293fd53 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -189,6 +189,12 @@ int intel_plane_atomic_check_with_state(struct intel_crtc_state *crtc_state, if (ret) return ret; + /* FIXME pre-g4x don't work like this */ + if (intel_state->base.visible) + crtc_state->active_planes |= BIT(intel_plane->id); + else + crtc_state->active_planes &= ~BIT(intel_plane->id); + return intel_plane_atomic_calc_changes(&crtc_state->base, state); } @@ -225,12 +231,19 @@ static void intel_plane_atomic_update(struct drm_plane *plane, to_intel_plane_state(plane->state); struct drm_crtc *crtc = plane->state->crtc ?: old_state->crtc; - if (intel_state->base.visible) + if (intel_state->base.visible) { + trace_intel_update_plane(plane, + to_intel_crtc(crtc)); + intel_plane->update_plane(plane, to_intel_crtc_state(crtc->state), intel_state); - else + } else { + trace_intel_disable_plane(plane, + to_intel_crtc(crtc)); + intel_plane->disable_plane(plane, crtc); + } } const struct drm_plane_helper_funcs intel_plane_helper_funcs = { diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index d76f3033e890..52c207e81f41 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -720,7 +720,7 @@ static void i915_audio_component_codec_wake_override(struct device *kdev, struct drm_i915_private *dev_priv = kdev_to_i915(kdev); u32 tmp; - if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)) + if (!IS_GEN9_BC(dev_priv)) return; i915_audio_component_get_power(kdev); @@ -752,7 +752,7 @@ static int i915_audio_component_get_cdclk_freq(struct device *kdev) if (WARN_ON_ONCE(!HAS_DDI(dev_priv))) return -ENODEV; - return dev_priv->cdclk_freq; + return dev_priv->cdclk.hw.cdclk; } /* diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 7044e9a6abf7..2393bb9fe665 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -27,43 +27,90 @@ #include "i915_drv.h" +static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b) +{ + struct intel_wait *wait; + unsigned int result = 0; + + lockdep_assert_held(&b->irq_lock); + + wait = b->irq_wait; + if (wait) { + result = ENGINE_WAKEUP_WAITER; + if (wake_up_process(wait->tsk)) + result |= ENGINE_WAKEUP_ASLEEP; + } + + return result; +} + +unsigned int intel_engine_wakeup(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + unsigned long flags; + unsigned int result; + + spin_lock_irqsave(&b->irq_lock, flags); + result = __intel_breadcrumbs_wakeup(b); + spin_unlock_irqrestore(&b->irq_lock, flags); + + return result; +} + +static unsigned long wait_timeout(void) +{ + return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES); +} + +static noinline void missed_breadcrumb(struct intel_engine_cs *engine) +{ + DRM_DEBUG_DRIVER("%s missed breadcrumb at %pF, irq posted? %s\n", + engine->name, __builtin_return_address(0), + yesno(test_bit(ENGINE_IRQ_BREADCRUMB, + &engine->irq_posted))); + + set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); +} + static void intel_breadcrumbs_hangcheck(unsigned long data) { struct intel_engine_cs *engine = (struct intel_engine_cs *)data; struct intel_breadcrumbs *b = &engine->breadcrumbs; - if (!b->irq_enabled) + if (!b->irq_armed) return; - if (time_before(jiffies, b->timeout)) { - mod_timer(&b->hangcheck, b->timeout); + if (b->hangcheck_interrupts != atomic_read(&engine->irq_count)) { + b->hangcheck_interrupts = atomic_read(&engine->irq_count); + mod_timer(&b->hangcheck, wait_timeout()); return; } - DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name); - set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); - mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); - - /* Ensure that even if the GPU hangs, we get woken up. + /* We keep the hangcheck time alive until we disarm the irq, even + * if there are no waiters at present. * - * However, note that if no one is waiting, we never notice - * a gpu hang. Eventually, we will have to wait for a resource - * held by the GPU and so trigger a hangcheck. In the most - * pathological case, this will be upon memory starvation! To - * prevent this, we also queue the hangcheck from the retire - * worker. + * If the waiter was currently running, assume it hasn't had a chance + * to process the pending interrupt (e.g, low priority task on a loaded + * system) and wait until it sleeps before declaring a missed interrupt. + * + * If the waiter was asleep (and not even pending a wakeup), then we + * must have missed an interrupt as the GPU has stopped advancing + * but we still have a waiter. Assuming all batches complete within + * DRM_I915_HANGCHECK_JIFFIES [1.5s]! */ - i915_queue_hangcheck(engine->i915); -} - -static unsigned long wait_timeout(void) -{ - return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES); + if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) { + missed_breadcrumb(engine); + mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); + } else { + mod_timer(&b->hangcheck, wait_timeout()); + } } static void intel_breadcrumbs_fake_irq(unsigned long data) { struct intel_engine_cs *engine = (struct intel_engine_cs *)data; + struct intel_breadcrumbs *b = &engine->breadcrumbs; + unsigned long flags; /* * The timer persists in case we cannot enable interrupts, @@ -72,8 +119,26 @@ static void intel_breadcrumbs_fake_irq(unsigned long data) * every jiffie in order to kick the oldest waiter to do the * coherent seqno check. */ - if (intel_engine_wakeup(engine)) - mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); + + spin_lock_irqsave(&b->irq_lock, flags); + if (!__intel_breadcrumbs_wakeup(b)) + __intel_engine_disarm_breadcrumbs(engine); + spin_unlock_irqrestore(&b->irq_lock, flags); + if (!b->irq_armed) + return; + + mod_timer(&b->fake_irq, jiffies + 1); + + /* Ensure that even if the GPU hangs, we get woken up. + * + * However, note that if no one is waiting, we never notice + * a gpu hang. Eventually, we will have to wait for a resource + * held by the GPU and so trigger a hangcheck. In the most + * pathological case, this will be upon memory starvation! To + * prevent this, we also queue the hangcheck from the retire + * worker. + */ + i915_queue_hangcheck(engine->i915); } static void irq_enable(struct intel_engine_cs *engine) @@ -82,7 +147,7 @@ static void irq_enable(struct intel_engine_cs *engine) * we still need to force the barrier before reading the seqno, * just in case. */ - engine->breadcrumbs.irq_posted = true; + set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); /* Caller disables interrupts */ spin_lock(&engine->i915->irq_lock); @@ -96,61 +161,114 @@ static void irq_disable(struct intel_engine_cs *engine) spin_lock(&engine->i915->irq_lock); engine->irq_disable(engine); spin_unlock(&engine->i915->irq_lock); +} + +void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + lockdep_assert_held(&b->irq_lock); - engine->breadcrumbs.irq_posted = false; + if (b->irq_enabled) { + irq_disable(engine); + b->irq_enabled = false; + } + + b->irq_armed = false; } -static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) { - struct intel_engine_cs *engine = - container_of(b, struct intel_engine_cs, breadcrumbs); - struct drm_i915_private *i915 = engine->i915; + struct intel_breadcrumbs *b = &engine->breadcrumbs; + unsigned long flags; - assert_spin_locked(&b->lock); - if (b->rpm_wakelock) + if (!b->irq_armed) return; - /* Since we are waiting on a request, the GPU should be busy - * and should have its own rpm reference. For completeness, - * record an rpm reference for ourselves to cover the - * interrupt we unmask. + spin_lock_irqsave(&b->irq_lock, flags); + + /* We only disarm the irq when we are idle (all requests completed), + * so if there remains a sleeping waiter, it missed the request + * completion. */ - intel_runtime_pm_get_noresume(i915); - b->rpm_wakelock = true; + if (__intel_breadcrumbs_wakeup(b) & ENGINE_WAKEUP_ASLEEP) + missed_breadcrumb(engine); - /* No interrupts? Kick the waiter every jiffie! */ - if (intel_irqs_enabled(i915)) { - if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings)) - irq_enable(engine); - b->irq_enabled = true; - } + __intel_engine_disarm_breadcrumbs(engine); + + spin_unlock_irqrestore(&b->irq_lock, flags); +} + +static bool use_fake_irq(const struct intel_breadcrumbs *b) +{ + const struct intel_engine_cs *engine = + container_of(b, struct intel_engine_cs, breadcrumbs); + + if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) + return false; - if (!b->irq_enabled || - test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) { + /* Only start with the heavy weight fake irq timer if we have not + * seen any interrupts since enabling it the first time. If the + * interrupts are still arriving, it means we made a mistake in our + * engine->seqno_barrier(), a timing error that should be transient + * and unlikely to reoccur. + */ + return atomic_read(&engine->irq_count) == b->hangcheck_interrupts; +} + +static void enable_fake_irq(struct intel_breadcrumbs *b) +{ + /* Ensure we never sleep indefinitely */ + if (!b->irq_enabled || use_fake_irq(b)) mod_timer(&b->fake_irq, jiffies + 1); - } else { - /* Ensure we never sleep indefinitely */ - GEM_BUG_ON(!time_after(b->timeout, jiffies)); - mod_timer(&b->hangcheck, b->timeout); - } + else + mod_timer(&b->hangcheck, wait_timeout()); } -static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b) +static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) { struct intel_engine_cs *engine = container_of(b, struct intel_engine_cs, breadcrumbs); + struct drm_i915_private *i915 = engine->i915; - assert_spin_locked(&b->lock); - if (!b->rpm_wakelock) + lockdep_assert_held(&b->irq_lock); + if (b->irq_armed) return; - if (b->irq_enabled) { - irq_disable(engine); - b->irq_enabled = false; + /* The breadcrumb irq will be disarmed on the interrupt after the + * waiters are signaled. This gives us a single interrupt window in + * which we can add a new waiter and avoid the cost of re-enabling + * the irq. + */ + b->irq_armed = true; + GEM_BUG_ON(b->irq_enabled); + + if (I915_SELFTEST_ONLY(b->mock)) { + /* For our mock objects we want to avoid interaction + * with the real hardware (which is not set up). So + * we simply pretend we have enabled the powerwell + * and the irq, and leave it up to the mock + * implementation to call intel_engine_wakeup() + * itself when it wants to simulate a user interrupt, + */ + return; + } + + /* Since we are waiting on a request, the GPU should be busy + * and should have its own rpm reference. This is tracked + * by i915->gt.awake, we can forgo holding our own wakref + * for the interrupt as before i915->gt.awake is released (when + * the driver is idle) we disarm the breadcrumbs. + */ + + /* No interrupts? Kick the waiter every jiffie! */ + if (intel_irqs_enabled(i915)) { + if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings)) + irq_enable(engine); + b->irq_enabled = true; } - intel_runtime_pm_put(engine->i915); - b->rpm_wakelock = false; + enable_fake_irq(b); } static inline struct intel_wait *to_wait(struct rb_node *node) @@ -161,7 +279,7 @@ static inline struct intel_wait *to_wait(struct rb_node *node) static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b, struct intel_wait *wait) { - assert_spin_locked(&b->lock); + lockdep_assert_held(&b->rb_lock); /* This request is completed, so remove it from the tree, mark it as * complete, and *then* wake up the associated task. @@ -172,6 +290,24 @@ static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b, wake_up_process(wait->tsk); /* implicit smp_wmb() */ } +static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine, + struct rb_node *next) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + spin_lock(&b->irq_lock); + GEM_BUG_ON(!b->irq_armed); + b->irq_wait = to_wait(next); + spin_unlock(&b->irq_lock); + + /* We always wake up the next waiter that takes over as the bottom-half + * as we may delegate not only the irq-seqno barrier to the next waiter + * but also the task of waking up concurrent waiters. + */ + if (next) + wake_up_process(to_wait(next)->tsk); +} + static bool __intel_engine_add_wait(struct intel_engine_cs *engine, struct intel_wait *wait) { @@ -235,7 +371,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, } rb_link_node(&wait->node, parent, p); rb_insert_color(&wait->node, &b->waiters); - GEM_BUG_ON(!first && !rcu_access_pointer(b->irq_seqno_bh)); if (completed) { struct rb_node *next = rb_next(completed); @@ -243,22 +378,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, GEM_BUG_ON(!next && !first); if (next && next != &wait->node) { GEM_BUG_ON(first); - b->timeout = wait_timeout(); - b->first_wait = to_wait(next); - rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk); - /* As there is a delay between reading the current - * seqno, processing the completed tasks and selecting - * the next waiter, we may have missed the interrupt - * and so need for the next bottom-half to wakeup. - * - * Also as we enable the IRQ, we may miss the - * interrupt for that seqno, so we have to wake up - * the next bottom-half in order to do a coherent check - * in case the seqno passed. - */ - __intel_breadcrumbs_enable_irq(b); - if (READ_ONCE(b->irq_posted)) - wake_up_process(to_wait(next)->tsk); + __intel_breadcrumbs_next(engine, next); } do { @@ -269,10 +389,9 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, } if (first) { + spin_lock(&b->irq_lock); GEM_BUG_ON(rb_first(&b->waiters) != &wait->node); - b->timeout = wait_timeout(); - b->first_wait = wait; - rcu_assign_pointer(b->irq_seqno_bh, wait->tsk); + b->irq_wait = wait; /* After assigning ourselves as the new bottom-half, we must * perform a cursory check to prevent a missed interrupt. * Either we miss the interrupt whilst programming the hardware, @@ -282,10 +401,10 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, * and so we miss the wake up. */ __intel_breadcrumbs_enable_irq(b); + spin_unlock(&b->irq_lock); } - GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh)); - GEM_BUG_ON(!b->first_wait); - GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node); + GEM_BUG_ON(!b->irq_wait); + GEM_BUG_ON(rb_first(&b->waiters) != &b->irq_wait->node); return first; } @@ -296,9 +415,9 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine, struct intel_breadcrumbs *b = &engine->breadcrumbs; bool first; - spin_lock_irq(&b->lock); + spin_lock_irq(&b->rb_lock); first = __intel_engine_add_wait(engine, wait); - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); return first; } @@ -317,29 +436,20 @@ static inline int wakeup_priority(struct intel_breadcrumbs *b, return tsk->prio; } -void intel_engine_remove_wait(struct intel_engine_cs *engine, - struct intel_wait *wait) +static void __intel_engine_remove_wait(struct intel_engine_cs *engine, + struct intel_wait *wait) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - /* Quick check to see if this waiter was already decoupled from - * the tree by the bottom-half to avoid contention on the spinlock - * by the herd. - */ - if (RB_EMPTY_NODE(&wait->node)) - return; - - spin_lock_irq(&b->lock); + lockdep_assert_held(&b->rb_lock); if (RB_EMPTY_NODE(&wait->node)) - goto out_unlock; + goto out; - if (b->first_wait == wait) { + if (b->irq_wait == wait) { const int priority = wakeup_priority(b, wait->tsk); struct rb_node *next; - GEM_BUG_ON(rcu_access_pointer(b->irq_seqno_bh) != wait->tsk); - /* We are the current bottom-half. Find the next candidate, * the first waiter in the queue on the remaining oldest * request. As multiple seqnos may complete in the time it @@ -372,25 +482,7 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, } } - if (next) { - /* In our haste, we may have completed the first waiter - * before we enabled the interrupt. Do so now as we - * have a second waiter for a future seqno. Afterwards, - * we have to wake up that waiter in case we missed - * the interrupt, or if we have to handle an - * exception rather than a seqno completion. - */ - b->timeout = wait_timeout(); - b->first_wait = to_wait(next); - rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk); - if (b->first_wait->seqno != wait->seqno) - __intel_breadcrumbs_enable_irq(b); - wake_up_process(b->first_wait->tsk); - } else { - b->first_wait = NULL; - rcu_assign_pointer(b->irq_seqno_bh, NULL); - __intel_breadcrumbs_disable_irq(b); - } + __intel_breadcrumbs_next(engine, next); } else { GEM_BUG_ON(rb_first(&b->waiters) == &wait->node); } @@ -398,15 +490,35 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, GEM_BUG_ON(RB_EMPTY_NODE(&wait->node)); rb_erase(&wait->node, &b->waiters); -out_unlock: - GEM_BUG_ON(b->first_wait == wait); +out: + GEM_BUG_ON(b->irq_wait == wait); GEM_BUG_ON(rb_first(&b->waiters) != - (b->first_wait ? &b->first_wait->node : NULL)); - GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters)); - spin_unlock_irq(&b->lock); + (b->irq_wait ? &b->irq_wait->node : NULL)); +} + +void intel_engine_remove_wait(struct intel_engine_cs *engine, + struct intel_wait *wait) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + /* Quick check to see if this waiter was already decoupled from + * the tree by the bottom-half to avoid contention on the spinlock + * by the herd. + */ + if (RB_EMPTY_NODE(&wait->node)) + return; + + spin_lock_irq(&b->rb_lock); + __intel_engine_remove_wait(engine, wait); + spin_unlock_irq(&b->rb_lock); +} + +static bool signal_valid(const struct drm_i915_gem_request *request) +{ + return intel_wait_check_request(&request->signaling.wait, request); } -static bool signal_complete(struct drm_i915_gem_request *request) +static bool signal_complete(const struct drm_i915_gem_request *request) { if (!request) return false; @@ -415,7 +527,7 @@ static bool signal_complete(struct drm_i915_gem_request *request) * signalled that this wait is already completed. */ if (intel_wait_complete(&request->signaling.wait)) - return true; + return signal_valid(request); /* Carefully check if the request is complete, giving time for the * seqno to be visible or if the GPU hung. @@ -458,40 +570,62 @@ static int intel_breadcrumbs_signaler(void *arg) * need to wait for a new interrupt from the GPU or for * a new client. */ - request = READ_ONCE(b->first_signal); + rcu_read_lock(); + request = rcu_dereference(b->first_signal); + if (request) + request = i915_gem_request_get_rcu(request); + rcu_read_unlock(); if (signal_complete(request)) { - /* Wake up all other completed waiters and select the - * next bottom-half for the next user interrupt. - */ - intel_engine_remove_wait(engine, - &request->signaling.wait); - local_bh_disable(); dma_fence_signal(&request->fence); local_bh_enable(); /* kick start the tasklets */ + spin_lock_irq(&b->rb_lock); + + /* Wake up all other completed waiters and select the + * next bottom-half for the next user interrupt. + */ + __intel_engine_remove_wait(engine, + &request->signaling.wait); + /* Find the next oldest signal. Note that as we have * not been holding the lock, another client may * have installed an even older signal than the one * we just completed - so double check we are still * the oldest before picking the next one. */ - spin_lock_irq(&b->lock); - if (request == b->first_signal) { + if (request == rcu_access_pointer(b->first_signal)) { struct rb_node *rb = rb_next(&request->signaling.node); - b->first_signal = rb ? to_signaler(rb) : NULL; + rcu_assign_pointer(b->first_signal, + rb ? to_signaler(rb) : NULL); } rb_erase(&request->signaling.node, &b->signals); - spin_unlock_irq(&b->lock); + RB_CLEAR_NODE(&request->signaling.node); + + spin_unlock_irq(&b->rb_lock); i915_gem_request_put(request); } else { - if (kthread_should_stop()) + DEFINE_WAIT(exec); + + if (kthread_should_stop()) { + GEM_BUG_ON(request); break; + } + + if (request) + add_wait_queue(&request->execute, &exec); schedule(); + + if (request) + remove_wait_queue(&request->execute, &exec); + + if (kthread_should_park()) + kthread_parkme(); } + i915_gem_request_put(request); } while (1); __set_current_state(TASK_RUNNING); @@ -504,6 +638,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) struct intel_breadcrumbs *b = &engine->breadcrumbs; struct rb_node *parent, **p; bool first, wakeup; + u32 seqno; /* Note that we may be called from an interrupt handler on another * device (e.g. nouveau signaling a fence completion causing us @@ -513,15 +648,19 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) */ /* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */ - assert_spin_locked(&request->lock); - if (!request->global_seqno) + GEM_BUG_ON(!irqs_disabled()); + lockdep_assert_held(&request->lock); + + seqno = i915_gem_request_global_seqno(request); + if (!seqno) return; request->signaling.wait.tsk = b->signaler; - request->signaling.wait.seqno = request->global_seqno; + request->signaling.wait.request = request; + request->signaling.wait.seqno = seqno; i915_gem_request_get(request); - spin_lock(&b->lock); + spin_lock(&b->rb_lock); /* First add ourselves into the list of waiters, but register our * bottom-half as the signaller thread. As per usual, only the oldest @@ -542,8 +681,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) p = &b->signals.rb_node; while (*p) { parent = *p; - if (i915_seqno_passed(request->global_seqno, - to_signaler(parent)->global_seqno)) { + if (i915_seqno_passed(seqno, + to_signaler(parent)->signaling.wait.seqno)) { p = &parent->rb_right; first = false; } else { @@ -553,20 +692,52 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) rb_link_node(&request->signaling.node, parent, p); rb_insert_color(&request->signaling.node, &b->signals); if (first) - smp_store_mb(b->first_signal, request); + rcu_assign_pointer(b->first_signal, request); - spin_unlock(&b->lock); + spin_unlock(&b->rb_lock); if (wakeup) wake_up_process(b->signaler); } +void intel_engine_cancel_signaling(struct drm_i915_gem_request *request) +{ + struct intel_engine_cs *engine = request->engine; + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + GEM_BUG_ON(!irqs_disabled()); + lockdep_assert_held(&request->lock); + GEM_BUG_ON(!request->signaling.wait.seqno); + + spin_lock(&b->rb_lock); + + if (!RB_EMPTY_NODE(&request->signaling.node)) { + if (request == rcu_access_pointer(b->first_signal)) { + struct rb_node *rb = + rb_next(&request->signaling.node); + rcu_assign_pointer(b->first_signal, + rb ? to_signaler(rb) : NULL); + } + rb_erase(&request->signaling.node, &b->signals); + RB_CLEAR_NODE(&request->signaling.node); + i915_gem_request_put(request); + } + + __intel_engine_remove_wait(engine, &request->signaling.wait); + + spin_unlock(&b->rb_lock); + + request->signaling.wait.seqno = 0; +} + int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; struct task_struct *tsk; - spin_lock_init(&b->lock); + spin_lock_init(&b->rb_lock); + spin_lock_init(&b->irq_lock); + setup_timer(&b->fake_irq, intel_breadcrumbs_fake_irq, (unsigned long)engine); @@ -604,20 +775,26 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) struct intel_breadcrumbs *b = &engine->breadcrumbs; cancel_fake_irq(engine); - spin_lock_irq(&b->lock); + spin_lock_irq(&b->irq_lock); - __intel_breadcrumbs_disable_irq(b); - if (intel_engine_has_waiter(engine)) { - b->timeout = wait_timeout(); - __intel_breadcrumbs_enable_irq(b); - if (READ_ONCE(b->irq_posted)) - wake_up_process(b->first_wait->tsk); - } else { - /* sanitize the IMR and unmask any auxiliary interrupts */ + if (b->irq_enabled) + irq_enable(engine); + else irq_disable(engine); - } - spin_unlock_irq(&b->lock); + /* We set the IRQ_BREADCRUMB bit when we enable the irq presuming the + * GPU is active and may have already executed the MI_USER_INTERRUPT + * before the CPU is ready to receive. However, the engine is currently + * idle (we haven't started it yet), there is no possibility for a + * missed interrupt as we enabled the irq and so we can clear the + * immediate wakeup (until a real interrupt arrives for the waiter). + */ + clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); + + if (b->irq_armed) + enable_fake_irq(b); + + spin_unlock_irq(&b->irq_lock); } void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) @@ -625,9 +802,9 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) struct intel_breadcrumbs *b = &engine->breadcrumbs; /* The engines should be idle and all requests accounted for! */ - WARN_ON(READ_ONCE(b->first_wait)); + WARN_ON(READ_ONCE(b->irq_wait)); WARN_ON(!RB_EMPTY_ROOT(&b->waiters)); - WARN_ON(READ_ONCE(b->first_signal)); + WARN_ON(rcu_access_pointer(b->first_signal)); WARN_ON(!RB_EMPTY_ROOT(&b->signals)); if (!IS_ERR_OR_NULL(b->signaler)) @@ -636,29 +813,28 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) cancel_fake_irq(engine); } -unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915) +bool intel_breadcrumbs_busy(struct intel_engine_cs *engine) { - struct intel_engine_cs *engine; - enum intel_engine_id id; - unsigned int mask = 0; - - for_each_engine(engine, i915, id) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - spin_lock_irq(&b->lock); + struct intel_breadcrumbs *b = &engine->breadcrumbs; + bool busy = false; - if (b->first_wait) { - wake_up_process(b->first_wait->tsk); - mask |= intel_engine_flag(engine); - } + spin_lock_irq(&b->rb_lock); - if (b->first_signal) { - wake_up_process(b->signaler); - mask |= intel_engine_flag(engine); - } + if (b->irq_wait) { + wake_up_process(b->irq_wait->tsk); + busy |= intel_engine_flag(engine); + } - spin_unlock_irq(&b->lock); + if (rcu_access_pointer(b->first_signal)) { + wake_up_process(b->signaler); + busy |= intel_engine_flag(engine); } - return mask; + spin_unlock_irq(&b->rb_lock); + + return busy; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_breadcrumbs.c" +#endif diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c new file mode 100644 index 000000000000..d643c0c5321b --- /dev/null +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -0,0 +1,1891 @@ +/* + * Copyright © 2006-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "intel_drv.h" + +/** + * DOC: CDCLK / RAWCLK + * + * The display engine uses several different clocks to do its work. There + * are two main clocks involved that aren't directly related to the actual + * pixel clock or any symbol/bit clock of the actual output port. These + * are the core display clock (CDCLK) and RAWCLK. + * + * CDCLK clocks most of the display pipe logic, and thus its frequency + * must be high enough to support the rate at which pixels are flowing + * through the pipes. Downscaling must also be accounted as that increases + * the effective pixel rate. + * + * On several platforms the CDCLK frequency can be changed dynamically + * to minimize power consumption for a given display configuration. + * Typically changes to the CDCLK frequency require all the display pipes + * to be shut down while the frequency is being changed. + * + * On SKL+ the DMC will toggle the CDCLK off/on during DC5/6 entry/exit. + * DMC will not change the active CDCLK frequency however, so that part + * will still be performed by the driver directly. + * + * RAWCLK is a fixed frequency clock, often used by various auxiliary + * blocks such as AUX CH or backlight PWM. Hence the only thing we + * really need to know about RAWCLK is its frequency so that various + * dividers can be programmed correctly. + */ + +static void fixed_133mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + cdclk_state->cdclk = 133333; +} + +static void fixed_200mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + cdclk_state->cdclk = 200000; +} + +static void fixed_266mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + cdclk_state->cdclk = 266667; +} + +static void fixed_333mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + cdclk_state->cdclk = 333333; +} + +static void fixed_400mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + cdclk_state->cdclk = 400000; +} + +static void fixed_450mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + cdclk_state->cdclk = 450000; +} + +static void i85x_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + u16 hpllcc = 0; + + /* + * 852GM/852GMV only supports 133 MHz and the HPLLCC + * encoding is different :( + * FIXME is this the right way to detect 852GM/852GMV? + */ + if (pdev->revision == 0x1) { + cdclk_state->cdclk = 133333; + return; + } + + pci_bus_read_config_word(pdev->bus, + PCI_DEVFN(0, 3), HPLLCC, &hpllcc); + + /* Assume that the hardware is in the high speed state. This + * should be the default. + */ + switch (hpllcc & GC_CLOCK_CONTROL_MASK) { + case GC_CLOCK_133_200: + case GC_CLOCK_133_200_2: + case GC_CLOCK_100_200: + cdclk_state->cdclk = 200000; + break; + case GC_CLOCK_166_250: + cdclk_state->cdclk = 250000; + break; + case GC_CLOCK_100_133: + cdclk_state->cdclk = 133333; + break; + case GC_CLOCK_133_266: + case GC_CLOCK_133_266_2: + case GC_CLOCK_166_266: + cdclk_state->cdclk = 266667; + break; + } +} + +static void i915gm_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + u16 gcfgc = 0; + + pci_read_config_word(pdev, GCFGC, &gcfgc); + + if (gcfgc & GC_LOW_FREQUENCY_ENABLE) { + cdclk_state->cdclk = 133333; + return; + } + + switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { + case GC_DISPLAY_CLOCK_333_320_MHZ: + cdclk_state->cdclk = 333333; + break; + default: + case GC_DISPLAY_CLOCK_190_200_MHZ: + cdclk_state->cdclk = 190000; + break; + } +} + +static void i945gm_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + u16 gcfgc = 0; + + pci_read_config_word(pdev, GCFGC, &gcfgc); + + if (gcfgc & GC_LOW_FREQUENCY_ENABLE) { + cdclk_state->cdclk = 133333; + return; + } + + switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { + case GC_DISPLAY_CLOCK_333_320_MHZ: + cdclk_state->cdclk = 320000; + break; + default: + case GC_DISPLAY_CLOCK_190_200_MHZ: + cdclk_state->cdclk = 200000; + break; + } +} + +static unsigned int intel_hpll_vco(struct drm_i915_private *dev_priv) +{ + static const unsigned int blb_vco[8] = { + [0] = 3200000, + [1] = 4000000, + [2] = 5333333, + [3] = 4800000, + [4] = 6400000, + }; + static const unsigned int pnv_vco[8] = { + [0] = 3200000, + [1] = 4000000, + [2] = 5333333, + [3] = 4800000, + [4] = 2666667, + }; + static const unsigned int cl_vco[8] = { + [0] = 3200000, + [1] = 4000000, + [2] = 5333333, + [3] = 6400000, + [4] = 3333333, + [5] = 3566667, + [6] = 4266667, + }; + static const unsigned int elk_vco[8] = { + [0] = 3200000, + [1] = 4000000, + [2] = 5333333, + [3] = 4800000, + }; + static const unsigned int ctg_vco[8] = { + [0] = 3200000, + [1] = 4000000, + [2] = 5333333, + [3] = 6400000, + [4] = 2666667, + [5] = 4266667, + }; + const unsigned int *vco_table; + unsigned int vco; + uint8_t tmp = 0; + + /* FIXME other chipsets? */ + if (IS_GM45(dev_priv)) + vco_table = ctg_vco; + else if (IS_G4X(dev_priv)) + vco_table = elk_vco; + else if (IS_I965GM(dev_priv)) + vco_table = cl_vco; + else if (IS_PINEVIEW(dev_priv)) + vco_table = pnv_vco; + else if (IS_G33(dev_priv)) + vco_table = blb_vco; + else + return 0; + + tmp = I915_READ(IS_MOBILE(dev_priv) ? HPLLVCO_MOBILE : HPLLVCO); + + vco = vco_table[tmp & 0x7]; + if (vco == 0) + DRM_ERROR("Bad HPLL VCO (HPLLVCO=0x%02x)\n", tmp); + else + DRM_DEBUG_KMS("HPLL VCO %u kHz\n", vco); + + return vco; +} + +static void g33_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + static const uint8_t div_3200[] = { 12, 10, 8, 7, 5, 16 }; + static const uint8_t div_4000[] = { 14, 12, 10, 8, 6, 20 }; + static const uint8_t div_4800[] = { 20, 14, 12, 10, 8, 24 }; + static const uint8_t div_5333[] = { 20, 16, 12, 12, 8, 28 }; + const uint8_t *div_table; + unsigned int cdclk_sel; + uint16_t tmp = 0; + + cdclk_state->vco = intel_hpll_vco(dev_priv); + + pci_read_config_word(pdev, GCFGC, &tmp); + + cdclk_sel = (tmp >> 4) & 0x7; + + if (cdclk_sel >= ARRAY_SIZE(div_3200)) + goto fail; + + switch (cdclk_state->vco) { + case 3200000: + div_table = div_3200; + break; + case 4000000: + div_table = div_4000; + break; + case 4800000: + div_table = div_4800; + break; + case 5333333: + div_table = div_5333; + break; + default: + goto fail; + } + + cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, + div_table[cdclk_sel]); + return; + +fail: + DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u kHz, CFGC=0x%08x\n", + cdclk_state->vco, tmp); + cdclk_state->cdclk = 190476; +} + +static void pnv_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + u16 gcfgc = 0; + + pci_read_config_word(pdev, GCFGC, &gcfgc); + + switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { + case GC_DISPLAY_CLOCK_267_MHZ_PNV: + cdclk_state->cdclk = 266667; + break; + case GC_DISPLAY_CLOCK_333_MHZ_PNV: + cdclk_state->cdclk = 333333; + break; + case GC_DISPLAY_CLOCK_444_MHZ_PNV: + cdclk_state->cdclk = 444444; + break; + case GC_DISPLAY_CLOCK_200_MHZ_PNV: + cdclk_state->cdclk = 200000; + break; + default: + DRM_ERROR("Unknown pnv display core clock 0x%04x\n", gcfgc); + case GC_DISPLAY_CLOCK_133_MHZ_PNV: + cdclk_state->cdclk = 133333; + break; + case GC_DISPLAY_CLOCK_167_MHZ_PNV: + cdclk_state->cdclk = 166667; + break; + } +} + +static void i965gm_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + static const uint8_t div_3200[] = { 16, 10, 8 }; + static const uint8_t div_4000[] = { 20, 12, 10 }; + static const uint8_t div_5333[] = { 24, 16, 14 }; + const uint8_t *div_table; + unsigned int cdclk_sel; + uint16_t tmp = 0; + + cdclk_state->vco = intel_hpll_vco(dev_priv); + + pci_read_config_word(pdev, GCFGC, &tmp); + + cdclk_sel = ((tmp >> 8) & 0x1f) - 1; + + if (cdclk_sel >= ARRAY_SIZE(div_3200)) + goto fail; + + switch (cdclk_state->vco) { + case 3200000: + div_table = div_3200; + break; + case 4000000: + div_table = div_4000; + break; + case 5333333: + div_table = div_5333; + break; + default: + goto fail; + } + + cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, + div_table[cdclk_sel]); + return; + +fail: + DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u kHz, CFGC=0x%04x\n", + cdclk_state->vco, tmp); + cdclk_state->cdclk = 200000; +} + +static void gm45_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + unsigned int cdclk_sel; + uint16_t tmp = 0; + + cdclk_state->vco = intel_hpll_vco(dev_priv); + + pci_read_config_word(pdev, GCFGC, &tmp); + + cdclk_sel = (tmp >> 12) & 0x1; + + switch (cdclk_state->vco) { + case 2666667: + case 4000000: + case 5333333: + cdclk_state->cdclk = cdclk_sel ? 333333 : 222222; + break; + case 3200000: + cdclk_state->cdclk = cdclk_sel ? 320000 : 228571; + break; + default: + DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u, CFGC=0x%04x\n", + cdclk_state->vco, tmp); + cdclk_state->cdclk = 222222; + break; + } +} + +static void hsw_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + uint32_t lcpll = I915_READ(LCPLL_CTL); + uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; + + if (lcpll & LCPLL_CD_SOURCE_FCLK) + cdclk_state->cdclk = 800000; + else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) + cdclk_state->cdclk = 450000; + else if (freq == LCPLL_CLK_FREQ_450) + cdclk_state->cdclk = 450000; + else if (IS_HSW_ULT(dev_priv)) + cdclk_state->cdclk = 337500; + else + cdclk_state->cdclk = 540000; +} + +static int vlv_calc_cdclk(struct drm_i915_private *dev_priv, + int max_pixclk) +{ + int freq_320 = (dev_priv->hpll_freq << 1) % 320000 != 0 ? + 333333 : 320000; + int limit = IS_CHERRYVIEW(dev_priv) ? 95 : 90; + + /* + * We seem to get an unstable or solid color picture at 200MHz. + * Not sure what's wrong. For now use 200MHz only when all pipes + * are off. + */ + if (!IS_CHERRYVIEW(dev_priv) && + max_pixclk > freq_320*limit/100) + return 400000; + else if (max_pixclk > 266667*limit/100) + return freq_320; + else if (max_pixclk > 0) + return 266667; + else + return 200000; +} + +static void vlv_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + cdclk_state->vco = vlv_get_hpll_vco(dev_priv); + cdclk_state->cdclk = vlv_get_cck_clock(dev_priv, "cdclk", + CCK_DISPLAY_CLOCK_CONTROL, + cdclk_state->vco); +} + +static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv) +{ + unsigned int credits, default_credits; + + if (IS_CHERRYVIEW(dev_priv)) + default_credits = PFI_CREDIT(12); + else + default_credits = PFI_CREDIT(8); + + if (dev_priv->cdclk.hw.cdclk >= dev_priv->czclk_freq) { + /* CHV suggested value is 31 or 63 */ + if (IS_CHERRYVIEW(dev_priv)) + credits = PFI_CREDIT_63; + else + credits = PFI_CREDIT(15); + } else { + credits = default_credits; + } + + /* + * WA - write default credits before re-programming + * FIXME: should we also set the resend bit here? + */ + I915_WRITE(GCI_CONTROL, VGA_FAST_MODE_DISABLE | + default_credits); + + I915_WRITE(GCI_CONTROL, VGA_FAST_MODE_DISABLE | + credits | PFI_CREDIT_RESEND); + + /* + * FIXME is this guaranteed to clear + * immediately or should we poll for it? + */ + WARN_ON(I915_READ(GCI_CONTROL) & PFI_CREDIT_RESEND); +} + +static void vlv_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) +{ + int cdclk = cdclk_state->cdclk; + u32 val, cmd; + + if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */ + cmd = 2; + else if (cdclk == 266667) + cmd = 1; + else + cmd = 0; + + mutex_lock(&dev_priv->rps.hw_lock); + val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); + val &= ~DSPFREQGUAR_MASK; + val |= (cmd << DSPFREQGUAR_SHIFT); + vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); + if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & + DSPFREQSTAT_MASK) == (cmd << DSPFREQSTAT_SHIFT), + 50)) { + DRM_ERROR("timed out waiting for CDclk change\n"); + } + mutex_unlock(&dev_priv->rps.hw_lock); + + mutex_lock(&dev_priv->sb_lock); + + if (cdclk == 400000) { + u32 divider; + + divider = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, + cdclk) - 1; + + /* adjust cdclk divider */ + val = vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL); + val &= ~CCK_FREQUENCY_VALUES; + val |= divider; + vlv_cck_write(dev_priv, CCK_DISPLAY_CLOCK_CONTROL, val); + + if (wait_for((vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL) & + CCK_FREQUENCY_STATUS) == (divider << CCK_FREQUENCY_STATUS_SHIFT), + 50)) + DRM_ERROR("timed out waiting for CDclk change\n"); + } + + /* adjust self-refresh exit latency value */ + val = vlv_bunit_read(dev_priv, BUNIT_REG_BISOC); + val &= ~0x7f; + + /* + * For high bandwidth configs, we set a higher latency in the bunit + * so that the core display fetch happens in time to avoid underruns. + */ + if (cdclk == 400000) + val |= 4500 / 250; /* 4.5 usec */ + else + val |= 3000 / 250; /* 3.0 usec */ + vlv_bunit_write(dev_priv, BUNIT_REG_BISOC, val); + + mutex_unlock(&dev_priv->sb_lock); + + intel_update_cdclk(dev_priv); + + vlv_program_pfi_credits(dev_priv); +} + +static void chv_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) +{ + int cdclk = cdclk_state->cdclk; + u32 val, cmd; + + switch (cdclk) { + case 333333: + case 320000: + case 266667: + case 200000: + break; + default: + MISSING_CASE(cdclk); + return; + } + + /* + * Specs are full of misinformation, but testing on actual + * hardware has shown that we just need to write the desired + * CCK divider into the Punit register. + */ + cmd = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; + + mutex_lock(&dev_priv->rps.hw_lock); + val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); + val &= ~DSPFREQGUAR_MASK_CHV; + val |= (cmd << DSPFREQGUAR_SHIFT_CHV); + vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); + if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & + DSPFREQSTAT_MASK_CHV) == (cmd << DSPFREQSTAT_SHIFT_CHV), + 50)) { + DRM_ERROR("timed out waiting for CDclk change\n"); + } + mutex_unlock(&dev_priv->rps.hw_lock); + + intel_update_cdclk(dev_priv); + + vlv_program_pfi_credits(dev_priv); +} + +static int bdw_calc_cdclk(int max_pixclk) +{ + if (max_pixclk > 540000) + return 675000; + else if (max_pixclk > 450000) + return 540000; + else if (max_pixclk > 337500) + return 450000; + else + return 337500; +} + +static void bdw_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + uint32_t lcpll = I915_READ(LCPLL_CTL); + uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; + + if (lcpll & LCPLL_CD_SOURCE_FCLK) + cdclk_state->cdclk = 800000; + else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) + cdclk_state->cdclk = 450000; + else if (freq == LCPLL_CLK_FREQ_450) + cdclk_state->cdclk = 450000; + else if (freq == LCPLL_CLK_FREQ_54O_BDW) + cdclk_state->cdclk = 540000; + else if (freq == LCPLL_CLK_FREQ_337_5_BDW) + cdclk_state->cdclk = 337500; + else + cdclk_state->cdclk = 675000; +} + +static void bdw_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) +{ + int cdclk = cdclk_state->cdclk; + uint32_t val, data; + int ret; + + if (WARN((I915_READ(LCPLL_CTL) & + (LCPLL_PLL_DISABLE | LCPLL_PLL_LOCK | + LCPLL_CD_CLOCK_DISABLE | LCPLL_ROOT_CD_CLOCK_DISABLE | + LCPLL_CD2X_CLOCK_DISABLE | LCPLL_POWER_DOWN_ALLOW | + LCPLL_CD_SOURCE_FCLK)) != LCPLL_PLL_LOCK, + "trying to change cdclk frequency with cdclk not enabled\n")) + return; + + mutex_lock(&dev_priv->rps.hw_lock); + ret = sandybridge_pcode_write(dev_priv, + BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ, 0x0); + mutex_unlock(&dev_priv->rps.hw_lock); + if (ret) { + DRM_ERROR("failed to inform pcode about cdclk change\n"); + return; + } + + val = I915_READ(LCPLL_CTL); + val |= LCPLL_CD_SOURCE_FCLK; + I915_WRITE(LCPLL_CTL, val); + + if (wait_for_us(I915_READ(LCPLL_CTL) & + LCPLL_CD_SOURCE_FCLK_DONE, 1)) + DRM_ERROR("Switching to FCLK failed\n"); + + val = I915_READ(LCPLL_CTL); + val &= ~LCPLL_CLK_FREQ_MASK; + + switch (cdclk) { + case 450000: + val |= LCPLL_CLK_FREQ_450; + data = 0; + break; + case 540000: + val |= LCPLL_CLK_FREQ_54O_BDW; + data = 1; + break; + case 337500: + val |= LCPLL_CLK_FREQ_337_5_BDW; + data = 2; + break; + case 675000: + val |= LCPLL_CLK_FREQ_675_BDW; + data = 3; + break; + default: + WARN(1, "invalid cdclk frequency\n"); + return; + } + + I915_WRITE(LCPLL_CTL, val); + + val = I915_READ(LCPLL_CTL); + val &= ~LCPLL_CD_SOURCE_FCLK; + I915_WRITE(LCPLL_CTL, val); + + if (wait_for_us((I915_READ(LCPLL_CTL) & + LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) + DRM_ERROR("Switching back to LCPLL failed\n"); + + mutex_lock(&dev_priv->rps.hw_lock); + sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, data); + mutex_unlock(&dev_priv->rps.hw_lock); + + I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1); + + intel_update_cdclk(dev_priv); + + WARN(cdclk != dev_priv->cdclk.hw.cdclk, + "cdclk requested %d kHz but got %d kHz\n", + cdclk, dev_priv->cdclk.hw.cdclk); +} + +static int skl_calc_cdclk(int max_pixclk, int vco) +{ + if (vco == 8640000) { + if (max_pixclk > 540000) + return 617143; + else if (max_pixclk > 432000) + return 540000; + else if (max_pixclk > 308571) + return 432000; + else + return 308571; + } else { + if (max_pixclk > 540000) + return 675000; + else if (max_pixclk > 450000) + return 540000; + else if (max_pixclk > 337500) + return 450000; + else + return 337500; + } +} + +static void skl_dpll0_update(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + u32 val; + + cdclk_state->ref = 24000; + cdclk_state->vco = 0; + + val = I915_READ(LCPLL1_CTL); + if ((val & LCPLL_PLL_ENABLE) == 0) + return; + + if (WARN_ON((val & LCPLL_PLL_LOCK) == 0)) + return; + + val = I915_READ(DPLL_CTRL1); + + if (WARN_ON((val & (DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | + DPLL_CTRL1_SSC(SKL_DPLL0) | + DPLL_CTRL1_OVERRIDE(SKL_DPLL0))) != + DPLL_CTRL1_OVERRIDE(SKL_DPLL0))) + return; + + switch (val & DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)) { + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, SKL_DPLL0): + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1350, SKL_DPLL0): + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1620, SKL_DPLL0): + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2700, SKL_DPLL0): + cdclk_state->vco = 8100000; + break; + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, SKL_DPLL0): + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2160, SKL_DPLL0): + cdclk_state->vco = 8640000; + break; + default: + MISSING_CASE(val & DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); + break; + } +} + +static void skl_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + u32 cdctl; + + skl_dpll0_update(dev_priv, cdclk_state); + + cdclk_state->cdclk = cdclk_state->ref; + + if (cdclk_state->vco == 0) + return; + + cdctl = I915_READ(CDCLK_CTL); + + if (cdclk_state->vco == 8640000) { + switch (cdctl & CDCLK_FREQ_SEL_MASK) { + case CDCLK_FREQ_450_432: + cdclk_state->cdclk = 432000; + break; + case CDCLK_FREQ_337_308: + cdclk_state->cdclk = 308571; + break; + case CDCLK_FREQ_540: + cdclk_state->cdclk = 540000; + break; + case CDCLK_FREQ_675_617: + cdclk_state->cdclk = 617143; + break; + default: + MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK); + break; + } + } else { + switch (cdctl & CDCLK_FREQ_SEL_MASK) { + case CDCLK_FREQ_450_432: + cdclk_state->cdclk = 450000; + break; + case CDCLK_FREQ_337_308: + cdclk_state->cdclk = 337500; + break; + case CDCLK_FREQ_540: + cdclk_state->cdclk = 540000; + break; + case CDCLK_FREQ_675_617: + cdclk_state->cdclk = 675000; + break; + default: + MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK); + break; + } + } +} + +/* convert from kHz to .1 fixpoint MHz with -1MHz offset */ +static int skl_cdclk_decimal(int cdclk) +{ + return DIV_ROUND_CLOSEST(cdclk - 1000, 500); +} + +static void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv, + int vco) +{ + bool changed = dev_priv->skl_preferred_vco_freq != vco; + + dev_priv->skl_preferred_vco_freq = vco; + + if (changed) + intel_update_max_cdclk(dev_priv); +} + +static void skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco) +{ + int min_cdclk = skl_calc_cdclk(0, vco); + u32 val; + + WARN_ON(vco != 8100000 && vco != 8640000); + + /* select the minimum CDCLK before enabling DPLL 0 */ + val = CDCLK_FREQ_337_308 | skl_cdclk_decimal(min_cdclk); + I915_WRITE(CDCLK_CTL, val); + POSTING_READ(CDCLK_CTL); + + /* + * We always enable DPLL0 with the lowest link rate possible, but still + * taking into account the VCO required to operate the eDP panel at the + * desired frequency. The usual DP link rates operate with a VCO of + * 8100 while the eDP 1.4 alternate link rates need a VCO of 8640. + * The modeset code is responsible for the selection of the exact link + * rate later on, with the constraint of choosing a frequency that + * works with vco. + */ + val = I915_READ(DPLL_CTRL1); + + val &= ~(DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | DPLL_CTRL1_SSC(SKL_DPLL0) | + DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); + val |= DPLL_CTRL1_OVERRIDE(SKL_DPLL0); + if (vco == 8640000) + val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, + SKL_DPLL0); + else + val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, + SKL_DPLL0); + + I915_WRITE(DPLL_CTRL1, val); + POSTING_READ(DPLL_CTRL1); + + I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) | LCPLL_PLL_ENABLE); + + if (intel_wait_for_register(dev_priv, + LCPLL1_CTL, LCPLL_PLL_LOCK, LCPLL_PLL_LOCK, + 5)) + DRM_ERROR("DPLL0 not locked\n"); + + dev_priv->cdclk.hw.vco = vco; + + /* We'll want to keep using the current vco from now on. */ + skl_set_preferred_cdclk_vco(dev_priv, vco); +} + +static void skl_dpll0_disable(struct drm_i915_private *dev_priv) +{ + I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) & ~LCPLL_PLL_ENABLE); + if (intel_wait_for_register(dev_priv, + LCPLL1_CTL, LCPLL_PLL_LOCK, 0, + 1)) + DRM_ERROR("Couldn't disable DPLL0\n"); + + dev_priv->cdclk.hw.vco = 0; +} + +static void skl_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) +{ + int cdclk = cdclk_state->cdclk; + int vco = cdclk_state->vco; + u32 freq_select, pcu_ack; + int ret; + + WARN_ON((cdclk == 24000) != (vco == 0)); + + mutex_lock(&dev_priv->rps.hw_lock); + ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, + SKL_CDCLK_PREPARE_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, 3); + mutex_unlock(&dev_priv->rps.hw_lock); + if (ret) { + DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", + ret); + return; + } + + /* set CDCLK_CTL */ + switch (cdclk) { + case 450000: + case 432000: + freq_select = CDCLK_FREQ_450_432; + pcu_ack = 1; + break; + case 540000: + freq_select = CDCLK_FREQ_540; + pcu_ack = 2; + break; + case 308571: + case 337500: + default: + freq_select = CDCLK_FREQ_337_308; + pcu_ack = 0; + break; + case 617143: + case 675000: + freq_select = CDCLK_FREQ_675_617; + pcu_ack = 3; + break; + } + + if (dev_priv->cdclk.hw.vco != 0 && + dev_priv->cdclk.hw.vco != vco) + skl_dpll0_disable(dev_priv); + + if (dev_priv->cdclk.hw.vco != vco) + skl_dpll0_enable(dev_priv, vco); + + I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(cdclk)); + POSTING_READ(CDCLK_CTL); + + /* inform PCU of the change */ + mutex_lock(&dev_priv->rps.hw_lock); + sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); + mutex_unlock(&dev_priv->rps.hw_lock); + + intel_update_cdclk(dev_priv); +} + +static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv) +{ + uint32_t cdctl, expected; + + /* + * check if the pre-os initialized the display + * There is SWF18 scratchpad register defined which is set by the + * pre-os which can be used by the OS drivers to check the status + */ + if ((I915_READ(SWF_ILK(0x18)) & 0x00FFFFFF) == 0) + goto sanitize; + + intel_update_cdclk(dev_priv); + /* Is PLL enabled and locked ? */ + if (dev_priv->cdclk.hw.vco == 0 || + dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.ref) + goto sanitize; + + /* DPLL okay; verify the cdclock + * + * Noticed in some instances that the freq selection is correct but + * decimal part is programmed wrong from BIOS where pre-os does not + * enable display. Verify the same as well. + */ + cdctl = I915_READ(CDCLK_CTL); + expected = (cdctl & CDCLK_FREQ_SEL_MASK) | + skl_cdclk_decimal(dev_priv->cdclk.hw.cdclk); + if (cdctl == expected) + /* All well; nothing to sanitize */ + return; + +sanitize: + DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); + + /* force cdclk programming */ + dev_priv->cdclk.hw.cdclk = 0; + /* force full PLL disable + enable */ + dev_priv->cdclk.hw.vco = -1; +} + +/** + * skl_init_cdclk - Initialize CDCLK on SKL + * @dev_priv: i915 device + * + * Initialize CDCLK for SKL and derivatives. This is generally + * done only during the display core initialization sequence, + * after which the DMC will take care of turning CDCLK off/on + * as needed. + */ +void skl_init_cdclk(struct drm_i915_private *dev_priv) +{ + struct intel_cdclk_state cdclk_state; + + skl_sanitize_cdclk(dev_priv); + + if (dev_priv->cdclk.hw.cdclk != 0 && + dev_priv->cdclk.hw.vco != 0) { + /* + * Use the current vco as our initial + * guess as to what the preferred vco is. + */ + if (dev_priv->skl_preferred_vco_freq == 0) + skl_set_preferred_cdclk_vco(dev_priv, + dev_priv->cdclk.hw.vco); + return; + } + + cdclk_state = dev_priv->cdclk.hw; + + cdclk_state.vco = dev_priv->skl_preferred_vco_freq; + if (cdclk_state.vco == 0) + cdclk_state.vco = 8100000; + cdclk_state.cdclk = skl_calc_cdclk(0, cdclk_state.vco); + + skl_set_cdclk(dev_priv, &cdclk_state); +} + +/** + * skl_uninit_cdclk - Uninitialize CDCLK on SKL + * @dev_priv: i915 device + * + * Uninitialize CDCLK for SKL and derivatives. This is done only + * during the display core uninitialization sequence. + */ +void skl_uninit_cdclk(struct drm_i915_private *dev_priv) +{ + struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw; + + cdclk_state.cdclk = cdclk_state.ref; + cdclk_state.vco = 0; + + skl_set_cdclk(dev_priv, &cdclk_state); +} + +static int bxt_calc_cdclk(int max_pixclk) +{ + if (max_pixclk > 576000) + return 624000; + else if (max_pixclk > 384000) + return 576000; + else if (max_pixclk > 288000) + return 384000; + else if (max_pixclk > 144000) + return 288000; + else + return 144000; +} + +static int glk_calc_cdclk(int max_pixclk) +{ + if (max_pixclk > 2 * 158400) + return 316800; + else if (max_pixclk > 2 * 79200) + return 158400; + else + return 79200; +} + +static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) +{ + int ratio; + + if (cdclk == dev_priv->cdclk.hw.ref) + return 0; + + switch (cdclk) { + default: + MISSING_CASE(cdclk); + case 144000: + case 288000: + case 384000: + case 576000: + ratio = 60; + break; + case 624000: + ratio = 65; + break; + } + + return dev_priv->cdclk.hw.ref * ratio; +} + +static int glk_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) +{ + int ratio; + + if (cdclk == dev_priv->cdclk.hw.ref) + return 0; + + switch (cdclk) { + default: + MISSING_CASE(cdclk); + case 79200: + case 158400: + case 316800: + ratio = 33; + break; + } + + return dev_priv->cdclk.hw.ref * ratio; +} + +static void bxt_de_pll_update(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + u32 val; + + cdclk_state->ref = 19200; + cdclk_state->vco = 0; + + val = I915_READ(BXT_DE_PLL_ENABLE); + if ((val & BXT_DE_PLL_PLL_ENABLE) == 0) + return; + + if (WARN_ON((val & BXT_DE_PLL_LOCK) == 0)) + return; + + val = I915_READ(BXT_DE_PLL_CTL); + cdclk_state->vco = (val & BXT_DE_PLL_RATIO_MASK) * cdclk_state->ref; +} + +static void bxt_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + u32 divider; + int div; + + bxt_de_pll_update(dev_priv, cdclk_state); + + cdclk_state->cdclk = cdclk_state->ref; + + if (cdclk_state->vco == 0) + return; + + divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; + + switch (divider) { + case BXT_CDCLK_CD2X_DIV_SEL_1: + div = 2; + break; + case BXT_CDCLK_CD2X_DIV_SEL_1_5: + WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); + div = 3; + break; + case BXT_CDCLK_CD2X_DIV_SEL_2: + div = 4; + break; + case BXT_CDCLK_CD2X_DIV_SEL_4: + div = 8; + break; + default: + MISSING_CASE(divider); + return; + } + + cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, div); +} + +static void bxt_de_pll_disable(struct drm_i915_private *dev_priv) +{ + I915_WRITE(BXT_DE_PLL_ENABLE, 0); + + /* Timeout 200us */ + if (intel_wait_for_register(dev_priv, + BXT_DE_PLL_ENABLE, BXT_DE_PLL_LOCK, 0, + 1)) + DRM_ERROR("timeout waiting for DE PLL unlock\n"); + + dev_priv->cdclk.hw.vco = 0; +} + +static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) +{ + int ratio = DIV_ROUND_CLOSEST(vco, dev_priv->cdclk.hw.ref); + u32 val; + + val = I915_READ(BXT_DE_PLL_CTL); + val &= ~BXT_DE_PLL_RATIO_MASK; + val |= BXT_DE_PLL_RATIO(ratio); + I915_WRITE(BXT_DE_PLL_CTL, val); + + I915_WRITE(BXT_DE_PLL_ENABLE, BXT_DE_PLL_PLL_ENABLE); + + /* Timeout 200us */ + if (intel_wait_for_register(dev_priv, + BXT_DE_PLL_ENABLE, + BXT_DE_PLL_LOCK, + BXT_DE_PLL_LOCK, + 1)) + DRM_ERROR("timeout waiting for DE PLL lock\n"); + + dev_priv->cdclk.hw.vco = vco; +} + +static void bxt_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) +{ + int cdclk = cdclk_state->cdclk; + int vco = cdclk_state->vco; + u32 val, divider; + int ret; + + /* cdclk = vco / 2 / div{1,1.5,2,4} */ + switch (DIV_ROUND_CLOSEST(vco, cdclk)) { + case 8: + divider = BXT_CDCLK_CD2X_DIV_SEL_4; + break; + case 4: + divider = BXT_CDCLK_CD2X_DIV_SEL_2; + break; + case 3: + WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); + divider = BXT_CDCLK_CD2X_DIV_SEL_1_5; + break; + case 2: + divider = BXT_CDCLK_CD2X_DIV_SEL_1; + break; + default: + WARN_ON(cdclk != dev_priv->cdclk.hw.ref); + WARN_ON(vco != 0); + + divider = BXT_CDCLK_CD2X_DIV_SEL_1; + break; + } + + /* Inform power controller of upcoming frequency change */ + mutex_lock(&dev_priv->rps.hw_lock); + ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, + 0x80000000); + mutex_unlock(&dev_priv->rps.hw_lock); + + if (ret) { + DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n", + ret, cdclk); + return; + } + + if (dev_priv->cdclk.hw.vco != 0 && + dev_priv->cdclk.hw.vco != vco) + bxt_de_pll_disable(dev_priv); + + if (dev_priv->cdclk.hw.vco != vco) + bxt_de_pll_enable(dev_priv, vco); + + val = divider | skl_cdclk_decimal(cdclk); + /* + * FIXME if only the cd2x divider needs changing, it could be done + * without shutting off the pipe (if only one pipe is active). + */ + val |= BXT_CDCLK_CD2X_PIPE_NONE; + /* + * Disable SSA Precharge when CD clock frequency < 500 MHz, + * enable otherwise. + */ + if (cdclk >= 500000) + val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; + I915_WRITE(CDCLK_CTL, val); + + mutex_lock(&dev_priv->rps.hw_lock); + ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, + DIV_ROUND_UP(cdclk, 25000)); + mutex_unlock(&dev_priv->rps.hw_lock); + + if (ret) { + DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n", + ret, cdclk); + return; + } + + intel_update_cdclk(dev_priv); +} + +static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) +{ + u32 cdctl, expected; + + intel_update_cdclk(dev_priv); + + if (dev_priv->cdclk.hw.vco == 0 || + dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.ref) + goto sanitize; + + /* DPLL okay; verify the cdclock + * + * Some BIOS versions leave an incorrect decimal frequency value and + * set reserved MBZ bits in CDCLK_CTL at least during exiting from S4, + * so sanitize this register. + */ + cdctl = I915_READ(CDCLK_CTL); + /* + * Let's ignore the pipe field, since BIOS could have configured the + * dividers both synching to an active pipe, or asynchronously + * (PIPE_NONE). + */ + cdctl &= ~BXT_CDCLK_CD2X_PIPE_NONE; + + expected = (cdctl & BXT_CDCLK_CD2X_DIV_SEL_MASK) | + skl_cdclk_decimal(dev_priv->cdclk.hw.cdclk); + /* + * Disable SSA Precharge when CD clock frequency < 500 MHz, + * enable otherwise. + */ + if (dev_priv->cdclk.hw.cdclk >= 500000) + expected |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; + + if (cdctl == expected) + /* All well; nothing to sanitize */ + return; + +sanitize: + DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); + + /* force cdclk programming */ + dev_priv->cdclk.hw.cdclk = 0; + + /* force full PLL disable + enable */ + dev_priv->cdclk.hw.vco = -1; +} + +/** + * bxt_init_cdclk - Initialize CDCLK on BXT + * @dev_priv: i915 device + * + * Initialize CDCLK for BXT and derivatives. This is generally + * done only during the display core initialization sequence, + * after which the DMC will take care of turning CDCLK off/on + * as needed. + */ +void bxt_init_cdclk(struct drm_i915_private *dev_priv) +{ + struct intel_cdclk_state cdclk_state; + + bxt_sanitize_cdclk(dev_priv); + + if (dev_priv->cdclk.hw.cdclk != 0 && + dev_priv->cdclk.hw.vco != 0) + return; + + cdclk_state = dev_priv->cdclk.hw; + + /* + * FIXME: + * - The initial CDCLK needs to be read from VBT. + * Need to make this change after VBT has changes for BXT. + */ + if (IS_GEMINILAKE(dev_priv)) { + cdclk_state.cdclk = glk_calc_cdclk(0); + cdclk_state.vco = glk_de_pll_vco(dev_priv, cdclk_state.cdclk); + } else { + cdclk_state.cdclk = bxt_calc_cdclk(0); + cdclk_state.vco = bxt_de_pll_vco(dev_priv, cdclk_state.cdclk); + } + + bxt_set_cdclk(dev_priv, &cdclk_state); +} + +/** + * bxt_uninit_cdclk - Uninitialize CDCLK on BXT + * @dev_priv: i915 device + * + * Uninitialize CDCLK for BXT and derivatives. This is done only + * during the display core uninitialization sequence. + */ +void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) +{ + struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw; + + cdclk_state.cdclk = cdclk_state.ref; + cdclk_state.vco = 0; + + bxt_set_cdclk(dev_priv, &cdclk_state); +} + +/** + * intel_cdclk_state_compare - Determine if two CDCLK states differ + * @a: first CDCLK state + * @b: second CDCLK state + * + * Returns: + * True if the CDCLK states are identical, false if they differ. + */ +bool intel_cdclk_state_compare(const struct intel_cdclk_state *a, + const struct intel_cdclk_state *b) +{ + return memcmp(a, b, sizeof(*a)) == 0; +} + +/** + * intel_set_cdclk - Push the CDCLK state to the hardware + * @dev_priv: i915 device + * @cdclk_state: new CDCLK state + * + * Program the hardware based on the passed in CDCLK state, + * if necessary. + */ +void intel_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) +{ + if (intel_cdclk_state_compare(&dev_priv->cdclk.hw, cdclk_state)) + return; + + if (WARN_ON_ONCE(!dev_priv->display.set_cdclk)) + return; + + DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz, VCO %d kHz, ref %d kHz\n", + cdclk_state->cdclk, cdclk_state->vco, + cdclk_state->ref); + + dev_priv->display.set_cdclk(dev_priv, cdclk_state); +} + +static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, + int pixel_rate) +{ + struct drm_i915_private *dev_priv = + to_i915(crtc_state->base.crtc->dev); + + /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ + if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled) + pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95); + + /* BSpec says "Do not use DisplayPort with CDCLK less than + * 432 MHz, audio enabled, port width x4, and link rate + * HBR2 (5.4 GHz), or else there may be audio corruption or + * screen corruption." + */ + if (intel_crtc_has_dp_encoder(crtc_state) && + crtc_state->has_audio && + crtc_state->port_clock >= 540000 && + crtc_state->lane_count == 4) + pixel_rate = max(432000, pixel_rate); + + return pixel_rate; +} + +/* compute the max rate for new configuration */ +static int intel_max_pixel_rate(struct drm_atomic_state *state) +{ + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct drm_i915_private *dev_priv = to_i915(state->dev); + struct drm_crtc *crtc; + struct drm_crtc_state *cstate; + struct intel_crtc_state *crtc_state; + unsigned int max_pixel_rate = 0, i; + enum pipe pipe; + + memcpy(intel_state->min_pixclk, dev_priv->min_pixclk, + sizeof(intel_state->min_pixclk)); + + for_each_crtc_in_state(state, crtc, cstate, i) { + int pixel_rate; + + crtc_state = to_intel_crtc_state(cstate); + if (!crtc_state->base.enable) { + intel_state->min_pixclk[i] = 0; + continue; + } + + pixel_rate = crtc_state->pixel_rate; + + if (IS_BROADWELL(dev_priv) || IS_GEN9(dev_priv)) + pixel_rate = + bdw_adjust_min_pipe_pixel_rate(crtc_state, + pixel_rate); + + intel_state->min_pixclk[i] = pixel_rate; + } + + for_each_pipe(dev_priv, pipe) + max_pixel_rate = max(intel_state->min_pixclk[pipe], + max_pixel_rate); + + return max_pixel_rate; +} + +static int vlv_modeset_calc_cdclk(struct drm_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->dev); + int max_pixclk = intel_max_pixel_rate(state); + struct intel_atomic_state *intel_state = + to_intel_atomic_state(state); + int cdclk; + + cdclk = vlv_calc_cdclk(dev_priv, max_pixclk); + + if (cdclk > dev_priv->max_cdclk_freq) { + DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", + cdclk, dev_priv->max_cdclk_freq); + return -EINVAL; + } + + intel_state->cdclk.logical.cdclk = cdclk; + + if (!intel_state->active_crtcs) { + cdclk = vlv_calc_cdclk(dev_priv, 0); + + intel_state->cdclk.actual.cdclk = cdclk; + } else { + intel_state->cdclk.actual = + intel_state->cdclk.logical; + } + + return 0; +} + +static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->dev); + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + int max_pixclk = intel_max_pixel_rate(state); + int cdclk; + + /* + * FIXME should also account for plane ratio + * once 64bpp pixel formats are supported. + */ + cdclk = bdw_calc_cdclk(max_pixclk); + + if (cdclk > dev_priv->max_cdclk_freq) { + DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", + cdclk, dev_priv->max_cdclk_freq); + return -EINVAL; + } + + intel_state->cdclk.logical.cdclk = cdclk; + + if (!intel_state->active_crtcs) { + cdclk = bdw_calc_cdclk(0); + + intel_state->cdclk.actual.cdclk = cdclk; + } else { + intel_state->cdclk.actual = + intel_state->cdclk.logical; + } + + return 0; +} + +static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) +{ + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct drm_i915_private *dev_priv = to_i915(state->dev); + const int max_pixclk = intel_max_pixel_rate(state); + int cdclk, vco; + + vco = intel_state->cdclk.logical.vco; + if (!vco) + vco = dev_priv->skl_preferred_vco_freq; + + /* + * FIXME should also account for plane ratio + * once 64bpp pixel formats are supported. + */ + cdclk = skl_calc_cdclk(max_pixclk, vco); + + if (cdclk > dev_priv->max_cdclk_freq) { + DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", + cdclk, dev_priv->max_cdclk_freq); + return -EINVAL; + } + + intel_state->cdclk.logical.vco = vco; + intel_state->cdclk.logical.cdclk = cdclk; + + if (!intel_state->active_crtcs) { + cdclk = skl_calc_cdclk(0, vco); + + intel_state->cdclk.actual.vco = vco; + intel_state->cdclk.actual.cdclk = cdclk; + } else { + intel_state->cdclk.actual = + intel_state->cdclk.logical; + } + + return 0; +} + +static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->dev); + int max_pixclk = intel_max_pixel_rate(state); + struct intel_atomic_state *intel_state = + to_intel_atomic_state(state); + int cdclk, vco; + + if (IS_GEMINILAKE(dev_priv)) { + cdclk = glk_calc_cdclk(max_pixclk); + vco = glk_de_pll_vco(dev_priv, cdclk); + } else { + cdclk = bxt_calc_cdclk(max_pixclk); + vco = bxt_de_pll_vco(dev_priv, cdclk); + } + + if (cdclk > dev_priv->max_cdclk_freq) { + DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", + cdclk, dev_priv->max_cdclk_freq); + return -EINVAL; + } + + intel_state->cdclk.logical.vco = vco; + intel_state->cdclk.logical.cdclk = cdclk; + + if (!intel_state->active_crtcs) { + if (IS_GEMINILAKE(dev_priv)) { + cdclk = glk_calc_cdclk(0); + vco = glk_de_pll_vco(dev_priv, cdclk); + } else { + cdclk = bxt_calc_cdclk(0); + vco = bxt_de_pll_vco(dev_priv, cdclk); + } + + intel_state->cdclk.actual.vco = vco; + intel_state->cdclk.actual.cdclk = cdclk; + } else { + intel_state->cdclk.actual = + intel_state->cdclk.logical; + } + + return 0; +} + +static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) +{ + int max_cdclk_freq = dev_priv->max_cdclk_freq; + + if (IS_GEMINILAKE(dev_priv)) + return 2 * max_cdclk_freq; + else if (INTEL_INFO(dev_priv)->gen >= 9 || + IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + return max_cdclk_freq; + else if (IS_CHERRYVIEW(dev_priv)) + return max_cdclk_freq*95/100; + else if (INTEL_INFO(dev_priv)->gen < 4) + return 2*max_cdclk_freq*90/100; + else + return max_cdclk_freq*90/100; +} + +/** + * intel_update_max_cdclk - Determine the maximum support CDCLK frequency + * @dev_priv: i915 device + * + * Determine the maximum CDCLK frequency the platform supports, and also + * derive the maximum dot clock frequency the maximum CDCLK frequency + * allows. + */ +void intel_update_max_cdclk(struct drm_i915_private *dev_priv) +{ + if (IS_GEN9_BC(dev_priv)) { + u32 limit = I915_READ(SKL_DFSM) & SKL_DFSM_CDCLK_LIMIT_MASK; + int max_cdclk, vco; + + vco = dev_priv->skl_preferred_vco_freq; + WARN_ON(vco != 8100000 && vco != 8640000); + + /* + * Use the lower (vco 8640) cdclk values as a + * first guess. skl_calc_cdclk() will correct it + * if the preferred vco is 8100 instead. + */ + if (limit == SKL_DFSM_CDCLK_LIMIT_675) + max_cdclk = 617143; + else if (limit == SKL_DFSM_CDCLK_LIMIT_540) + max_cdclk = 540000; + else if (limit == SKL_DFSM_CDCLK_LIMIT_450) + max_cdclk = 432000; + else + max_cdclk = 308571; + + dev_priv->max_cdclk_freq = skl_calc_cdclk(max_cdclk, vco); + } else if (IS_GEMINILAKE(dev_priv)) { + dev_priv->max_cdclk_freq = 316800; + } else if (IS_BROXTON(dev_priv)) { + dev_priv->max_cdclk_freq = 624000; + } else if (IS_BROADWELL(dev_priv)) { + /* + * FIXME with extra cooling we can allow + * 540 MHz for ULX and 675 Mhz for ULT. + * How can we know if extra cooling is + * available? PCI ID, VTB, something else? + */ + if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) + dev_priv->max_cdclk_freq = 450000; + else if (IS_BDW_ULX(dev_priv)) + dev_priv->max_cdclk_freq = 450000; + else if (IS_BDW_ULT(dev_priv)) + dev_priv->max_cdclk_freq = 540000; + else + dev_priv->max_cdclk_freq = 675000; + } else if (IS_CHERRYVIEW(dev_priv)) { + dev_priv->max_cdclk_freq = 320000; + } else if (IS_VALLEYVIEW(dev_priv)) { + dev_priv->max_cdclk_freq = 400000; + } else { + /* otherwise assume cdclk is fixed */ + dev_priv->max_cdclk_freq = dev_priv->cdclk.hw.cdclk; + } + + dev_priv->max_dotclk_freq = intel_compute_max_dotclk(dev_priv); + + DRM_DEBUG_DRIVER("Max CD clock rate: %d kHz\n", + dev_priv->max_cdclk_freq); + + DRM_DEBUG_DRIVER("Max dotclock rate: %d kHz\n", + dev_priv->max_dotclk_freq); +} + +/** + * intel_update_cdclk - Determine the current CDCLK frequency + * @dev_priv: i915 device + * + * Determine the current CDCLK frequency. + */ +void intel_update_cdclk(struct drm_i915_private *dev_priv) +{ + dev_priv->display.get_cdclk(dev_priv, &dev_priv->cdclk.hw); + + DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz, VCO: %d kHz, ref: %d kHz\n", + dev_priv->cdclk.hw.cdclk, dev_priv->cdclk.hw.vco, + dev_priv->cdclk.hw.ref); + + /* + * 9:0 CMBUS [sic] CDCLK frequency (cdfreq): + * Programmng [sic] note: bit[9:2] should be programmed to the number + * of cdclk that generates 4MHz reference clock freq which is used to + * generate GMBus clock. This will vary with the cdclk freq. + */ + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + I915_WRITE(GMBUSFREQ_VLV, + DIV_ROUND_UP(dev_priv->cdclk.hw.cdclk, 1000)); +} + +static int pch_rawclk(struct drm_i915_private *dev_priv) +{ + return (I915_READ(PCH_RAWCLK_FREQ) & RAWCLK_FREQ_MASK) * 1000; +} + +static int vlv_hrawclk(struct drm_i915_private *dev_priv) +{ + /* RAWCLK_FREQ_VLV register updated from power well code */ + return vlv_get_cck_clock_hpll(dev_priv, "hrawclk", + CCK_DISPLAY_REF_CLOCK_CONTROL); +} + +static int g4x_hrawclk(struct drm_i915_private *dev_priv) +{ + uint32_t clkcfg; + + /* hrawclock is 1/4 the FSB frequency */ + clkcfg = I915_READ(CLKCFG); + switch (clkcfg & CLKCFG_FSB_MASK) { + case CLKCFG_FSB_400: + return 100000; + case CLKCFG_FSB_533: + return 133333; + case CLKCFG_FSB_667: + return 166667; + case CLKCFG_FSB_800: + return 200000; + case CLKCFG_FSB_1067: + return 266667; + case CLKCFG_FSB_1333: + return 333333; + /* these two are just a guess; one of them might be right */ + case CLKCFG_FSB_1600: + case CLKCFG_FSB_1600_ALT: + return 400000; + default: + return 133333; + } +} + +/** + * intel_update_rawclk - Determine the current RAWCLK frequency + * @dev_priv: i915 device + * + * Determine the current RAWCLK frequency. RAWCLK is a fixed + * frequency clock so this needs to done only once. + */ +void intel_update_rawclk(struct drm_i915_private *dev_priv) +{ + if (HAS_PCH_SPLIT(dev_priv)) + dev_priv->rawclk_freq = pch_rawclk(dev_priv); + else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + dev_priv->rawclk_freq = vlv_hrawclk(dev_priv); + else if (IS_G4X(dev_priv) || IS_PINEVIEW(dev_priv)) + dev_priv->rawclk_freq = g4x_hrawclk(dev_priv); + else + /* no rawclk on other platforms, or no need to know it */ + return; + + DRM_DEBUG_DRIVER("rawclk rate: %d kHz\n", dev_priv->rawclk_freq); +} + +/** + * intel_init_cdclk_hooks - Initialize CDCLK related modesetting hooks + * @dev_priv: i915 device + */ +void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) +{ + if (IS_CHERRYVIEW(dev_priv)) { + dev_priv->display.set_cdclk = chv_set_cdclk; + dev_priv->display.modeset_calc_cdclk = + vlv_modeset_calc_cdclk; + } else if (IS_VALLEYVIEW(dev_priv)) { + dev_priv->display.set_cdclk = vlv_set_cdclk; + dev_priv->display.modeset_calc_cdclk = + vlv_modeset_calc_cdclk; + } else if (IS_BROADWELL(dev_priv)) { + dev_priv->display.set_cdclk = bdw_set_cdclk; + dev_priv->display.modeset_calc_cdclk = + bdw_modeset_calc_cdclk; + } else if (IS_GEN9_LP(dev_priv)) { + dev_priv->display.set_cdclk = bxt_set_cdclk; + dev_priv->display.modeset_calc_cdclk = + bxt_modeset_calc_cdclk; + } else if (IS_GEN9_BC(dev_priv)) { + dev_priv->display.set_cdclk = skl_set_cdclk; + dev_priv->display.modeset_calc_cdclk = + skl_modeset_calc_cdclk; + } + + if (IS_GEN9_BC(dev_priv)) + dev_priv->display.get_cdclk = skl_get_cdclk; + else if (IS_GEN9_LP(dev_priv)) + dev_priv->display.get_cdclk = bxt_get_cdclk; + else if (IS_BROADWELL(dev_priv)) + dev_priv->display.get_cdclk = bdw_get_cdclk; + else if (IS_HASWELL(dev_priv)) + dev_priv->display.get_cdclk = hsw_get_cdclk; + else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + dev_priv->display.get_cdclk = vlv_get_cdclk; + else if (IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv)) + dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; + else if (IS_GEN5(dev_priv)) + dev_priv->display.get_cdclk = fixed_450mhz_get_cdclk; + else if (IS_GM45(dev_priv)) + dev_priv->display.get_cdclk = gm45_get_cdclk; + else if (IS_G4X(dev_priv)) + dev_priv->display.get_cdclk = g33_get_cdclk; + else if (IS_I965GM(dev_priv)) + dev_priv->display.get_cdclk = i965gm_get_cdclk; + else if (IS_I965G(dev_priv)) + dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; + else if (IS_PINEVIEW(dev_priv)) + dev_priv->display.get_cdclk = pnv_get_cdclk; + else if (IS_G33(dev_priv)) + dev_priv->display.get_cdclk = g33_get_cdclk; + else if (IS_I945GM(dev_priv)) + dev_priv->display.get_cdclk = i945gm_get_cdclk; + else if (IS_I945G(dev_priv)) + dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; + else if (IS_I915GM(dev_priv)) + dev_priv->display.get_cdclk = i915gm_get_cdclk; + else if (IS_I915G(dev_priv)) + dev_priv->display.get_cdclk = fixed_333mhz_get_cdclk; + else if (IS_I865G(dev_priv)) + dev_priv->display.get_cdclk = fixed_266mhz_get_cdclk; + else if (IS_I85X(dev_priv)) + dev_priv->display.get_cdclk = i85x_get_cdclk; + else if (IS_I845G(dev_priv)) + dev_priv->display.get_cdclk = fixed_200mhz_get_cdclk; + else { /* 830 */ + WARN(!IS_I830(dev_priv), + "Unknown platform. Assuming 133 MHz CDCLK\n"); + dev_priv->display.get_cdclk = fixed_133mhz_get_cdclk; + } +} diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index d81232b79f00..b9e5266d933b 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -340,20 +340,12 @@ static void haswell_load_luts(struct drm_crtc_state *crtc_state) hsw_enable_ips(intel_crtc); } -/* Loads the palette/gamma unit for the CRTC on Broadwell+. */ -static void broadwell_load_luts(struct drm_crtc_state *state) +static void bdw_load_degamma_lut(struct drm_crtc_state *state) { - struct drm_crtc *crtc = state->crtc; - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc_state *intel_state = to_intel_crtc_state(state); - enum pipe pipe = to_intel_crtc(crtc)->pipe; + struct drm_i915_private *dev_priv = to_i915(state->crtc->dev); + enum pipe pipe = to_intel_crtc(state->crtc)->pipe; uint32_t i, lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; - if (crtc_state_is_legacy(state)) { - haswell_load_luts(state); - return; - } - I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_SPLIT_MODE | PAL_PREC_AUTO_INCREMENT); @@ -377,6 +369,20 @@ static void broadwell_load_luts(struct drm_crtc_state *state) (v << 20) | (v << 10) | v); } } +} + +static void bdw_load_gamma_lut(struct drm_crtc_state *state, u32 offset) +{ + struct drm_i915_private *dev_priv = to_i915(state->crtc->dev); + enum pipe pipe = to_intel_crtc(state->crtc)->pipe; + uint32_t i, lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + + WARN_ON(offset & ~PAL_PREC_INDEX_VALUE_MASK); + + I915_WRITE(PREC_PAL_INDEX(pipe), + (offset ? PAL_PREC_SPLIT_MODE : 0) | + PAL_PREC_AUTO_INCREMENT | + offset); if (state->gamma_lut) { struct drm_color_lut *lut = @@ -410,6 +416,23 @@ static void broadwell_load_luts(struct drm_crtc_state *state) I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), (1 << 16) - 1); I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), (1 << 16) - 1); } +} + +/* Loads the palette/gamma unit for the CRTC on Broadwell+. */ +static void broadwell_load_luts(struct drm_crtc_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->crtc->dev); + struct intel_crtc_state *intel_state = to_intel_crtc_state(state); + enum pipe pipe = to_intel_crtc(state->crtc)->pipe; + + if (crtc_state_is_legacy(state)) { + haswell_load_luts(state); + return; + } + + bdw_load_degamma_lut(state); + bdw_load_gamma_lut(state, + INTEL_INFO(dev_priv)->color.degamma_lut_size); intel_state->gamma_mode = GAMMA_MODE_MODE_SPLIT; I915_WRITE(GAMMA_MODE(pipe), GAMMA_MODE_MODE_SPLIT); @@ -422,6 +445,58 @@ static void broadwell_load_luts(struct drm_crtc_state *state) I915_WRITE(PREC_PAL_INDEX(pipe), 0); } +static void glk_load_degamma_lut(struct drm_crtc_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->crtc->dev); + enum pipe pipe = to_intel_crtc(state->crtc)->pipe; + const uint32_t lut_size = 33; + uint32_t i; + + /* + * When setting the auto-increment bit, the hardware seems to + * ignore the index bits, so we need to reset it to index 0 + * separately. + */ + I915_WRITE(PRE_CSC_GAMC_INDEX(pipe), 0); + I915_WRITE(PRE_CSC_GAMC_INDEX(pipe), PRE_CSC_GAMC_AUTO_INCREMENT); + + /* + * FIXME: The pipe degamma table in geminilake doesn't support + * different values per channel, so this just loads a linear table. + */ + for (i = 0; i < lut_size; i++) { + uint32_t v = (i * ((1 << 16) - 1)) / (lut_size - 1); + + I915_WRITE(PRE_CSC_GAMC_DATA(pipe), v); + } + + /* Clamp values > 1.0. */ + while (i++ < 35) + I915_WRITE(PRE_CSC_GAMC_DATA(pipe), (1 << 16) - 1); +} + +static void glk_load_luts(struct drm_crtc_state *state) +{ + struct drm_crtc *crtc = state->crtc; + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_crtc_state *intel_state = to_intel_crtc_state(state); + enum pipe pipe = to_intel_crtc(crtc)->pipe; + + glk_load_degamma_lut(state); + + if (crtc_state_is_legacy(state)) { + haswell_load_luts(state); + return; + } + + bdw_load_gamma_lut(state, 0); + + intel_state->gamma_mode = GAMMA_MODE_MODE_10BIT; + I915_WRITE(GAMMA_MODE(pipe), GAMMA_MODE_MODE_10BIT); + POSTING_READ(GAMMA_MODE(pipe)); +} + /* Loads the palette/gamma unit for the CRTC on CherryView. */ static void cherryview_load_luts(struct drm_crtc_state *state) { @@ -536,10 +611,13 @@ void intel_color_init(struct drm_crtc *crtc) } else if (IS_HASWELL(dev_priv)) { dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; dev_priv->display.load_luts = haswell_load_luts; - } else if (IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv) || - IS_BROXTON(dev_priv) || IS_KABYLAKE(dev_priv)) { + } else if (IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv) || + IS_BROXTON(dev_priv)) { dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; dev_priv->display.load_luts = broadwell_load_luts; + } else if (IS_GEMINILAKE(dev_priv)) { + dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; + dev_priv->display.load_luts = glk_load_luts; } else { dev_priv->display.load_luts = i9xx_load_luts; } diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 2bf5aca6e37c..8c82607294c6 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -69,12 +69,11 @@ static bool intel_crt_get_hw_state(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crt *crt = intel_encoder_to_crt(encoder); - enum intel_display_power_domain power_domain; u32 tmp; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -91,7 +90,7 @@ static bool intel_crt_get_hw_state(struct intel_encoder *encoder, ret = true; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } @@ -676,7 +675,6 @@ intel_crt_detect(struct drm_connector *connector, bool force) struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_crt *crt = intel_attached_crt(connector); struct intel_encoder *intel_encoder = &crt->base; - enum intel_display_power_domain power_domain; enum drm_connector_status status; struct intel_load_detect_pipe tmp; struct drm_modeset_acquire_ctx ctx; @@ -689,8 +687,7 @@ intel_crt_detect(struct drm_connector *connector, bool force) if (dmi_check_system(intel_spurious_crt_detect)) return connector_status_disconnected; - power_domain = intel_display_port_power_domain(intel_encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_encoder->power_domain); if (I915_HAS_HOTPLUG(dev_priv)) { /* We can not rely on the HPD pin always being correctly wired @@ -745,7 +742,7 @@ intel_crt_detect(struct drm_connector *connector, bool force) drm_modeset_acquire_fini(&ctx); out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_encoder->power_domain); return status; } @@ -761,12 +758,10 @@ static int intel_crt_get_modes(struct drm_connector *connector) struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crt *crt = intel_attached_crt(connector); struct intel_encoder *intel_encoder = &crt->base; - enum intel_display_power_domain power_domain; int ret; struct i2c_adapter *i2c; - power_domain = intel_display_port_power_domain(intel_encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_encoder->power_domain); i2c = intel_gmbus_get_adapter(dev_priv, dev_priv->vbt.crt_ddc_pin); ret = intel_crt_ddc_get_modes(connector, i2c); @@ -778,7 +773,7 @@ static int intel_crt_get_modes(struct drm_connector *connector) ret = intel_crt_ddc_get_modes(connector, i2c); out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_encoder->power_domain); return ret; } @@ -904,6 +899,8 @@ void intel_crt_init(struct drm_i915_private *dev_priv) crt->adpa_reg = adpa_reg; + crt->base.power_domain = POWER_DOMAIN_PORT_CRT; + crt->base.compute_config = intel_crt_compute_config; if (HAS_PCH_SPLIT(dev_priv)) { crt->base.disable = pch_disable_crt; diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 0085bc745f6a..14659c7e2858 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -34,9 +34,9 @@ * low-power state and comes back to normal. */ -#define I915_CSR_GLK "i915/glk_dmc_ver1_01.bin" +#define I915_CSR_GLK "i915/glk_dmc_ver1_03.bin" MODULE_FIRMWARE(I915_CSR_GLK); -#define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 1) +#define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 3) #define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin" MODULE_FIRMWARE(I915_CSR_KBL); @@ -396,13 +396,11 @@ static void csr_load_work_fn(struct work_struct *work) struct drm_i915_private *dev_priv; struct intel_csr *csr; const struct firmware *fw = NULL; - int ret; dev_priv = container_of(work, typeof(*dev_priv), csr.work); csr = &dev_priv->csr; - ret = request_firmware(&fw, dev_priv->csr.fw_path, - &dev_priv->drm.pdev->dev); + request_firmware(&fw, dev_priv->csr.fw_path, &dev_priv->drm.pdev->dev); if (fw) dev_priv->csr.dmc_payload = parse_csr_fw(dev_priv, fw); diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 66b367d0771a..04676760e6fd 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -34,6 +34,19 @@ struct ddi_buf_trans { u8 i_boost; /* SKL: I_boost; valid: 0x0, 0x1, 0x3, 0x7 */ }; +static const u8 index_to_dp_signal_levels[] = { + [0] = DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_0, + [1] = DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_1, + [2] = DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_2, + [3] = DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_3, + [4] = DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_0, + [5] = DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_1, + [6] = DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_2, + [7] = DP_TRAIN_VOLTAGE_SWING_LEVEL_2 | DP_TRAIN_PRE_EMPH_LEVEL_0, + [8] = DP_TRAIN_VOLTAGE_SWING_LEVEL_2 | DP_TRAIN_PRE_EMPH_LEVEL_1, + [9] = DP_TRAIN_VOLTAGE_SWING_LEVEL_3 | DP_TRAIN_PRE_EMPH_LEVEL_0, +}; + /* HDMI/DVI modes ignore everything but the last 2 items. So we share * them for both DP and FDI transports, allowing those ports to * automatically adapt to HDMI connections as well @@ -445,7 +458,7 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por if (IS_GEN9_LP(dev_priv)) return hdmi_level; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); hdmi_default_entry = 8; } else if (IS_BROADWELL(dev_priv)) { @@ -468,6 +481,59 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por return hdmi_level; } +static const struct ddi_buf_trans * +intel_ddi_get_buf_trans_dp(struct drm_i915_private *dev_priv, + int *n_entries) +{ + if (IS_KABYLAKE(dev_priv)) { + return kbl_get_buf_trans_dp(dev_priv, n_entries); + } else if (IS_SKYLAKE(dev_priv)) { + return skl_get_buf_trans_dp(dev_priv, n_entries); + } else if (IS_BROADWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(bdw_ddi_translations_dp); + return bdw_ddi_translations_dp; + } else if (IS_HASWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_dp); + return hsw_ddi_translations_dp; + } + + *n_entries = 0; + return NULL; +} + +static const struct ddi_buf_trans * +intel_ddi_get_buf_trans_edp(struct drm_i915_private *dev_priv, + int *n_entries) +{ + if (IS_KABYLAKE(dev_priv) || IS_SKYLAKE(dev_priv)) { + return skl_get_buf_trans_edp(dev_priv, n_entries); + } else if (IS_BROADWELL(dev_priv)) { + return bdw_get_buf_trans_edp(dev_priv, n_entries); + } else if (IS_HASWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_dp); + return hsw_ddi_translations_dp; + } + + *n_entries = 0; + return NULL; +} + +static const struct ddi_buf_trans * +intel_ddi_get_buf_trans_fdi(struct drm_i915_private *dev_priv, + int *n_entries) +{ + if (IS_BROADWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_fdi); + return hsw_ddi_translations_fdi; + } else if (IS_HASWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_fdi); + return hsw_ddi_translations_fdi; + } + + *n_entries = 0; + return NULL; +} + /* * Starting with Haswell, DDI port buffers must be programmed with correct * values in advance. This function programs the correct values for @@ -477,76 +543,43 @@ void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 iboost_bit = 0; - int i, n_dp_entries, n_edp_entries, size; + int i, n_entries; enum port port = intel_ddi_get_encoder_port(encoder); - const struct ddi_buf_trans *ddi_translations_fdi; - const struct ddi_buf_trans *ddi_translations_dp; - const struct ddi_buf_trans *ddi_translations_edp; const struct ddi_buf_trans *ddi_translations; if (IS_GEN9_LP(dev_priv)) return; - if (IS_KABYLAKE(dev_priv)) { - ddi_translations_fdi = NULL; - ddi_translations_dp = - kbl_get_buf_trans_dp(dev_priv, &n_dp_entries); - ddi_translations_edp = - skl_get_buf_trans_edp(dev_priv, &n_edp_entries); - } else if (IS_SKYLAKE(dev_priv)) { - ddi_translations_fdi = NULL; - ddi_translations_dp = - skl_get_buf_trans_dp(dev_priv, &n_dp_entries); - ddi_translations_edp = - skl_get_buf_trans_edp(dev_priv, &n_edp_entries); - } else if (IS_BROADWELL(dev_priv)) { - ddi_translations_fdi = bdw_ddi_translations_fdi; - ddi_translations_dp = bdw_ddi_translations_dp; - ddi_translations_edp = bdw_get_buf_trans_edp(dev_priv, &n_edp_entries); - n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); - } else if (IS_HASWELL(dev_priv)) { - ddi_translations_fdi = hsw_ddi_translations_fdi; - ddi_translations_dp = hsw_ddi_translations_dp; - ddi_translations_edp = hsw_ddi_translations_dp; - n_dp_entries = n_edp_entries = ARRAY_SIZE(hsw_ddi_translations_dp); - } else { - WARN(1, "ddi translation table missing\n"); - ddi_translations_edp = bdw_ddi_translations_dp; - ddi_translations_fdi = bdw_ddi_translations_fdi; - ddi_translations_dp = bdw_ddi_translations_dp; - n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp); - n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); - } - - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - /* If we're boosting the current, set bit 31 of trans1 */ - if (dev_priv->vbt.ddi_port_info[port].dp_boost_level) - iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; - - if (WARN_ON(encoder->type == INTEL_OUTPUT_EDP && - port != PORT_A && port != PORT_E && - n_edp_entries > 9)) - n_edp_entries = 9; - } - switch (encoder->type) { case INTEL_OUTPUT_EDP: - ddi_translations = ddi_translations_edp; - size = n_edp_entries; + ddi_translations = intel_ddi_get_buf_trans_edp(dev_priv, + &n_entries); break; case INTEL_OUTPUT_DP: - ddi_translations = ddi_translations_dp; - size = n_dp_entries; + ddi_translations = intel_ddi_get_buf_trans_dp(dev_priv, + &n_entries); break; case INTEL_OUTPUT_ANALOG: - ddi_translations = ddi_translations_fdi; - size = n_dp_entries; + ddi_translations = intel_ddi_get_buf_trans_fdi(dev_priv, + &n_entries); break; default: - BUG(); + MISSING_CASE(encoder->type); + return; + } + + if (IS_GEN9_BC(dev_priv)) { + /* If we're boosting the current, set bit 31 of trans1 */ + if (dev_priv->vbt.ddi_port_info[port].dp_boost_level) + iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; + + if (WARN_ON(encoder->type == INTEL_OUTPUT_EDP && + port != PORT_A && port != PORT_E && + n_entries > 9)) + n_entries = 9; } - for (i = 0; i < size; i++) { + for (i = 0; i < n_entries; i++) { I915_WRITE(DDI_BUF_TRANS_LO(port, i), ddi_translations[i].trans1 | iboost_bit); I915_WRITE(DDI_BUF_TRANS_HI(port, i), @@ -572,7 +605,7 @@ static void intel_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder) hdmi_level = intel_ddi_hdmi_level(dev_priv, port); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { ddi_translations_hdmi = skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); /* If we're boosting the current, set bit 31 of trans1 */ @@ -641,15 +674,15 @@ static uint32_t hsw_pll_to_ddi_pll_sel(struct intel_shared_dpll *pll) * DDI A (which is used for eDP) */ -void hsw_fdi_link_train(struct drm_crtc *crtc) +void hsw_fdi_link_train(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; u32 temp, i, rx_ctl_val, ddi_pll_sel; - for_each_encoder_on_crtc(dev, crtc, encoder) { + for_each_encoder_on_crtc(dev, &crtc->base, encoder) { WARN_ON(encoder->type != INTEL_OUTPUT_ANALOG); intel_prepare_dp_ddi_buffers(encoder); } @@ -668,7 +701,7 @@ void hsw_fdi_link_train(struct drm_crtc *crtc) /* Enable the PCH Receiver FDI PLL */ rx_ctl_val = dev_priv->fdi_rx_config | FDI_RX_ENHANCE_FRAME_ENABLE | FDI_RX_PLL_ENABLE | - FDI_DP_PORT_WIDTH(intel_crtc->config->fdi_lanes); + FDI_DP_PORT_WIDTH(crtc_state->fdi_lanes); I915_WRITE(FDI_RX_CTL(PIPE_A), rx_ctl_val); POSTING_READ(FDI_RX_CTL(PIPE_A)); udelay(220); @@ -678,7 +711,7 @@ void hsw_fdi_link_train(struct drm_crtc *crtc) I915_WRITE(FDI_RX_CTL(PIPE_A), rx_ctl_val); /* Configure Port Clock Select */ - ddi_pll_sel = hsw_pll_to_ddi_pll_sel(intel_crtc->config->shared_dpll); + ddi_pll_sel = hsw_pll_to_ddi_pll_sel(crtc_state->shared_dpll); I915_WRITE(PORT_CLK_SEL(PORT_E), ddi_pll_sel); WARN_ON(ddi_pll_sel != PORT_CLK_SEL_SPLL); @@ -698,7 +731,7 @@ void hsw_fdi_link_train(struct drm_crtc *crtc) * port reversal bit */ I915_WRITE(DDI_BUF_CTL(PORT_E), DDI_BUF_CTL_ENABLE | - ((intel_crtc->config->fdi_lanes - 1) << 1) | + ((crtc_state->fdi_lanes - 1) << 1) | DDI_BUF_TRANS_SELECT(i / 2)); POSTING_READ(DDI_BUF_CTL(PORT_E)); @@ -785,21 +818,20 @@ void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder) } static struct intel_encoder * -intel_ddi_get_crtc_encoder(struct drm_crtc *crtc) +intel_ddi_get_crtc_encoder(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct drm_device *dev = crtc->base.dev; struct intel_encoder *intel_encoder, *ret = NULL; int num_encoders = 0; - for_each_encoder_on_crtc(dev, crtc, intel_encoder) { + for_each_encoder_on_crtc(dev, &crtc->base, intel_encoder) { ret = intel_encoder; num_encoders++; } if (num_encoders != 1) WARN(1, "%d encoders on crtc for pipe %c\n", num_encoders, - pipe_name(intel_crtc->pipe)); + pipe_name(crtc->pipe)); BUG_ON(ret == NULL); return ret; @@ -1089,7 +1121,7 @@ void intel_ddi_clock_get(struct intel_encoder *encoder, if (INTEL_GEN(dev_priv) <= 8) hsw_ddi_clock_get(encoder, pipe_config); - else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + else if (IS_GEN9_BC(dev_priv)) skl_ddi_clock_get(encoder, pipe_config); else if (IS_GEN9_LP(dev_priv)) bxt_ddi_clock_get(encoder, pipe_config); @@ -1150,7 +1182,7 @@ bool intel_ddi_pll_select(struct intel_crtc *intel_crtc, struct intel_encoder *intel_encoder = intel_ddi_get_crtc_new_encoder(crtc_state); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) return skl_ddi_pll_select(intel_crtc, crtc_state, intel_encoder); else if (IS_GEN9_LP(dev_priv)) @@ -1161,12 +1193,12 @@ bool intel_ddi_pll_select(struct intel_crtc *intel_crtc, intel_encoder); } -void intel_ddi_set_pipe_settings(struct drm_crtc *crtc) +void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; int type = intel_encoder->type; uint32_t temp; @@ -1174,7 +1206,7 @@ void intel_ddi_set_pipe_settings(struct drm_crtc *crtc) WARN_ON(transcoder_is_dsi(cpu_transcoder)); temp = TRANS_MSA_SYNC_CLK; - switch (intel_crtc->config->pipe_bpp) { + switch (crtc_state->pipe_bpp) { case 18: temp |= TRANS_MSA_6_BPC; break; @@ -1194,12 +1226,12 @@ void intel_ddi_set_pipe_settings(struct drm_crtc *crtc) } } -void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state) +void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, + bool state) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; uint32_t temp; temp = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder)); if (state == true) @@ -1209,14 +1241,13 @@ void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state) I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), temp); } -void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) +void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - enum pipe pipe = intel_crtc->pipe; - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; enum port port = intel_ddi_get_encoder_port(intel_encoder); int type = intel_encoder->type; uint32_t temp; @@ -1225,7 +1256,7 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) temp = TRANS_DDI_FUNC_ENABLE; temp |= TRANS_DDI_SELECT_PORT(port); - switch (intel_crtc->config->pipe_bpp) { + switch (crtc_state->pipe_bpp) { case 18: temp |= TRANS_DDI_BPC_6; break; @@ -1242,9 +1273,9 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) BUG(); } - if (intel_crtc->config->base.adjusted_mode.flags & DRM_MODE_FLAG_PVSYNC) + if (crtc_state->base.adjusted_mode.flags & DRM_MODE_FLAG_PVSYNC) temp |= TRANS_DDI_PVSYNC; - if (intel_crtc->config->base.adjusted_mode.flags & DRM_MODE_FLAG_PHSYNC) + if (crtc_state->base.adjusted_mode.flags & DRM_MODE_FLAG_PHSYNC) temp |= TRANS_DDI_PHSYNC; if (cpu_transcoder == TRANSCODER_EDP) { @@ -1255,8 +1286,8 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) * using motion blur mitigation (which we don't * support). */ if (IS_HASWELL(dev_priv) && - (intel_crtc->config->pch_pfit.enabled || - intel_crtc->config->pch_pfit.force_thru)) + (crtc_state->pch_pfit.enabled || + crtc_state->pch_pfit.force_thru)) temp |= TRANS_DDI_EDP_INPUT_A_ONOFF; else temp |= TRANS_DDI_EDP_INPUT_A_ON; @@ -1274,20 +1305,20 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) } if (type == INTEL_OUTPUT_HDMI) { - if (intel_crtc->config->has_hdmi_sink) + if (crtc_state->has_hdmi_sink) temp |= TRANS_DDI_MODE_SELECT_HDMI; else temp |= TRANS_DDI_MODE_SELECT_DVI; } else if (type == INTEL_OUTPUT_ANALOG) { temp |= TRANS_DDI_MODE_SELECT_FDI; - temp |= (intel_crtc->config->fdi_lanes - 1) << 1; + temp |= (crtc_state->fdi_lanes - 1) << 1; } else if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { temp |= TRANS_DDI_MODE_SELECT_DP_SST; - temp |= DDI_PORT_WIDTH(intel_crtc->config->lane_count); + temp |= DDI_PORT_WIDTH(crtc_state->lane_count); } else if (type == INTEL_OUTPUT_DP_MST) { temp |= TRANS_DDI_MODE_SELECT_DP_MST; - temp |= DDI_PORT_WIDTH(intel_crtc->config->lane_count); + temp |= DDI_PORT_WIDTH(crtc_state->lane_count); } else { WARN(1, "Invalid encoder type %d for pipe %c\n", intel_encoder->type, pipe_name(pipe)); @@ -1316,12 +1347,11 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector) enum port port = intel_ddi_get_encoder_port(intel_encoder); enum pipe pipe = 0; enum transcoder cpu_transcoder; - enum intel_display_power_domain power_domain; uint32_t tmp; bool ret; - power_domain = intel_display_port_power_domain(intel_encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + intel_encoder->power_domain)) return false; if (!intel_encoder->get_hw_state(intel_encoder, &pipe)) { @@ -1363,7 +1393,7 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector) } out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_encoder->power_domain); return ret; } @@ -1374,13 +1404,12 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); enum port port = intel_ddi_get_encoder_port(encoder); - enum intel_display_power_domain power_domain; u32 tmp; int i; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -1437,29 +1466,39 @@ out: "(PHY_CTL %08x)\n", port_name(port), tmp); } - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } -void intel_ddi_enable_pipe_clock(struct intel_crtc *intel_crtc) +static u64 intel_ddi_get_power_domains(struct intel_encoder *encoder) { - struct drm_crtc *crtc = &intel_crtc->base; - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + enum pipe pipe; + + if (intel_ddi_get_hw_state(encoder, &pipe)) + return BIT_ULL(dig_port->ddi_io_power_domain); + + return 0; +} + +void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); enum port port = intel_ddi_get_encoder_port(intel_encoder); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; if (cpu_transcoder != TRANSCODER_EDP) I915_WRITE(TRANS_CLK_SEL(cpu_transcoder), TRANS_CLK_SEL_PORT(port)); } -void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc) +void intel_ddi_disable_pipe_clock(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; if (cpu_transcoder != TRANSCODER_EDP) I915_WRITE(TRANS_CLK_SEL(cpu_transcoder), @@ -1582,50 +1621,38 @@ static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, ddi_translations[level].deemphasis); } -static uint32_t translate_signal_level(int signal_levels) +u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) { - uint32_t level; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + int n_entries; - switch (signal_levels) { - default: - DRM_DEBUG_KMS("Unsupported voltage swing/pre-emphasis level: 0x%x\n", - signal_levels); - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_0: - level = 0; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_1: - level = 1; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_2: - level = 2; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_3: - level = 3; - break; + if (encoder->type == INTEL_OUTPUT_EDP) + intel_ddi_get_buf_trans_edp(dev_priv, &n_entries); + else + intel_ddi_get_buf_trans_dp(dev_priv, &n_entries); - case DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_0: - level = 4; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_1: - level = 5; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_2: - level = 6; - break; + if (WARN_ON(n_entries < 1)) + n_entries = 1; + if (WARN_ON(n_entries > ARRAY_SIZE(index_to_dp_signal_levels))) + n_entries = ARRAY_SIZE(index_to_dp_signal_levels); - case DP_TRAIN_VOLTAGE_SWING_LEVEL_2 | DP_TRAIN_PRE_EMPH_LEVEL_0: - level = 7; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_2 | DP_TRAIN_PRE_EMPH_LEVEL_1: - level = 8; - break; + return index_to_dp_signal_levels[n_entries - 1] & + DP_TRAIN_VOLTAGE_SWING_MASK; +} - case DP_TRAIN_VOLTAGE_SWING_LEVEL_3 | DP_TRAIN_PRE_EMPH_LEVEL_0: - level = 9; - break; +static uint32_t translate_signal_level(int signal_levels) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(index_to_dp_signal_levels); i++) { + if (index_to_dp_signal_levels[i] == signal_levels) + return i; } - return level; + WARN(1, "Unsupported voltage swing/pre-emphasis level: 0x%x\n", + signal_levels); + + return 0; } uint32_t ddi_signal_levels(struct intel_dp *intel_dp) @@ -1641,7 +1668,7 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp) level = translate_signal_level(signal_levels); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) skl_ddi_set_iboost(encoder, level); else if (IS_GEN9_LP(dev_priv)) bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type); @@ -1658,7 +1685,7 @@ void intel_ddi_clk_select(struct intel_encoder *encoder, if (WARN_ON(!pll)) return; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { uint32_t val; /* DDI -> PLL mapping */ @@ -1684,6 +1711,9 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = intel_ddi_get_encoder_port(encoder); + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + + WARN_ON(link_mst && (port == PORT_A || port == PORT_E)); intel_dp_set_link_params(intel_dp, link_rate, lane_count, link_mst); @@ -1691,6 +1721,9 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_edp_panel_on(intel_dp); intel_ddi_clk_select(encoder, pll); + + intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); + intel_prepare_dp_ddi_buffers(encoder); intel_ddi_init_dp_buf_reg(encoder); intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); @@ -1710,11 +1743,15 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, struct drm_encoder *drm_encoder = &encoder->base; enum port port = intel_ddi_get_encoder_port(encoder); int level = intel_ddi_hdmi_level(dev_priv, port); + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); intel_dp_dual_mode_set_tmds_output(intel_hdmi, true); intel_ddi_clk_select(encoder, pll); + + intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); + intel_prepare_hdmi_ddi_buffers(encoder); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) skl_ddi_set_iboost(encoder, level); else if (IS_GEN9_LP(dev_priv)) bxt_ddi_vswing_sequence(dev_priv, level, port, @@ -1729,23 +1766,21 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { - struct drm_encoder *encoder = &intel_encoder->base; - struct intel_crtc *crtc = to_intel_crtc(encoder->crtc); int type = intel_encoder->type; if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { intel_ddi_pre_enable_dp(intel_encoder, - crtc->config->port_clock, - crtc->config->lane_count, - crtc->config->shared_dpll, - intel_crtc_has_type(crtc->config, + pipe_config->port_clock, + pipe_config->lane_count, + pipe_config->shared_dpll, + intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST)); } if (type == INTEL_OUTPUT_HDMI) { intel_ddi_pre_enable_hdmi(intel_encoder, pipe_config->has_hdmi_sink, pipe_config, conn_state, - crtc->config->shared_dpll); + pipe_config->shared_dpll); } } @@ -1756,6 +1791,7 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, struct drm_encoder *encoder = &intel_encoder->base; struct drm_i915_private *dev_priv = to_i915(encoder->dev); enum port port = intel_ddi_get_encoder_port(intel_encoder); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); int type = intel_encoder->type; uint32_t val; bool wait = false; @@ -1784,7 +1820,10 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, intel_edp_panel_off(intel_dp); } - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (dig_port) + intel_display_power_put(dev_priv, dig_port->ddi_io_power_domain); + + if (IS_GEN9_BC(dev_priv)) I915_WRITE(DPLL_CTRL2, (I915_READ(DPLL_CTRL2) | DPLL_CTRL2_DDI_CLK_OFF(port))); else if (INTEL_GEN(dev_priv) < 9) @@ -1835,8 +1874,6 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder, struct drm_connector_state *conn_state) { struct drm_encoder *encoder = &intel_encoder->base; - struct drm_crtc *crtc = encoder->crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct drm_i915_private *dev_priv = to_i915(encoder->dev); enum port port = intel_ddi_get_encoder_port(intel_encoder); int type = intel_encoder->type; @@ -1863,10 +1900,8 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder, intel_edp_drrs_enable(intel_dp, pipe_config); } - if (intel_crtc->config->has_audio) { - intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO); + if (pipe_config->has_audio) intel_audio_codec_enable(intel_encoder, pipe_config, conn_state); - } } static void intel_disable_ddi(struct intel_encoder *intel_encoder, @@ -1874,16 +1909,10 @@ static void intel_disable_ddi(struct intel_encoder *intel_encoder, struct drm_connector_state *old_conn_state) { struct drm_encoder *encoder = &intel_encoder->base; - struct drm_crtc *crtc = encoder->crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int type = intel_encoder->type; - struct drm_device *dev = encoder->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - if (intel_crtc->config->has_audio) { + if (old_crtc_state->has_audio) intel_audio_codec_disable(intel_encoder); - intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO); - } if (type == INTEL_OUTPUT_EDP) { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); @@ -1898,8 +1927,7 @@ static void bxt_ddi_pre_pll_enable(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { - struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); - uint8_t mask = intel_crtc->config->lane_lat_optim_mask; + uint8_t mask = pipe_config->lane_lat_optim_mask; bxt_ddi_phy_set_lane_optim_mask(encoder, mask); } @@ -2126,45 +2154,6 @@ intel_ddi_init_hdmi_connector(struct intel_digital_port *intel_dig_port) return connector; } -struct intel_shared_dpll * -intel_ddi_get_link_dpll(struct intel_dp *intel_dp, int clock) -{ - struct intel_connector *connector = intel_dp->attached_connector; - struct intel_encoder *encoder = connector->encoder; - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct intel_shared_dpll *pll = NULL; - struct intel_shared_dpll_state tmp_pll_state; - enum intel_dpll_id dpll_id; - - if (IS_GEN9_LP(dev_priv)) { - dpll_id = (enum intel_dpll_id)dig_port->port; - /* - * Select the required PLL. This works for platforms where - * there is no shared DPLL. - */ - pll = &dev_priv->shared_dplls[dpll_id]; - if (WARN_ON(pll->active_mask)) { - - DRM_ERROR("Shared DPLL in use. active_mask:%x\n", - pll->active_mask); - return NULL; - } - tmp_pll_state = pll->state; - if (!bxt_ddi_dp_set_dpll_hw_state(clock, - &pll->state.hw_state)) { - DRM_ERROR("Could not setup DPLL\n"); - pll->state = tmp_pll_state; - return NULL; - } - } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - pll = skl_find_link_pll(dev_priv, clock); - } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - pll = hsw_ddi_dp_get_dpll(encoder, clock); - } - return pll; -} - void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) { struct intel_digital_port *intel_dig_port; @@ -2241,12 +2230,38 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) intel_encoder->get_hw_state = intel_ddi_get_hw_state; intel_encoder->get_config = intel_ddi_get_config; intel_encoder->suspend = intel_dp_encoder_suspend; + intel_encoder->get_power_domains = intel_ddi_get_power_domains; intel_dig_port->port = port; intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & (DDI_BUF_PORT_REVERSAL | DDI_A_4_LANES); + switch (port) { + case PORT_A: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_A_IO; + break; + case PORT_B: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_B_IO; + break; + case PORT_C: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_C_IO; + break; + case PORT_D: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_D_IO; + break; + case PORT_E: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_E_IO; + break; + default: + MISSING_CASE(port); + } + /* * Bspec says that DDI_A_4_LANES is the only supported configuration * for Broxton. Yet some BIOS fail to set this bit on port A if eDP @@ -2265,6 +2280,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) intel_dig_port->max_lanes = max_lanes; intel_encoder->type = INTEL_OUTPUT_UNKNOWN; + intel_encoder->power_domain = intel_port_to_power_domain(port); intel_encoder->port = port; intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); intel_encoder->cloneable = 0; @@ -2274,14 +2290,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) goto err; intel_dig_port->hpd_pulse = intel_dp_hpd_pulse; - /* - * On BXT A0/A1, sw needs to activate DDIA HPD logic and - * interrupts to check the external panel connection. - */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1) && port == PORT_B) - dev_priv->hotplug.irq_port[PORT_A] = intel_dig_port; - else - dev_priv->hotplug.irq_port[port] = intel_dig_port; + dev_priv->hotplug.irq_port[port] = intel_dig_port; } /* In theory we don't need the encoder->type check, but leave it just in diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index fcf81815daff..9fc6ab783008 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -56,6 +56,8 @@ static const char * const platform_names[] = { const char *intel_platform_name(enum intel_platform platform) { + BUILD_BUG_ON(ARRAY_SIZE(platform_names) != INTEL_MAX_PLATFORMS); + if (WARN_ON_ONCE(platform >= ARRAY_SIZE(platform_names) || platform_names[platform] == NULL)) return "<unknown>"; @@ -234,7 +236,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) * The subslice disable field is global, i.e. it applies * to each of the enabled slices. */ - sseu->subslice_mask = BIT(ss_max) - 1; + sseu->subslice_mask = GENMASK(ss_max - 1, 0); sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >> GEN8_F2_SS_DIS_SHIFT); @@ -410,10 +412,6 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) info->has_snoop = !info->has_llc; - /* Snooping is broken on BXT A stepping. */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - info->has_snoop = false; - DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask); DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask)); DRM_DEBUG_DRIVER("subslice total: %u\n", diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a2fece5e9fb3..7369ee31ad91 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -37,6 +37,7 @@ #include "intel_frontbuffer.h" #include <drm/i915_drm.h> #include "i915_drv.h" +#include "i915_gem_clflush.h" #include "intel_dsi.h" #include "i915_trace.h" #include <drm/drm_atomic.h> @@ -96,10 +97,9 @@ static void i9xx_crtc_clock_get(struct intel_crtc *crtc, static void ironlake_pch_clock_get(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config); -static int intel_framebuffer_init(struct drm_device *dev, - struct intel_framebuffer *ifb, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj); +static int intel_framebuffer_init(struct intel_framebuffer *ifb, + struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd); static void i9xx_set_pipeconf(struct intel_crtc *intel_crtc); static void intel_set_pipe_timings(struct intel_crtc *intel_crtc); static void intel_set_pipe_src_size(struct intel_crtc *intel_crtc); @@ -122,9 +122,6 @@ static void ironlake_pfit_disable(struct intel_crtc *crtc, bool force); static void ironlake_pfit_enable(struct intel_crtc *crtc); static void intel_modeset_setup_hw_state(struct drm_device *dev); static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc); -static int ilk_max_pixel_rate(struct drm_atomic_state *state); -static int glk_calc_cdclk(int max_pixclk); -static int bxt_calc_cdclk(int max_pixclk); struct intel_limit { struct { @@ -138,7 +135,7 @@ struct intel_limit { }; /* returns HPLL frequency in kHz */ -static int valleyview_get_vco(struct drm_i915_private *dev_priv) +int vlv_get_hpll_vco(struct drm_i915_private *dev_priv) { int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 }; @@ -170,73 +167,16 @@ int vlv_get_cck_clock(struct drm_i915_private *dev_priv, return DIV_ROUND_CLOSEST(ref_freq << 1, divider + 1); } -static int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, - const char *name, u32 reg) +int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, + const char *name, u32 reg) { if (dev_priv->hpll_freq == 0) - dev_priv->hpll_freq = valleyview_get_vco(dev_priv); + dev_priv->hpll_freq = vlv_get_hpll_vco(dev_priv); return vlv_get_cck_clock(dev_priv, name, reg, dev_priv->hpll_freq); } -static int -intel_pch_rawclk(struct drm_i915_private *dev_priv) -{ - return (I915_READ(PCH_RAWCLK_FREQ) & RAWCLK_FREQ_MASK) * 1000; -} - -static int -intel_vlv_hrawclk(struct drm_i915_private *dev_priv) -{ - /* RAWCLK_FREQ_VLV register updated from power well code */ - return vlv_get_cck_clock_hpll(dev_priv, "hrawclk", - CCK_DISPLAY_REF_CLOCK_CONTROL); -} - -static int -intel_g4x_hrawclk(struct drm_i915_private *dev_priv) -{ - uint32_t clkcfg; - - /* hrawclock is 1/4 the FSB frequency */ - clkcfg = I915_READ(CLKCFG); - switch (clkcfg & CLKCFG_FSB_MASK) { - case CLKCFG_FSB_400: - return 100000; - case CLKCFG_FSB_533: - return 133333; - case CLKCFG_FSB_667: - return 166667; - case CLKCFG_FSB_800: - return 200000; - case CLKCFG_FSB_1067: - return 266667; - case CLKCFG_FSB_1333: - return 333333; - /* these two are just a guess; one of them might be right */ - case CLKCFG_FSB_1600: - case CLKCFG_FSB_1600_ALT: - return 400000; - default: - return 133333; - } -} - -void intel_update_rawclk(struct drm_i915_private *dev_priv) -{ - if (HAS_PCH_SPLIT(dev_priv)) - dev_priv->rawclk_freq = intel_pch_rawclk(dev_priv); - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - dev_priv->rawclk_freq = intel_vlv_hrawclk(dev_priv); - else if (IS_G4X(dev_priv) || IS_PINEVIEW(dev_priv)) - dev_priv->rawclk_freq = intel_g4x_hrawclk(dev_priv); - else - return; /* no rawclk on other platforms, or no need to know it */ - - DRM_DEBUG_DRIVER("rawclk rate: %d kHz\n", dev_priv->rawclk_freq); -} - static void intel_update_czclk(struct drm_i915_private *dev_priv) { if (!(IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))) @@ -2112,11 +2052,13 @@ static void intel_tile_dims(const struct drm_i915_private *dev_priv, } unsigned int -intel_fb_align_height(struct drm_device *dev, unsigned int height, - uint32_t pixel_format, uint64_t fb_modifier) +intel_fb_align_height(struct drm_i915_private *dev_priv, + unsigned int height, + uint32_t pixel_format, + uint64_t fb_modifier) { unsigned int cpp = drm_format_plane_cpp(pixel_format, 0); - unsigned int tile_height = intel_tile_height(to_i915(dev), fb_modifier, cpp); + unsigned int tile_height = intel_tile_height(dev_priv, fb_modifier, cpp); return ALIGN(height, tile_height); } @@ -2682,15 +2624,13 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, return false; mutex_lock(&dev->struct_mutex); - obj = i915_gem_object_create_stolen_for_preallocated(dev_priv, base_aligned, base_aligned, size_aligned); - if (!obj) { - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&dev->struct_mutex); + if (!obj) return false; - } if (plane_config->tiling == I915_TILING_X) obj->tiling_and_stride = fb->pitches[0] | I915_TILING_X; @@ -2702,20 +2642,17 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, mode_cmd.modifier[0] = fb->modifier; mode_cmd.flags = DRM_MODE_FB_MODIFIERS; - if (intel_framebuffer_init(dev, to_intel_framebuffer(fb), - &mode_cmd, obj)) { + if (intel_framebuffer_init(to_intel_framebuffer(fb), obj, &mode_cmd)) { DRM_DEBUG_KMS("intel fb init failed\n"); goto out_unref_obj; } - mutex_unlock(&dev->struct_mutex); DRM_DEBUG_KMS("initial plane fb obj %p\n", obj); return true; out_unref_obj: i915_gem_object_put(obj); - mutex_unlock(&dev->struct_mutex); return false; } @@ -2734,6 +2671,29 @@ update_state_fb(struct drm_plane *plane) } static void +intel_set_plane_visible(struct intel_crtc_state *crtc_state, + struct intel_plane_state *plane_state, + bool visible) +{ + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + + plane_state->base.visible = visible; + + /* FIXME pre-g4x don't work like this */ + if (visible) { + crtc_state->base.plane_mask |= BIT(drm_plane_index(&plane->base)); + crtc_state->active_planes |= BIT(plane->id); + } else { + crtc_state->base.plane_mask &= ~BIT(drm_plane_index(&plane->base)); + crtc_state->active_planes &= ~BIT(plane->id); + } + + DRM_DEBUG_KMS("%s active planes 0x%x\n", + crtc_state->base.crtc->name, + crtc_state->active_planes); +} + +static void intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, struct intel_initial_plane_config *plane_config) { @@ -2790,9 +2750,11 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, * simplest solution is to just disable the primary plane now and * pretend the BIOS never had it enabled. */ - plane_state->visible = false; - crtc_state->plane_mask &= ~(1 << drm_plane_index(primary)); + intel_set_plane_visible(to_intel_crtc_state(crtc_state), + to_intel_plane_state(plane_state), + false); intel_pre_disable_primary_noatomic(&intel_crtc->base); + trace_intel_disable_plane(primary, intel_crtc); intel_plane->disable_plane(primary, &intel_crtc->base); return; @@ -2831,7 +2793,11 @@ valid_fb: drm_framebuffer_reference(fb); primary->fb = primary->state->fb = fb; primary->crtc = primary->state->crtc = &intel_crtc->base; - intel_crtc->base.state->plane_mask |= (1 << drm_plane_index(primary)); + + intel_set_plane_visible(to_intel_crtc_state(crtc_state), + to_intel_plane_state(plane_state), + true); + atomic_or(to_intel_plane(primary)->frontbuffer_bit, &obj->frontbuffer_bits); } @@ -3386,13 +3352,22 @@ static void skylake_update_primary_plane(struct drm_plane *plane, int dst_w = drm_rect_width(&plane_state->base.dst); int dst_h = drm_rect_height(&plane_state->base.dst); - plane_ctl = PLANE_CTL_ENABLE | - PLANE_CTL_PIPE_GAMMA_ENABLE | - PLANE_CTL_PIPE_CSC_ENABLE; + plane_ctl = PLANE_CTL_ENABLE; + + if (IS_GEMINILAKE(dev_priv)) { + I915_WRITE(PLANE_COLOR_CTL(pipe, plane_id), + PLANE_COLOR_PIPE_GAMMA_ENABLE | + PLANE_COLOR_PIPE_CSC_ENABLE | + PLANE_COLOR_PLANE_GAMMA_DISABLE); + } else { + plane_ctl |= + PLANE_CTL_PIPE_GAMMA_ENABLE | + PLANE_CTL_PIPE_CSC_ENABLE | + PLANE_CTL_PLANE_GAMMA_DISABLE; + } plane_ctl |= skl_plane_ctl_format(fb->format->format); plane_ctl |= skl_plane_ctl_tiling(fb->modifier); - plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE; plane_ctl |= skl_plane_ctl_rotation(rotation); /* Sizes are 0 based */ @@ -3473,10 +3448,14 @@ static void intel_update_primary_planes(struct drm_device *dev) struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); - if (plane_state->base.visible) + if (plane_state->base.visible) { + trace_intel_update_plane(&plane->base, + to_intel_crtc(crtc)); + plane->update_plane(&plane->base, to_intel_crtc_state(crtc->state), plane_state); + } } } @@ -3505,7 +3484,8 @@ __intel_display_resume(struct drm_device *dev, } /* ignore any reset values/BIOS leftovers in the WM registers */ - to_intel_atomic_state(state)->skip_intermediate_wm = true; + if (!HAS_GMCH_DISPLAY(to_i915(dev))) + to_intel_atomic_state(state)->skip_intermediate_wm = true; ret = drm_atomic_helper_commit_duplicated_state(state, ctx); @@ -3701,12 +3681,11 @@ static void intel_update_pipe_config(struct intel_crtc *crtc, } } -static void intel_fdi_normal_train(struct drm_crtc *crtc) +static void intel_fdi_normal_train(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + int pipe = crtc->pipe; i915_reg_t reg; u32 temp; @@ -3744,12 +3723,12 @@ static void intel_fdi_normal_train(struct drm_crtc *crtc) } /* The FDI link training functions for ILK/Ibexpeak. */ -static void ironlake_fdi_link_train(struct drm_crtc *crtc) +static void ironlake_fdi_link_train(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + int pipe = crtc->pipe; i915_reg_t reg; u32 temp, tries; @@ -3770,7 +3749,7 @@ static void ironlake_fdi_link_train(struct drm_crtc *crtc) reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_DP_PORT_WIDTH_MASK; - temp |= FDI_DP_PORT_WIDTH(intel_crtc->config->fdi_lanes); + temp |= FDI_DP_PORT_WIDTH(crtc_state->fdi_lanes); temp &= ~FDI_LINK_TRAIN_NONE; temp |= FDI_LINK_TRAIN_PATTERN_1; I915_WRITE(reg, temp | FDI_TX_ENABLE); @@ -3845,12 +3824,12 @@ static const int snb_b_fdi_train_param[] = { }; /* The FDI link training functions for SNB/Cougarpoint. */ -static void gen6_fdi_link_train(struct drm_crtc *crtc) +static void gen6_fdi_link_train(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + int pipe = crtc->pipe; i915_reg_t reg; u32 temp, i, retry; @@ -3869,7 +3848,7 @@ static void gen6_fdi_link_train(struct drm_crtc *crtc) reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_DP_PORT_WIDTH_MASK; - temp |= FDI_DP_PORT_WIDTH(intel_crtc->config->fdi_lanes); + temp |= FDI_DP_PORT_WIDTH(crtc_state->fdi_lanes); temp &= ~FDI_LINK_TRAIN_NONE; temp |= FDI_LINK_TRAIN_PATTERN_1; temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; @@ -3978,12 +3957,12 @@ static void gen6_fdi_link_train(struct drm_crtc *crtc) } /* Manual link training for Ivy Bridge A0 parts */ -static void ivb_manual_fdi_link_train(struct drm_crtc *crtc) +static void ivb_manual_fdi_link_train(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + int pipe = crtc->pipe; i915_reg_t reg; u32 temp, i, j; @@ -4021,7 +4000,7 @@ static void ivb_manual_fdi_link_train(struct drm_crtc *crtc) reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_DP_PORT_WIDTH_MASK; - temp |= FDI_DP_PORT_WIDTH(intel_crtc->config->fdi_lanes); + temp |= FDI_DP_PORT_WIDTH(crtc_state->fdi_lanes); temp |= FDI_LINK_TRAIN_PATTERN_1_IVB; temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; temp |= snb_b_fdi_train_param[j/2]; @@ -4308,10 +4287,10 @@ void lpt_disable_iclkip(struct drm_i915_private *dev_priv) } /* Program iCLKIP clock to the desired frequency */ -static void lpt_program_iclkip(struct drm_crtc *crtc) +static void lpt_program_iclkip(struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - int clock = to_intel_crtc(crtc)->config->base.adjusted_mode.crtc_clock; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + int clock = crtc->config->base.adjusted_mode.crtc_clock; u32 divsel, phaseinc, auxdiv, phasedir = 0; u32 temp; @@ -4492,12 +4471,12 @@ static void ivybridge_update_fdi_bc_bifurcation(struct intel_crtc *intel_crtc) /* Return which DP Port should be selected for Transcoder DP control */ static enum port -intel_trans_dp_port_sel(struct drm_crtc *crtc) +intel_trans_dp_port_sel(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct intel_encoder *encoder; - for_each_encoder_on_crtc(dev, crtc, encoder) { + for_each_encoder_on_crtc(dev, &crtc->base, encoder) { if (encoder->type == INTEL_OUTPUT_DP || encoder->type == INTEL_OUTPUT_EDP) return enc_to_dig_port(&encoder->base)->port; @@ -4514,18 +4493,18 @@ intel_trans_dp_port_sel(struct drm_crtc *crtc) * - DP transcoding bits * - transcoder */ -static void ironlake_pch_enable(struct drm_crtc *crtc) +static void ironlake_pch_enable(const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc->dev; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + int pipe = crtc->pipe; u32 temp; assert_pch_transcoder_disabled(dev_priv, pipe); if (IS_IVYBRIDGE(dev_priv)) - ivybridge_update_fdi_bc_bifurcation(intel_crtc); + ivybridge_update_fdi_bc_bifurcation(crtc); /* Write the TU size bits before fdi link training, so that error * detection works. */ @@ -4533,7 +4512,7 @@ static void ironlake_pch_enable(struct drm_crtc *crtc) I915_READ(PIPE_DATA_M1(pipe)) & TU_SIZE_MASK); /* For PCH output, training FDI link */ - dev_priv->display.fdi_link_train(crtc); + dev_priv->display.fdi_link_train(crtc, crtc_state); /* We need to program the right clock selection before writing the pixel * mutliplier into the DPLL. */ @@ -4543,7 +4522,7 @@ static void ironlake_pch_enable(struct drm_crtc *crtc) temp = I915_READ(PCH_DPLL_SEL); temp |= TRANS_DPLL_ENABLE(pipe); sel = TRANS_DPLLB_SEL(pipe); - if (intel_crtc->config->shared_dpll == + if (crtc_state->shared_dpll == intel_get_shared_dpll_by_id(dev_priv, DPLL_ID_PCH_PLL_B)) temp |= sel; else @@ -4558,19 +4537,19 @@ static void ironlake_pch_enable(struct drm_crtc *crtc) * Note that enable_shared_dpll tries to do the right thing, but * get_shared_dpll unconditionally resets the pll - we need that to have * the right LVDS enable sequence. */ - intel_enable_shared_dpll(intel_crtc); + intel_enable_shared_dpll(crtc); /* set transcoder timing, panel must allow it */ assert_panel_unlocked(dev_priv, pipe); - ironlake_pch_transcoder_set_timings(intel_crtc, pipe); + ironlake_pch_transcoder_set_timings(crtc, pipe); intel_fdi_normal_train(crtc); /* For PCH DP, enable TRANS_DP_CTL */ if (HAS_PCH_CPT(dev_priv) && - intel_crtc_has_dp_encoder(intel_crtc->config)) { + intel_crtc_has_dp_encoder(crtc_state)) { const struct drm_display_mode *adjusted_mode = - &intel_crtc->config->base.adjusted_mode; + &crtc_state->base.adjusted_mode; u32 bpc = (I915_READ(PIPECONF(pipe)) & PIPECONF_BPC_MASK) >> 5; i915_reg_t reg = TRANS_DP_CTL(pipe); temp = I915_READ(reg); @@ -4605,19 +4584,18 @@ static void ironlake_pch_enable(struct drm_crtc *crtc) ironlake_enable_pch_transcoder(dev_priv, pipe); } -static void lpt_pch_enable(struct drm_crtc *crtc) +static void lpt_pch_enable(const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; assert_pch_transcoder_disabled(dev_priv, TRANSCODER_A); lpt_program_iclkip(crtc); /* Set transcoder timing. */ - ironlake_pch_transcoder_set_timings(intel_crtc, PIPE_A); + ironlake_pch_transcoder_set_timings(crtc, PIPE_A); lpt_enable_pch_transcoder(dev_priv, cpu_transcoder); } @@ -5026,8 +5004,6 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) intel_frontbuffer_flip(to_i915(crtc->base.dev), pipe_config->fb_bits); - crtc->wm.cxsr_allowed = true; - if (pipe_config->update_wm_post && pipe_config->base.active) intel_update_watermarks(crtc); @@ -5074,22 +5050,18 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state) intel_pre_disable_primary(&crtc->base); } - if (pipe_config->disable_cxsr && HAS_GMCH_DISPLAY(dev_priv)) { - crtc->wm.cxsr_allowed = false; - - /* - * Vblank time updates from the shadow to live plane control register - * are blocked if the memory self-refresh mode is active at that - * moment. So to make sure the plane gets truly disabled, disable - * first the self-refresh mode. The self-refresh enable bit in turn - * will be checked/applied by the HW only at the next frame start - * event which is after the vblank start event, so we need to have a - * wait-for-vblank between disabling the plane and the pipe. - */ - if (old_crtc_state->base.active && - intel_set_memory_cxsr(dev_priv, false)) - intel_wait_for_vblank(dev_priv, crtc->pipe); - } + /* + * Vblank time updates from the shadow to live plane control register + * are blocked if the memory self-refresh mode is active at that + * moment. So to make sure the plane gets truly disabled, disable + * first the self-refresh mode. The self-refresh enable bit in turn + * will be checked/applied by the HW only at the next frame start + * event which is after the vblank start event, so we need to have a + * wait-for-vblank between disabling the plane and the pipe. + */ + if (HAS_GMCH_DISPLAY(dev_priv) && old_crtc_state->base.active && + pipe_config->disable_cxsr && intel_set_memory_cxsr(dev_priv, false)) + intel_wait_for_vblank(dev_priv, crtc->pipe); /* * IVB workaround: must disable low power watermarks for at least @@ -5344,7 +5316,7 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config, intel_enable_pipe(intel_crtc); if (intel_crtc->config->has_pch_encoder) - ironlake_pch_enable(crtc); + ironlake_pch_enable(pipe_config); assert_vblank_disabled(crtc); drm_crtc_vblank_on(crtc); @@ -5426,10 +5398,10 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_encoders_pre_enable(crtc, pipe_config, old_state); if (intel_crtc->config->has_pch_encoder) - dev_priv->display.fdi_link_train(crtc); + dev_priv->display.fdi_link_train(intel_crtc, pipe_config); if (!transcoder_is_dsi(cpu_transcoder)) - intel_ddi_enable_pipe_clock(intel_crtc); + intel_ddi_enable_pipe_clock(pipe_config); if (INTEL_GEN(dev_priv) >= 9) skylake_pfit_enable(intel_crtc); @@ -5442,9 +5414,9 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, */ intel_color_load_luts(&pipe_config->base); - intel_ddi_set_pipe_settings(crtc); + intel_ddi_set_pipe_settings(pipe_config); if (!transcoder_is_dsi(cpu_transcoder)) - intel_ddi_enable_transcoder_func(crtc); + intel_ddi_enable_transcoder_func(pipe_config); if (dev_priv->display.initial_watermarks != NULL) dev_priv->display.initial_watermarks(old_intel_state, pipe_config); @@ -5454,10 +5426,10 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_enable_pipe(intel_crtc); if (intel_crtc->config->has_pch_encoder) - lpt_pch_enable(crtc); + lpt_pch_enable(pipe_config); if (intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DP_MST)) - intel_ddi_set_vc_payload_alloc(crtc, true); + intel_ddi_set_vc_payload_alloc(pipe_config, true); assert_vblank_disabled(crtc); drm_crtc_vblank_on(crtc); @@ -5579,7 +5551,7 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, intel_disable_pipe(intel_crtc); if (intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DP_MST)) - intel_ddi_set_vc_payload_alloc(crtc, false); + intel_ddi_set_vc_payload_alloc(intel_crtc->config, false); if (!transcoder_is_dsi(cpu_transcoder)) intel_ddi_disable_transcoder_func(dev_priv, cpu_transcoder); @@ -5590,7 +5562,7 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, ironlake_pfit_disable(intel_crtc, false); if (!transcoder_is_dsi(cpu_transcoder)) - intel_ddi_disable_pipe_clock(intel_crtc); + intel_ddi_disable_pipe_clock(intel_crtc->config); intel_encoders_post_disable(crtc, old_crtc_state, old_state); @@ -5623,7 +5595,7 @@ static void i9xx_pfit_enable(struct intel_crtc *crtc) I915_WRITE(BCLRPAT(crtc->pipe), 0); } -static enum intel_display_power_domain port_to_power_domain(enum port port) +enum intel_display_power_domain intel_port_to_power_domain(enum port port) { switch (port) { case PORT_A: @@ -5642,91 +5614,15 @@ static enum intel_display_power_domain port_to_power_domain(enum port port) } } -static enum intel_display_power_domain port_to_aux_power_domain(enum port port) -{ - switch (port) { - case PORT_A: - return POWER_DOMAIN_AUX_A; - case PORT_B: - return POWER_DOMAIN_AUX_B; - case PORT_C: - return POWER_DOMAIN_AUX_C; - case PORT_D: - return POWER_DOMAIN_AUX_D; - case PORT_E: - /* FIXME: Check VBT for actual wiring of PORT E */ - return POWER_DOMAIN_AUX_D; - default: - MISSING_CASE(port); - return POWER_DOMAIN_AUX_A; - } -} - -enum intel_display_power_domain -intel_display_port_power_domain(struct intel_encoder *intel_encoder) -{ - struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); - struct intel_digital_port *intel_dig_port; - - switch (intel_encoder->type) { - case INTEL_OUTPUT_UNKNOWN: - /* Only DDI platforms should ever use this output type */ - WARN_ON_ONCE(!HAS_DDI(dev_priv)); - case INTEL_OUTPUT_DP: - case INTEL_OUTPUT_HDMI: - case INTEL_OUTPUT_EDP: - intel_dig_port = enc_to_dig_port(&intel_encoder->base); - return port_to_power_domain(intel_dig_port->port); - case INTEL_OUTPUT_DP_MST: - intel_dig_port = enc_to_mst(&intel_encoder->base)->primary; - return port_to_power_domain(intel_dig_port->port); - case INTEL_OUTPUT_ANALOG: - return POWER_DOMAIN_PORT_CRT; - case INTEL_OUTPUT_DSI: - return POWER_DOMAIN_PORT_DSI; - default: - return POWER_DOMAIN_PORT_OTHER; - } -} - -enum intel_display_power_domain -intel_display_port_aux_power_domain(struct intel_encoder *intel_encoder) -{ - struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); - struct intel_digital_port *intel_dig_port; - - switch (intel_encoder->type) { - case INTEL_OUTPUT_UNKNOWN: - case INTEL_OUTPUT_HDMI: - /* - * Only DDI platforms should ever use these output types. - * We can get here after the HDMI detect code has already set - * the type of the shared encoder. Since we can't be sure - * what's the status of the given connectors, play safe and - * run the DP detection too. - */ - WARN_ON_ONCE(!HAS_DDI(dev_priv)); - case INTEL_OUTPUT_DP: - case INTEL_OUTPUT_EDP: - intel_dig_port = enc_to_dig_port(&intel_encoder->base); - return port_to_aux_power_domain(intel_dig_port->port); - case INTEL_OUTPUT_DP_MST: - intel_dig_port = enc_to_mst(&intel_encoder->base)->primary; - return port_to_aux_power_domain(intel_dig_port->port); - default: - MISSING_CASE(intel_encoder->type); - return POWER_DOMAIN_AUX_A; - } -} - -static unsigned long get_crtc_power_domains(struct drm_crtc *crtc, - struct intel_crtc_state *crtc_state) +static u64 get_crtc_power_domains(struct drm_crtc *crtc, + struct intel_crtc_state *crtc_state) { struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_encoder *encoder; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum pipe pipe = intel_crtc->pipe; - unsigned long mask; + u64 mask; enum transcoder transcoder = crtc_state->cpu_transcoder; if (!crtc_state->base.active) @@ -5736,28 +5632,31 @@ static unsigned long get_crtc_power_domains(struct drm_crtc *crtc, mask |= BIT(POWER_DOMAIN_TRANSCODER(transcoder)); if (crtc_state->pch_pfit.enabled || crtc_state->pch_pfit.force_thru) - mask |= BIT(POWER_DOMAIN_PIPE_PANEL_FITTER(pipe)); + mask |= BIT_ULL(POWER_DOMAIN_PIPE_PANEL_FITTER(pipe)); drm_for_each_encoder_mask(encoder, dev, crtc_state->base.encoder_mask) { struct intel_encoder *intel_encoder = to_intel_encoder(encoder); - mask |= BIT(intel_display_port_power_domain(intel_encoder)); + mask |= BIT_ULL(intel_encoder->power_domain); } + if (HAS_DDI(dev_priv) && crtc_state->has_audio) + mask |= BIT(POWER_DOMAIN_AUDIO); + if (crtc_state->shared_dpll) - mask |= BIT(POWER_DOMAIN_PLLS); + mask |= BIT_ULL(POWER_DOMAIN_PLLS); return mask; } -static unsigned long +static u64 modeset_get_crtc_power_domains(struct drm_crtc *crtc, struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum intel_display_power_domain domain; - unsigned long domains, new_domains, old_domains; + u64 domains, new_domains, old_domains; old_domains = intel_crtc->enabled_power_domains; intel_crtc->enabled_power_domains = new_domains = @@ -5772,7 +5671,7 @@ modeset_get_crtc_power_domains(struct drm_crtc *crtc, } static void modeset_put_power_domains(struct drm_i915_private *dev_priv, - unsigned long domains) + u64 domains) { enum intel_display_power_domain domain; @@ -5780,922 +5679,11 @@ static void modeset_put_power_domains(struct drm_i915_private *dev_priv, intel_display_power_put(dev_priv, domain); } -static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) -{ - int max_cdclk_freq = dev_priv->max_cdclk_freq; - - if (IS_GEMINILAKE(dev_priv)) - return 2 * max_cdclk_freq; - else if (INTEL_INFO(dev_priv)->gen >= 9 || - IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - return max_cdclk_freq; - else if (IS_CHERRYVIEW(dev_priv)) - return max_cdclk_freq*95/100; - else if (INTEL_INFO(dev_priv)->gen < 4) - return 2*max_cdclk_freq*90/100; - else - return max_cdclk_freq*90/100; -} - -static int skl_calc_cdclk(int max_pixclk, int vco); - -static void intel_update_max_cdclk(struct drm_i915_private *dev_priv) -{ - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - u32 limit = I915_READ(SKL_DFSM) & SKL_DFSM_CDCLK_LIMIT_MASK; - int max_cdclk, vco; - - vco = dev_priv->skl_preferred_vco_freq; - WARN_ON(vco != 8100000 && vco != 8640000); - - /* - * Use the lower (vco 8640) cdclk values as a - * first guess. skl_calc_cdclk() will correct it - * if the preferred vco is 8100 instead. - */ - if (limit == SKL_DFSM_CDCLK_LIMIT_675) - max_cdclk = 617143; - else if (limit == SKL_DFSM_CDCLK_LIMIT_540) - max_cdclk = 540000; - else if (limit == SKL_DFSM_CDCLK_LIMIT_450) - max_cdclk = 432000; - else - max_cdclk = 308571; - - dev_priv->max_cdclk_freq = skl_calc_cdclk(max_cdclk, vco); - } else if (IS_GEMINILAKE(dev_priv)) { - dev_priv->max_cdclk_freq = 316800; - } else if (IS_BROXTON(dev_priv)) { - dev_priv->max_cdclk_freq = 624000; - } else if (IS_BROADWELL(dev_priv)) { - /* - * FIXME with extra cooling we can allow - * 540 MHz for ULX and 675 Mhz for ULT. - * How can we know if extra cooling is - * available? PCI ID, VTB, something else? - */ - if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) - dev_priv->max_cdclk_freq = 450000; - else if (IS_BDW_ULX(dev_priv)) - dev_priv->max_cdclk_freq = 450000; - else if (IS_BDW_ULT(dev_priv)) - dev_priv->max_cdclk_freq = 540000; - else - dev_priv->max_cdclk_freq = 675000; - } else if (IS_CHERRYVIEW(dev_priv)) { - dev_priv->max_cdclk_freq = 320000; - } else if (IS_VALLEYVIEW(dev_priv)) { - dev_priv->max_cdclk_freq = 400000; - } else { - /* otherwise assume cdclk is fixed */ - dev_priv->max_cdclk_freq = dev_priv->cdclk_freq; - } - - dev_priv->max_dotclk_freq = intel_compute_max_dotclk(dev_priv); - - DRM_DEBUG_DRIVER("Max CD clock rate: %d kHz\n", - dev_priv->max_cdclk_freq); - - DRM_DEBUG_DRIVER("Max dotclock rate: %d kHz\n", - dev_priv->max_dotclk_freq); -} - -static void intel_update_cdclk(struct drm_i915_private *dev_priv) -{ - dev_priv->cdclk_freq = dev_priv->display.get_display_clock_speed(dev_priv); - - if (INTEL_GEN(dev_priv) >= 9) - DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz, VCO: %d kHz, ref: %d kHz\n", - dev_priv->cdclk_freq, dev_priv->cdclk_pll.vco, - dev_priv->cdclk_pll.ref); - else - DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz\n", - dev_priv->cdclk_freq); - - /* - * 9:0 CMBUS [sic] CDCLK frequency (cdfreq): - * Programmng [sic] note: bit[9:2] should be programmed to the number - * of cdclk that generates 4MHz reference clock freq which is used to - * generate GMBus clock. This will vary with the cdclk freq. - */ - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - I915_WRITE(GMBUSFREQ_VLV, DIV_ROUND_UP(dev_priv->cdclk_freq, 1000)); -} - -/* convert from kHz to .1 fixpoint MHz with -1MHz offset */ -static int skl_cdclk_decimal(int cdclk) -{ - return DIV_ROUND_CLOSEST(cdclk - 1000, 500); -} - -static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) -{ - int ratio; - - if (cdclk == dev_priv->cdclk_pll.ref) - return 0; - - switch (cdclk) { - default: - MISSING_CASE(cdclk); - case 144000: - case 288000: - case 384000: - case 576000: - ratio = 60; - break; - case 624000: - ratio = 65; - break; - } - - return dev_priv->cdclk_pll.ref * ratio; -} - -static int glk_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) -{ - int ratio; - - if (cdclk == dev_priv->cdclk_pll.ref) - return 0; - - switch (cdclk) { - default: - MISSING_CASE(cdclk); - case 79200: - case 158400: - case 316800: - ratio = 33; - break; - } - - return dev_priv->cdclk_pll.ref * ratio; -} - -static void bxt_de_pll_disable(struct drm_i915_private *dev_priv) -{ - I915_WRITE(BXT_DE_PLL_ENABLE, 0); - - /* Timeout 200us */ - if (intel_wait_for_register(dev_priv, - BXT_DE_PLL_ENABLE, BXT_DE_PLL_LOCK, 0, - 1)) - DRM_ERROR("timeout waiting for DE PLL unlock\n"); - - dev_priv->cdclk_pll.vco = 0; -} - -static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) -{ - int ratio = DIV_ROUND_CLOSEST(vco, dev_priv->cdclk_pll.ref); - u32 val; - - val = I915_READ(BXT_DE_PLL_CTL); - val &= ~BXT_DE_PLL_RATIO_MASK; - val |= BXT_DE_PLL_RATIO(ratio); - I915_WRITE(BXT_DE_PLL_CTL, val); - - I915_WRITE(BXT_DE_PLL_ENABLE, BXT_DE_PLL_PLL_ENABLE); - - /* Timeout 200us */ - if (intel_wait_for_register(dev_priv, - BXT_DE_PLL_ENABLE, - BXT_DE_PLL_LOCK, - BXT_DE_PLL_LOCK, - 1)) - DRM_ERROR("timeout waiting for DE PLL lock\n"); - - dev_priv->cdclk_pll.vco = vco; -} - -static void bxt_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) -{ - u32 val, divider; - int vco, ret; - - if (IS_GEMINILAKE(dev_priv)) - vco = glk_de_pll_vco(dev_priv, cdclk); - else - vco = bxt_de_pll_vco(dev_priv, cdclk); - - DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz (VCO %d kHz)\n", cdclk, vco); - - /* cdclk = vco / 2 / div{1,1.5,2,4} */ - switch (DIV_ROUND_CLOSEST(vco, cdclk)) { - case 8: - divider = BXT_CDCLK_CD2X_DIV_SEL_4; - break; - case 4: - divider = BXT_CDCLK_CD2X_DIV_SEL_2; - break; - case 3: - WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); - divider = BXT_CDCLK_CD2X_DIV_SEL_1_5; - break; - case 2: - divider = BXT_CDCLK_CD2X_DIV_SEL_1; - break; - default: - WARN_ON(cdclk != dev_priv->cdclk_pll.ref); - WARN_ON(vco != 0); - - divider = BXT_CDCLK_CD2X_DIV_SEL_1; - break; - } - - /* Inform power controller of upcoming frequency change */ - mutex_lock(&dev_priv->rps.hw_lock); - ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, - 0x80000000); - mutex_unlock(&dev_priv->rps.hw_lock); - - if (ret) { - DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n", - ret, cdclk); - return; - } - - if (dev_priv->cdclk_pll.vco != 0 && - dev_priv->cdclk_pll.vco != vco) - bxt_de_pll_disable(dev_priv); - - if (dev_priv->cdclk_pll.vco != vco) - bxt_de_pll_enable(dev_priv, vco); - - val = divider | skl_cdclk_decimal(cdclk); - /* - * FIXME if only the cd2x divider needs changing, it could be done - * without shutting off the pipe (if only one pipe is active). - */ - val |= BXT_CDCLK_CD2X_PIPE_NONE; - /* - * Disable SSA Precharge when CD clock frequency < 500 MHz, - * enable otherwise. - */ - if (cdclk >= 500000) - val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; - I915_WRITE(CDCLK_CTL, val); - - mutex_lock(&dev_priv->rps.hw_lock); - ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, - DIV_ROUND_UP(cdclk, 25000)); - mutex_unlock(&dev_priv->rps.hw_lock); - - if (ret) { - DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n", - ret, cdclk); - return; - } - - intel_update_cdclk(dev_priv); -} - -static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) -{ - u32 cdctl, expected; - - intel_update_cdclk(dev_priv); - - if (dev_priv->cdclk_pll.vco == 0 || - dev_priv->cdclk_freq == dev_priv->cdclk_pll.ref) - goto sanitize; - - /* DPLL okay; verify the cdclock - * - * Some BIOS versions leave an incorrect decimal frequency value and - * set reserved MBZ bits in CDCLK_CTL at least during exiting from S4, - * so sanitize this register. - */ - cdctl = I915_READ(CDCLK_CTL); - /* - * Let's ignore the pipe field, since BIOS could have configured the - * dividers both synching to an active pipe, or asynchronously - * (PIPE_NONE). - */ - cdctl &= ~BXT_CDCLK_CD2X_PIPE_NONE; - - expected = (cdctl & BXT_CDCLK_CD2X_DIV_SEL_MASK) | - skl_cdclk_decimal(dev_priv->cdclk_freq); - /* - * Disable SSA Precharge when CD clock frequency < 500 MHz, - * enable otherwise. - */ - if (dev_priv->cdclk_freq >= 500000) - expected |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; - - if (cdctl == expected) - /* All well; nothing to sanitize */ - return; - -sanitize: - DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); - - /* force cdclk programming */ - dev_priv->cdclk_freq = 0; - - /* force full PLL disable + enable */ - dev_priv->cdclk_pll.vco = -1; -} - -void bxt_init_cdclk(struct drm_i915_private *dev_priv) -{ - int cdclk; - - bxt_sanitize_cdclk(dev_priv); - - if (dev_priv->cdclk_freq != 0 && dev_priv->cdclk_pll.vco != 0) - return; - - /* - * FIXME: - * - The initial CDCLK needs to be read from VBT. - * Need to make this change after VBT has changes for BXT. - */ - if (IS_GEMINILAKE(dev_priv)) - cdclk = glk_calc_cdclk(0); - else - cdclk = bxt_calc_cdclk(0); - - bxt_set_cdclk(dev_priv, cdclk); -} - -void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) -{ - bxt_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref); -} - -static int skl_calc_cdclk(int max_pixclk, int vco) -{ - if (vco == 8640000) { - if (max_pixclk > 540000) - return 617143; - else if (max_pixclk > 432000) - return 540000; - else if (max_pixclk > 308571) - return 432000; - else - return 308571; - } else { - if (max_pixclk > 540000) - return 675000; - else if (max_pixclk > 450000) - return 540000; - else if (max_pixclk > 337500) - return 450000; - else - return 337500; - } -} - -static void -skl_dpll0_update(struct drm_i915_private *dev_priv) -{ - u32 val; - - dev_priv->cdclk_pll.ref = 24000; - dev_priv->cdclk_pll.vco = 0; - - val = I915_READ(LCPLL1_CTL); - if ((val & LCPLL_PLL_ENABLE) == 0) - return; - - if (WARN_ON((val & LCPLL_PLL_LOCK) == 0)) - return; - - val = I915_READ(DPLL_CTRL1); - - if (WARN_ON((val & (DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | - DPLL_CTRL1_SSC(SKL_DPLL0) | - DPLL_CTRL1_OVERRIDE(SKL_DPLL0))) != - DPLL_CTRL1_OVERRIDE(SKL_DPLL0))) - return; - - switch (val & DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)) { - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, SKL_DPLL0): - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1350, SKL_DPLL0): - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1620, SKL_DPLL0): - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2700, SKL_DPLL0): - dev_priv->cdclk_pll.vco = 8100000; - break; - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, SKL_DPLL0): - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2160, SKL_DPLL0): - dev_priv->cdclk_pll.vco = 8640000; - break; - default: - MISSING_CASE(val & DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); - break; - } -} - -void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv, int vco) -{ - bool changed = dev_priv->skl_preferred_vco_freq != vco; - - dev_priv->skl_preferred_vco_freq = vco; - - if (changed) - intel_update_max_cdclk(dev_priv); -} - -static void -skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco) -{ - int min_cdclk = skl_calc_cdclk(0, vco); - u32 val; - - WARN_ON(vco != 8100000 && vco != 8640000); - - /* select the minimum CDCLK before enabling DPLL 0 */ - val = CDCLK_FREQ_337_308 | skl_cdclk_decimal(min_cdclk); - I915_WRITE(CDCLK_CTL, val); - POSTING_READ(CDCLK_CTL); - - /* - * We always enable DPLL0 with the lowest link rate possible, but still - * taking into account the VCO required to operate the eDP panel at the - * desired frequency. The usual DP link rates operate with a VCO of - * 8100 while the eDP 1.4 alternate link rates need a VCO of 8640. - * The modeset code is responsible for the selection of the exact link - * rate later on, with the constraint of choosing a frequency that - * works with vco. - */ - val = I915_READ(DPLL_CTRL1); - - val &= ~(DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | DPLL_CTRL1_SSC(SKL_DPLL0) | - DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); - val |= DPLL_CTRL1_OVERRIDE(SKL_DPLL0); - if (vco == 8640000) - val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, - SKL_DPLL0); - else - val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, - SKL_DPLL0); - - I915_WRITE(DPLL_CTRL1, val); - POSTING_READ(DPLL_CTRL1); - - I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) | LCPLL_PLL_ENABLE); - - if (intel_wait_for_register(dev_priv, - LCPLL1_CTL, LCPLL_PLL_LOCK, LCPLL_PLL_LOCK, - 5)) - DRM_ERROR("DPLL0 not locked\n"); - - dev_priv->cdclk_pll.vco = vco; - - /* We'll want to keep using the current vco from now on. */ - skl_set_preferred_cdclk_vco(dev_priv, vco); -} - -static void -skl_dpll0_disable(struct drm_i915_private *dev_priv) -{ - I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) & ~LCPLL_PLL_ENABLE); - if (intel_wait_for_register(dev_priv, - LCPLL1_CTL, LCPLL_PLL_LOCK, 0, - 1)) - DRM_ERROR("Couldn't disable DPLL0\n"); - - dev_priv->cdclk_pll.vco = 0; -} - -static void skl_set_cdclk(struct drm_i915_private *dev_priv, int cdclk, int vco) -{ - u32 freq_select, pcu_ack; - int ret; - - WARN_ON((cdclk == 24000) != (vco == 0)); - - DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz (VCO %d kHz)\n", cdclk, vco); - - mutex_lock(&dev_priv->rps.hw_lock); - ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, - SKL_CDCLK_PREPARE_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, 3); - mutex_unlock(&dev_priv->rps.hw_lock); - if (ret) { - DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", - ret); - return; - } - - /* set CDCLK_CTL */ - switch (cdclk) { - case 450000: - case 432000: - freq_select = CDCLK_FREQ_450_432; - pcu_ack = 1; - break; - case 540000: - freq_select = CDCLK_FREQ_540; - pcu_ack = 2; - break; - case 308571: - case 337500: - default: - freq_select = CDCLK_FREQ_337_308; - pcu_ack = 0; - break; - case 617143: - case 675000: - freq_select = CDCLK_FREQ_675_617; - pcu_ack = 3; - break; - } - - if (dev_priv->cdclk_pll.vco != 0 && - dev_priv->cdclk_pll.vco != vco) - skl_dpll0_disable(dev_priv); - - if (dev_priv->cdclk_pll.vco != vco) - skl_dpll0_enable(dev_priv, vco); - - I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(cdclk)); - POSTING_READ(CDCLK_CTL); - - /* inform PCU of the change */ - mutex_lock(&dev_priv->rps.hw_lock); - sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); - mutex_unlock(&dev_priv->rps.hw_lock); - - intel_update_cdclk(dev_priv); -} - -static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv); - -void skl_uninit_cdclk(struct drm_i915_private *dev_priv) -{ - skl_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref, 0); -} - -void skl_init_cdclk(struct drm_i915_private *dev_priv) -{ - int cdclk, vco; - - skl_sanitize_cdclk(dev_priv); - - if (dev_priv->cdclk_freq != 0 && dev_priv->cdclk_pll.vco != 0) { - /* - * Use the current vco as our initial - * guess as to what the preferred vco is. - */ - if (dev_priv->skl_preferred_vco_freq == 0) - skl_set_preferred_cdclk_vco(dev_priv, - dev_priv->cdclk_pll.vco); - return; - } - - vco = dev_priv->skl_preferred_vco_freq; - if (vco == 0) - vco = 8100000; - cdclk = skl_calc_cdclk(0, vco); - - skl_set_cdclk(dev_priv, cdclk, vco); -} - -static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv) -{ - uint32_t cdctl, expected; - - /* - * check if the pre-os intialized the display - * There is SWF18 scratchpad register defined which is set by the - * pre-os which can be used by the OS drivers to check the status - */ - if ((I915_READ(SWF_ILK(0x18)) & 0x00FFFFFF) == 0) - goto sanitize; - - intel_update_cdclk(dev_priv); - /* Is PLL enabled and locked ? */ - if (dev_priv->cdclk_pll.vco == 0 || - dev_priv->cdclk_freq == dev_priv->cdclk_pll.ref) - goto sanitize; - - /* DPLL okay; verify the cdclock - * - * Noticed in some instances that the freq selection is correct but - * decimal part is programmed wrong from BIOS where pre-os does not - * enable display. Verify the same as well. - */ - cdctl = I915_READ(CDCLK_CTL); - expected = (cdctl & CDCLK_FREQ_SEL_MASK) | - skl_cdclk_decimal(dev_priv->cdclk_freq); - if (cdctl == expected) - /* All well; nothing to sanitize */ - return; - -sanitize: - DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); - - /* force cdclk programming */ - dev_priv->cdclk_freq = 0; - /* force full PLL disable + enable */ - dev_priv->cdclk_pll.vco = -1; -} - -/* Adjust CDclk dividers to allow high res or save power if possible */ -static void valleyview_set_cdclk(struct drm_device *dev, int cdclk) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - u32 val, cmd; - - WARN_ON(dev_priv->display.get_display_clock_speed(dev_priv) - != dev_priv->cdclk_freq); - - if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */ - cmd = 2; - else if (cdclk == 266667) - cmd = 1; - else - cmd = 0; - - mutex_lock(&dev_priv->rps.hw_lock); - val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); - val &= ~DSPFREQGUAR_MASK; - val |= (cmd << DSPFREQGUAR_SHIFT); - vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); - if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & - DSPFREQSTAT_MASK) == (cmd << DSPFREQSTAT_SHIFT), - 50)) { - DRM_ERROR("timed out waiting for CDclk change\n"); - } - mutex_unlock(&dev_priv->rps.hw_lock); - - mutex_lock(&dev_priv->sb_lock); - - if (cdclk == 400000) { - u32 divider; - - divider = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; - - /* adjust cdclk divider */ - val = vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL); - val &= ~CCK_FREQUENCY_VALUES; - val |= divider; - vlv_cck_write(dev_priv, CCK_DISPLAY_CLOCK_CONTROL, val); - - if (wait_for((vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL) & - CCK_FREQUENCY_STATUS) == (divider << CCK_FREQUENCY_STATUS_SHIFT), - 50)) - DRM_ERROR("timed out waiting for CDclk change\n"); - } - - /* adjust self-refresh exit latency value */ - val = vlv_bunit_read(dev_priv, BUNIT_REG_BISOC); - val &= ~0x7f; - - /* - * For high bandwidth configs, we set a higher latency in the bunit - * so that the core display fetch happens in time to avoid underruns. - */ - if (cdclk == 400000) - val |= 4500 / 250; /* 4.5 usec */ - else - val |= 3000 / 250; /* 3.0 usec */ - vlv_bunit_write(dev_priv, BUNIT_REG_BISOC, val); - - mutex_unlock(&dev_priv->sb_lock); - - intel_update_cdclk(dev_priv); -} - -static void cherryview_set_cdclk(struct drm_device *dev, int cdclk) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - u32 val, cmd; - - WARN_ON(dev_priv->display.get_display_clock_speed(dev_priv) - != dev_priv->cdclk_freq); - - switch (cdclk) { - case 333333: - case 320000: - case 266667: - case 200000: - break; - default: - MISSING_CASE(cdclk); - return; - } - - /* - * Specs are full of misinformation, but testing on actual - * hardware has shown that we just need to write the desired - * CCK divider into the Punit register. - */ - cmd = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; - - mutex_lock(&dev_priv->rps.hw_lock); - val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); - val &= ~DSPFREQGUAR_MASK_CHV; - val |= (cmd << DSPFREQGUAR_SHIFT_CHV); - vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); - if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & - DSPFREQSTAT_MASK_CHV) == (cmd << DSPFREQSTAT_SHIFT_CHV), - 50)) { - DRM_ERROR("timed out waiting for CDclk change\n"); - } - mutex_unlock(&dev_priv->rps.hw_lock); - - intel_update_cdclk(dev_priv); -} - -static int valleyview_calc_cdclk(struct drm_i915_private *dev_priv, - int max_pixclk) -{ - int freq_320 = (dev_priv->hpll_freq << 1) % 320000 != 0 ? 333333 : 320000; - int limit = IS_CHERRYVIEW(dev_priv) ? 95 : 90; - - /* - * Really only a few cases to deal with, as only 4 CDclks are supported: - * 200MHz - * 267MHz - * 320/333MHz (depends on HPLL freq) - * 400MHz (VLV only) - * So we check to see whether we're above 90% (VLV) or 95% (CHV) - * of the lower bin and adjust if needed. - * - * We seem to get an unstable or solid color picture at 200MHz. - * Not sure what's wrong. For now use 200MHz only when all pipes - * are off. - */ - if (!IS_CHERRYVIEW(dev_priv) && - max_pixclk > freq_320*limit/100) - return 400000; - else if (max_pixclk > 266667*limit/100) - return freq_320; - else if (max_pixclk > 0) - return 266667; - else - return 200000; -} - -static int glk_calc_cdclk(int max_pixclk) -{ - if (max_pixclk > 2 * 158400) - return 316800; - else if (max_pixclk > 2 * 79200) - return 158400; - else - return 79200; -} - -static int bxt_calc_cdclk(int max_pixclk) -{ - if (max_pixclk > 576000) - return 624000; - else if (max_pixclk > 384000) - return 576000; - else if (max_pixclk > 288000) - return 384000; - else if (max_pixclk > 144000) - return 288000; - else - return 144000; -} - -/* Compute the max pixel clock for new configuration. */ -static int intel_mode_max_pixclk(struct drm_device *dev, - struct drm_atomic_state *state) -{ - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_crtc *crtc; - struct drm_crtc_state *crtc_state; - unsigned max_pixclk = 0, i; - enum pipe pipe; - - memcpy(intel_state->min_pixclk, dev_priv->min_pixclk, - sizeof(intel_state->min_pixclk)); - - for_each_crtc_in_state(state, crtc, crtc_state, i) { - int pixclk = 0; - - if (crtc_state->enable) - pixclk = crtc_state->adjusted_mode.crtc_clock; - - intel_state->min_pixclk[i] = pixclk; - } - - for_each_pipe(dev_priv, pipe) - max_pixclk = max(intel_state->min_pixclk[pipe], max_pixclk); - - return max_pixclk; -} - -static int valleyview_modeset_calc_cdclk(struct drm_atomic_state *state) -{ - struct drm_device *dev = state->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - int max_pixclk = intel_mode_max_pixclk(dev, state); - struct intel_atomic_state *intel_state = - to_intel_atomic_state(state); - - intel_state->cdclk = intel_state->dev_cdclk = - valleyview_calc_cdclk(dev_priv, max_pixclk); - - if (!intel_state->active_crtcs) - intel_state->dev_cdclk = valleyview_calc_cdclk(dev_priv, 0); - - return 0; -} - -static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->dev); - int max_pixclk = ilk_max_pixel_rate(state); - struct intel_atomic_state *intel_state = - to_intel_atomic_state(state); - int cdclk; - - if (IS_GEMINILAKE(dev_priv)) - cdclk = glk_calc_cdclk(max_pixclk); - else - cdclk = bxt_calc_cdclk(max_pixclk); - - intel_state->cdclk = intel_state->dev_cdclk = cdclk; - - if (!intel_state->active_crtcs) { - if (IS_GEMINILAKE(dev_priv)) - cdclk = glk_calc_cdclk(0); - else - cdclk = bxt_calc_cdclk(0); - - intel_state->dev_cdclk = cdclk; - } - - return 0; -} - -static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv) -{ - unsigned int credits, default_credits; - - if (IS_CHERRYVIEW(dev_priv)) - default_credits = PFI_CREDIT(12); - else - default_credits = PFI_CREDIT(8); - - if (dev_priv->cdclk_freq >= dev_priv->czclk_freq) { - /* CHV suggested value is 31 or 63 */ - if (IS_CHERRYVIEW(dev_priv)) - credits = PFI_CREDIT_63; - else - credits = PFI_CREDIT(15); - } else { - credits = default_credits; - } - - /* - * WA - write default credits before re-programming - * FIXME: should we also set the resend bit here? - */ - I915_WRITE(GCI_CONTROL, VGA_FAST_MODE_DISABLE | - default_credits); - - I915_WRITE(GCI_CONTROL, VGA_FAST_MODE_DISABLE | - credits | PFI_CREDIT_RESEND); - - /* - * FIXME is this guaranteed to clear - * immediately or should we poll for it? - */ - WARN_ON(I915_READ(GCI_CONTROL) & PFI_CREDIT_RESEND); -} - -static void valleyview_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_device *dev = old_state->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); - unsigned req_cdclk = old_intel_state->dev_cdclk; - - /* - * FIXME: We can end up here with all power domains off, yet - * with a CDCLK frequency other than the minimum. To account - * for this take the PIPE-A power domain, which covers the HW - * blocks needed for the following programming. This can be - * removed once it's guaranteed that we get here either with - * the minimum CDCLK set, or the required power domains - * enabled. - */ - intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); - - if (IS_CHERRYVIEW(dev_priv)) - cherryview_set_cdclk(dev, req_cdclk); - else - valleyview_set_cdclk(dev, req_cdclk); - - vlv_program_pfi_credits(dev_priv); - - intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A); -} - static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, struct drm_atomic_state *old_state) { + struct intel_atomic_state *old_intel_state = + to_intel_atomic_state(old_state); struct drm_crtc *crtc = pipe_config->base.crtc; struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -6740,7 +5728,8 @@ static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, intel_color_load_luts(&pipe_config->base); - intel_update_watermarks(intel_crtc); + dev_priv->display.initial_watermarks(old_intel_state, + pipe_config); intel_enable_pipe(intel_crtc); assert_vblank_disabled(crtc); @@ -6857,6 +5846,9 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state, if (!IS_GEN2(dev_priv)) intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); + + if (!dev_priv->display.initial_watermarks) + intel_update_watermarks(intel_crtc); } static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) @@ -6865,7 +5857,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct drm_i915_private *dev_priv = to_i915(crtc->dev); enum intel_display_power_domain domain; - unsigned long domains; + u64 domains; struct drm_atomic_state *state; struct intel_crtc_state *crtc_state; int ret; @@ -7173,7 +6165,7 @@ static bool pipe_config_supports_ips(struct drm_i915_private *dev_priv, * * Should measure whether using a lower cdclk w/o IPS */ - return ilk_pipe_pixel_rate(pipe_config) <= + return pipe_config->pixel_rate <= dev_priv->max_cdclk_freq * 95 / 100; } @@ -7197,6 +6189,54 @@ static bool intel_crtc_supports_double_wide(const struct intel_crtc *crtc) (crtc->pipe == PIPE_A || IS_I915G(dev_priv)); } +static uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config) +{ + uint32_t pixel_rate; + + pixel_rate = pipe_config->base.adjusted_mode.crtc_clock; + + /* + * We only use IF-ID interlacing. If we ever use + * PF-ID we'll need to adjust the pixel_rate here. + */ + + if (pipe_config->pch_pfit.enabled) { + uint64_t pipe_w, pipe_h, pfit_w, pfit_h; + uint32_t pfit_size = pipe_config->pch_pfit.size; + + pipe_w = pipe_config->pipe_src_w; + pipe_h = pipe_config->pipe_src_h; + + pfit_w = (pfit_size >> 16) & 0xFFFF; + pfit_h = pfit_size & 0xFFFF; + if (pipe_w < pfit_w) + pipe_w = pfit_w; + if (pipe_h < pfit_h) + pipe_h = pfit_h; + + if (WARN_ON(!pfit_w || !pfit_h)) + return pixel_rate; + + pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h, + pfit_w * pfit_h); + } + + return pixel_rate; +} + +static void intel_crtc_compute_pixel_rate(struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + + if (HAS_GMCH_DISPLAY(dev_priv)) + /* FIXME calculate proper pipe pixel rate for GMCH pfit */ + crtc_state->pixel_rate = + crtc_state->base.adjusted_mode.crtc_clock; + else + crtc_state->pixel_rate = + ilk_pipe_pixel_rate(crtc_state); +} + static int intel_crtc_compute_config(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config) { @@ -7243,6 +6283,8 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc, adjusted_mode->crtc_hsync_start == adjusted_mode->crtc_hdisplay) return -EINVAL; + intel_crtc_compute_pixel_rate(pipe_config); + if (HAS_IPS(dev_priv)) hsw_compute_ips_config(crtc, pipe_config); @@ -7252,428 +6294,6 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc, return 0; } -static int skylake_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - u32 cdctl; - - skl_dpll0_update(dev_priv); - - if (dev_priv->cdclk_pll.vco == 0) - return dev_priv->cdclk_pll.ref; - - cdctl = I915_READ(CDCLK_CTL); - - if (dev_priv->cdclk_pll.vco == 8640000) { - switch (cdctl & CDCLK_FREQ_SEL_MASK) { - case CDCLK_FREQ_450_432: - return 432000; - case CDCLK_FREQ_337_308: - return 308571; - case CDCLK_FREQ_540: - return 540000; - case CDCLK_FREQ_675_617: - return 617143; - default: - MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK); - } - } else { - switch (cdctl & CDCLK_FREQ_SEL_MASK) { - case CDCLK_FREQ_450_432: - return 450000; - case CDCLK_FREQ_337_308: - return 337500; - case CDCLK_FREQ_540: - return 540000; - case CDCLK_FREQ_675_617: - return 675000; - default: - MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK); - } - } - - return dev_priv->cdclk_pll.ref; -} - -static void bxt_de_pll_update(struct drm_i915_private *dev_priv) -{ - u32 val; - - dev_priv->cdclk_pll.ref = 19200; - dev_priv->cdclk_pll.vco = 0; - - val = I915_READ(BXT_DE_PLL_ENABLE); - if ((val & BXT_DE_PLL_PLL_ENABLE) == 0) - return; - - if (WARN_ON((val & BXT_DE_PLL_LOCK) == 0)) - return; - - val = I915_READ(BXT_DE_PLL_CTL); - dev_priv->cdclk_pll.vco = (val & BXT_DE_PLL_RATIO_MASK) * - dev_priv->cdclk_pll.ref; -} - -static int broxton_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - u32 divider; - int div, vco; - - bxt_de_pll_update(dev_priv); - - vco = dev_priv->cdclk_pll.vco; - if (vco == 0) - return dev_priv->cdclk_pll.ref; - - divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; - - switch (divider) { - case BXT_CDCLK_CD2X_DIV_SEL_1: - div = 2; - break; - case BXT_CDCLK_CD2X_DIV_SEL_1_5: - WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); - div = 3; - break; - case BXT_CDCLK_CD2X_DIV_SEL_2: - div = 4; - break; - case BXT_CDCLK_CD2X_DIV_SEL_4: - div = 8; - break; - default: - MISSING_CASE(divider); - return dev_priv->cdclk_pll.ref; - } - - return DIV_ROUND_CLOSEST(vco, div); -} - -static int broadwell_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - uint32_t lcpll = I915_READ(LCPLL_CTL); - uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; - - if (lcpll & LCPLL_CD_SOURCE_FCLK) - return 800000; - else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) - return 450000; - else if (freq == LCPLL_CLK_FREQ_450) - return 450000; - else if (freq == LCPLL_CLK_FREQ_54O_BDW) - return 540000; - else if (freq == LCPLL_CLK_FREQ_337_5_BDW) - return 337500; - else - return 675000; -} - -static int haswell_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - uint32_t lcpll = I915_READ(LCPLL_CTL); - uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; - - if (lcpll & LCPLL_CD_SOURCE_FCLK) - return 800000; - else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) - return 450000; - else if (freq == LCPLL_CLK_FREQ_450) - return 450000; - else if (IS_HSW_ULT(dev_priv)) - return 337500; - else - return 540000; -} - -static int valleyview_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - return vlv_get_cck_clock_hpll(dev_priv, "cdclk", - CCK_DISPLAY_CLOCK_CONTROL); -} - -static int ilk_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - return 450000; -} - -static int i945_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - return 400000; -} - -static int i915_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - return 333333; -} - -static int i9xx_misc_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - return 200000; -} - -static int pnv_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - u16 gcfgc = 0; - - pci_read_config_word(pdev, GCFGC, &gcfgc); - - switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { - case GC_DISPLAY_CLOCK_267_MHZ_PNV: - return 266667; - case GC_DISPLAY_CLOCK_333_MHZ_PNV: - return 333333; - case GC_DISPLAY_CLOCK_444_MHZ_PNV: - return 444444; - case GC_DISPLAY_CLOCK_200_MHZ_PNV: - return 200000; - default: - DRM_ERROR("Unknown pnv display core clock 0x%04x\n", gcfgc); - case GC_DISPLAY_CLOCK_133_MHZ_PNV: - return 133333; - case GC_DISPLAY_CLOCK_167_MHZ_PNV: - return 166667; - } -} - -static int i915gm_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - u16 gcfgc = 0; - - pci_read_config_word(pdev, GCFGC, &gcfgc); - - if (gcfgc & GC_LOW_FREQUENCY_ENABLE) - return 133333; - else { - switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { - case GC_DISPLAY_CLOCK_333_MHZ: - return 333333; - default: - case GC_DISPLAY_CLOCK_190_200_MHZ: - return 190000; - } - } -} - -static int i865_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - return 266667; -} - -static int i85x_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - u16 hpllcc = 0; - - /* - * 852GM/852GMV only supports 133 MHz and the HPLLCC - * encoding is different :( - * FIXME is this the right way to detect 852GM/852GMV? - */ - if (pdev->revision == 0x1) - return 133333; - - pci_bus_read_config_word(pdev->bus, - PCI_DEVFN(0, 3), HPLLCC, &hpllcc); - - /* Assume that the hardware is in the high speed state. This - * should be the default. - */ - switch (hpllcc & GC_CLOCK_CONTROL_MASK) { - case GC_CLOCK_133_200: - case GC_CLOCK_133_200_2: - case GC_CLOCK_100_200: - return 200000; - case GC_CLOCK_166_250: - return 250000; - case GC_CLOCK_100_133: - return 133333; - case GC_CLOCK_133_266: - case GC_CLOCK_133_266_2: - case GC_CLOCK_166_266: - return 266667; - } - - /* Shouldn't happen */ - return 0; -} - -static int i830_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - return 133333; -} - -static unsigned int intel_hpll_vco(struct drm_i915_private *dev_priv) -{ - static const unsigned int blb_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 4800000, - [4] = 6400000, - }; - static const unsigned int pnv_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 4800000, - [4] = 2666667, - }; - static const unsigned int cl_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 6400000, - [4] = 3333333, - [5] = 3566667, - [6] = 4266667, - }; - static const unsigned int elk_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 4800000, - }; - static const unsigned int ctg_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 6400000, - [4] = 2666667, - [5] = 4266667, - }; - const unsigned int *vco_table; - unsigned int vco; - uint8_t tmp = 0; - - /* FIXME other chipsets? */ - if (IS_GM45(dev_priv)) - vco_table = ctg_vco; - else if (IS_G4X(dev_priv)) - vco_table = elk_vco; - else if (IS_I965GM(dev_priv)) - vco_table = cl_vco; - else if (IS_PINEVIEW(dev_priv)) - vco_table = pnv_vco; - else if (IS_G33(dev_priv)) - vco_table = blb_vco; - else - return 0; - - tmp = I915_READ(IS_MOBILE(dev_priv) ? HPLLVCO_MOBILE : HPLLVCO); - - vco = vco_table[tmp & 0x7]; - if (vco == 0) - DRM_ERROR("Bad HPLL VCO (HPLLVCO=0x%02x)\n", tmp); - else - DRM_DEBUG_KMS("HPLL VCO %u kHz\n", vco); - - return vco; -} - -static int gm45_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); - uint16_t tmp = 0; - - pci_read_config_word(pdev, GCFGC, &tmp); - - cdclk_sel = (tmp >> 12) & 0x1; - - switch (vco) { - case 2666667: - case 4000000: - case 5333333: - return cdclk_sel ? 333333 : 222222; - case 3200000: - return cdclk_sel ? 320000 : 228571; - default: - DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u, CFGC=0x%04x\n", vco, tmp); - return 222222; - } -} - -static int i965gm_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - static const uint8_t div_3200[] = { 16, 10, 8 }; - static const uint8_t div_4000[] = { 20, 12, 10 }; - static const uint8_t div_5333[] = { 24, 16, 14 }; - const uint8_t *div_table; - unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); - uint16_t tmp = 0; - - pci_read_config_word(pdev, GCFGC, &tmp); - - cdclk_sel = ((tmp >> 8) & 0x1f) - 1; - - if (cdclk_sel >= ARRAY_SIZE(div_3200)) - goto fail; - - switch (vco) { - case 3200000: - div_table = div_3200; - break; - case 4000000: - div_table = div_4000; - break; - case 5333333: - div_table = div_5333; - break; - default: - goto fail; - } - - return DIV_ROUND_CLOSEST(vco, div_table[cdclk_sel]); - -fail: - DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u kHz, CFGC=0x%04x\n", vco, tmp); - return 200000; -} - -static int g33_get_display_clock_speed(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - static const uint8_t div_3200[] = { 12, 10, 8, 7, 5, 16 }; - static const uint8_t div_4000[] = { 14, 12, 10, 8, 6, 20 }; - static const uint8_t div_4800[] = { 20, 14, 12, 10, 8, 24 }; - static const uint8_t div_5333[] = { 20, 16, 12, 12, 8, 28 }; - const uint8_t *div_table; - unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); - uint16_t tmp = 0; - - pci_read_config_word(pdev, GCFGC, &tmp); - - cdclk_sel = (tmp >> 4) & 0x7; - - if (cdclk_sel >= ARRAY_SIZE(div_3200)) - goto fail; - - switch (vco) { - case 3200000: - div_table = div_3200; - break; - case 4000000: - div_table = div_4000; - break; - case 4800000: - div_table = div_4800; - break; - case 5333333: - div_table = div_5333; - break; - default: - goto fail; - } - - return DIV_ROUND_CLOSEST(vco, div_table[cdclk_sel]); - -fail: - DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u kHz, CFGC=0x%08x\n", vco, tmp); - return 190476; -} - static void intel_reduce_m_n_ratio(uint32_t *num, uint32_t *den) { @@ -8768,7 +7388,8 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc, val = I915_READ(DSPSTRIDE(pipe)); fb->pitches[0] = val & 0xffffffc0; - aligned_height = intel_fb_align_height(dev, fb->height, + aligned_height = intel_fb_align_height(dev_priv, + fb->height, fb->format->format, fb->modifier); @@ -9809,7 +8430,8 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, fb->format->format); fb->pitches[0] = (val & 0x3ff) * stride_mult; - aligned_height = intel_fb_align_height(dev, fb->height, + aligned_height = intel_fb_align_height(dev_priv, + fb->height, fb->format->format, fb->modifier); @@ -9907,7 +8529,8 @@ ironlake_get_initial_plane_config(struct intel_crtc *crtc, val = I915_READ(DSPSTRIDE(pipe)); fb->pitches[0] = val & 0xffffffc0; - aligned_height = intel_fb_align_height(dev, fb->height, + aligned_height = intel_fb_align_height(dev_priv, + fb->height, fb->format->format, fb->modifier); @@ -10235,245 +8858,6 @@ void hsw_disable_pc8(struct drm_i915_private *dev_priv) } } -static void bxt_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_device *dev = old_state->dev; - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); - unsigned int req_cdclk = old_intel_state->dev_cdclk; - - bxt_set_cdclk(to_i915(dev), req_cdclk); -} - -static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, - int pixel_rate) -{ - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - - /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ - if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled) - pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95); - - /* BSpec says "Do not use DisplayPort with CDCLK less than - * 432 MHz, audio enabled, port width x4, and link rate - * HBR2 (5.4 GHz), or else there may be audio corruption or - * screen corruption." - */ - if (intel_crtc_has_dp_encoder(crtc_state) && - crtc_state->has_audio && - crtc_state->port_clock >= 540000 && - crtc_state->lane_count == 4) - pixel_rate = max(432000, pixel_rate); - - return pixel_rate; -} - -/* compute the max rate for new configuration */ -static int ilk_max_pixel_rate(struct drm_atomic_state *state) -{ - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct drm_i915_private *dev_priv = to_i915(state->dev); - struct drm_crtc *crtc; - struct drm_crtc_state *cstate; - struct intel_crtc_state *crtc_state; - unsigned max_pixel_rate = 0, i; - enum pipe pipe; - - memcpy(intel_state->min_pixclk, dev_priv->min_pixclk, - sizeof(intel_state->min_pixclk)); - - for_each_crtc_in_state(state, crtc, cstate, i) { - int pixel_rate; - - crtc_state = to_intel_crtc_state(cstate); - if (!crtc_state->base.enable) { - intel_state->min_pixclk[i] = 0; - continue; - } - - pixel_rate = ilk_pipe_pixel_rate(crtc_state); - - if (IS_BROADWELL(dev_priv) || IS_GEN9(dev_priv)) - pixel_rate = bdw_adjust_min_pipe_pixel_rate(crtc_state, - pixel_rate); - - intel_state->min_pixclk[i] = pixel_rate; - } - - for_each_pipe(dev_priv, pipe) - max_pixel_rate = max(intel_state->min_pixclk[pipe], max_pixel_rate); - - return max_pixel_rate; -} - -static void broadwell_set_cdclk(struct drm_device *dev, int cdclk) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - uint32_t val, data; - int ret; - - if (WARN((I915_READ(LCPLL_CTL) & - (LCPLL_PLL_DISABLE | LCPLL_PLL_LOCK | - LCPLL_CD_CLOCK_DISABLE | LCPLL_ROOT_CD_CLOCK_DISABLE | - LCPLL_CD2X_CLOCK_DISABLE | LCPLL_POWER_DOWN_ALLOW | - LCPLL_CD_SOURCE_FCLK)) != LCPLL_PLL_LOCK, - "trying to change cdclk frequency with cdclk not enabled\n")) - return; - - mutex_lock(&dev_priv->rps.hw_lock); - ret = sandybridge_pcode_write(dev_priv, - BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ, 0x0); - mutex_unlock(&dev_priv->rps.hw_lock); - if (ret) { - DRM_ERROR("failed to inform pcode about cdclk change\n"); - return; - } - - val = I915_READ(LCPLL_CTL); - val |= LCPLL_CD_SOURCE_FCLK; - I915_WRITE(LCPLL_CTL, val); - - if (wait_for_us(I915_READ(LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE, 1)) - DRM_ERROR("Switching to FCLK failed\n"); - - val = I915_READ(LCPLL_CTL); - val &= ~LCPLL_CLK_FREQ_MASK; - - switch (cdclk) { - case 450000: - val |= LCPLL_CLK_FREQ_450; - data = 0; - break; - case 540000: - val |= LCPLL_CLK_FREQ_54O_BDW; - data = 1; - break; - case 337500: - val |= LCPLL_CLK_FREQ_337_5_BDW; - data = 2; - break; - case 675000: - val |= LCPLL_CLK_FREQ_675_BDW; - data = 3; - break; - default: - WARN(1, "invalid cdclk frequency\n"); - return; - } - - I915_WRITE(LCPLL_CTL, val); - - val = I915_READ(LCPLL_CTL); - val &= ~LCPLL_CD_SOURCE_FCLK; - I915_WRITE(LCPLL_CTL, val); - - if (wait_for_us((I915_READ(LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) - DRM_ERROR("Switching back to LCPLL failed\n"); - - mutex_lock(&dev_priv->rps.hw_lock); - sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, data); - mutex_unlock(&dev_priv->rps.hw_lock); - - I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1); - - intel_update_cdclk(dev_priv); - - WARN(cdclk != dev_priv->cdclk_freq, - "cdclk requested %d kHz but got %d kHz\n", - cdclk, dev_priv->cdclk_freq); -} - -static int broadwell_calc_cdclk(int max_pixclk) -{ - if (max_pixclk > 540000) - return 675000; - else if (max_pixclk > 450000) - return 540000; - else if (max_pixclk > 337500) - return 450000; - else - return 337500; -} - -static int broadwell_modeset_calc_cdclk(struct drm_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->dev); - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - int max_pixclk = ilk_max_pixel_rate(state); - int cdclk; - - /* - * FIXME should also account for plane ratio - * once 64bpp pixel formats are supported. - */ - cdclk = broadwell_calc_cdclk(max_pixclk); - - if (cdclk > dev_priv->max_cdclk_freq) { - DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", - cdclk, dev_priv->max_cdclk_freq); - return -EINVAL; - } - - intel_state->cdclk = intel_state->dev_cdclk = cdclk; - if (!intel_state->active_crtcs) - intel_state->dev_cdclk = broadwell_calc_cdclk(0); - - return 0; -} - -static void broadwell_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_device *dev = old_state->dev; - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); - unsigned req_cdclk = old_intel_state->dev_cdclk; - - broadwell_set_cdclk(dev, req_cdclk); -} - -static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) -{ - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct drm_i915_private *dev_priv = to_i915(state->dev); - const int max_pixclk = ilk_max_pixel_rate(state); - int vco = intel_state->cdclk_pll_vco; - int cdclk; - - /* - * FIXME should also account for plane ratio - * once 64bpp pixel formats are supported. - */ - cdclk = skl_calc_cdclk(max_pixclk, vco); - - /* - * FIXME move the cdclk caclulation to - * compute_config() so we can fail gracegully. - */ - if (cdclk > dev_priv->max_cdclk_freq) { - DRM_ERROR("requested cdclk (%d kHz) exceeds max (%d kHz)\n", - cdclk, dev_priv->max_cdclk_freq); - cdclk = dev_priv->max_cdclk_freq; - } - - intel_state->cdclk = intel_state->dev_cdclk = cdclk; - if (!intel_state->active_crtcs) - intel_state->dev_cdclk = skl_calc_cdclk(0, vco); - - return 0; -} - -static void skl_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_i915_private *dev_priv = to_i915(old_state->dev); - struct intel_atomic_state *intel_state = to_intel_atomic_state(old_state); - unsigned int req_cdclk = intel_state->dev_cdclk; - unsigned int req_vco = intel_state->cdclk_pll_vco; - - skl_set_cdclk(dev_priv, req_cdclk, req_vco); -} - static int haswell_crtc_compute_clock(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state) { @@ -10565,7 +8949,7 @@ static void haswell_get_ddi_pll(struct drm_i915_private *dev_priv, static bool hsw_get_transcoder_state(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config, - unsigned long *power_domain_mask) + u64 *power_domain_mask) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -10607,7 +8991,7 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc, power_domain = POWER_DOMAIN_TRANSCODER(pipe_config->cpu_transcoder); if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) return false; - *power_domain_mask |= BIT(power_domain); + *power_domain_mask |= BIT_ULL(power_domain); tmp = I915_READ(PIPECONF(pipe_config->cpu_transcoder)); @@ -10616,7 +9000,7 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc, static bool bxt_get_dsi_transcoder_state(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config, - unsigned long *power_domain_mask) + u64 *power_domain_mask) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -10634,7 +9018,7 @@ static bool bxt_get_dsi_transcoder_state(struct intel_crtc *crtc, power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder); if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) continue; - *power_domain_mask |= BIT(power_domain); + *power_domain_mask |= BIT_ULL(power_domain); /* * The PLL needs to be enabled with a valid divider @@ -10674,7 +9058,7 @@ static void haswell_get_ddi_port_state(struct intel_crtc *crtc, port = (tmp & TRANS_DDI_PORT_MASK) >> TRANS_DDI_PORT_SHIFT; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) skylake_get_ddi_pll(dev_priv, port, pipe_config); else if (IS_GEN9_LP(dev_priv)) bxt_get_ddi_pll(dev_priv, port, pipe_config); @@ -10709,13 +9093,13 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum intel_display_power_domain power_domain; - unsigned long power_domain_mask; + u64 power_domain_mask; bool active; power_domain = POWER_DOMAIN_PIPE(crtc->pipe); if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) return false; - power_domain_mask = BIT(power_domain); + power_domain_mask = BIT_ULL(power_domain); pipe_config->shared_dpll = NULL; @@ -10749,7 +9133,7 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, power_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe); if (intel_display_power_get_if_enabled(dev_priv, power_domain)) { - power_domain_mask |= BIT(power_domain); + power_domain_mask |= BIT_ULL(power_domain); if (INTEL_GEN(dev_priv) >= 9) skylake_get_pfit_config(crtc, pipe_config); else @@ -10972,9 +9356,8 @@ static struct drm_display_mode load_detect_mode = { }; struct drm_framebuffer * -__intel_framebuffer_create(struct drm_device *dev, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj) +intel_framebuffer_create(struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd) { struct intel_framebuffer *intel_fb; int ret; @@ -10983,7 +9366,7 @@ __intel_framebuffer_create(struct drm_device *dev, if (!intel_fb) return ERR_PTR(-ENOMEM); - ret = intel_framebuffer_init(dev, intel_fb, mode_cmd, obj); + ret = intel_framebuffer_init(intel_fb, obj, mode_cmd); if (ret) goto err; @@ -10994,23 +9377,6 @@ err: return ERR_PTR(ret); } -static struct drm_framebuffer * -intel_framebuffer_create(struct drm_device *dev, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj) -{ - struct drm_framebuffer *fb; - int ret; - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ERR_PTR(ret); - fb = __intel_framebuffer_create(dev, mode_cmd, obj); - mutex_unlock(&dev->struct_mutex); - - return fb; -} - static u32 intel_framebuffer_pitch_for_width(int width, int bpp) { @@ -11045,7 +9411,7 @@ intel_framebuffer_create_for_mode(struct drm_device *dev, bpp); mode_cmd.pixel_format = drm_mode_legacy_fb_format(bpp, depth); - fb = intel_framebuffer_create(dev, &mode_cmd, obj); + fb = intel_framebuffer_create(obj, &mode_cmd); if (IS_ERR(fb)) i915_gem_object_put(obj); @@ -11731,14 +10097,12 @@ static int intel_gen2_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - u32 flip_mask; - int ret; + u32 flip_mask, *cs; - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* Can't queue multiple flips, so wait for the previous * one to finish before executing the next. @@ -11747,13 +10111,12 @@ static int intel_gen2_queue_flip(struct drm_device *dev, flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; else flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_DISPLAY_FLIP | - MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(ring, fb->pitches[0]); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(ring, 0); /* aux display base address, unused */ + *cs++ = MI_WAIT_FOR_EVENT | flip_mask; + *cs++ = MI_NOOP; + *cs++ = MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane); + *cs++ = fb->pitches[0]; + *cs++ = intel_crtc->flip_work->gtt_offset; + *cs++ = 0; /* aux display base address, unused */ return 0; } @@ -11765,26 +10128,23 @@ static int intel_gen3_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - u32 flip_mask; - int ret; + u32 flip_mask, *cs; - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); if (intel_crtc->plane) flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; else flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | - MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(ring, fb->pitches[0]); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(ring, MI_NOOP); + *cs++ = MI_WAIT_FOR_EVENT | flip_mask; + *cs++ = MI_NOOP; + *cs++ = MI_DISPLAY_FLIP_I915 | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane); + *cs++ = fb->pitches[0]; + *cs++ = intel_crtc->flip_work->gtt_offset; + *cs++ = MI_NOOP; return 0; } @@ -11796,25 +10156,22 @@ static int intel_gen4_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - uint32_t pf, pipesrc; - int ret; + u32 pf, pipesrc, *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* i965+ uses the linear or tiled offsets from the * Display Registers (which do not change across a page-flip) * so we need only reprogram the base address. */ - intel_ring_emit(ring, MI_DISPLAY_FLIP | - MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(ring, fb->pitches[0]); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset | - intel_fb_modifier_to_tiling(fb->modifier)); + *cs++ = MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane); + *cs++ = fb->pitches[0]; + *cs++ = intel_crtc->flip_work->gtt_offset | + intel_fb_modifier_to_tiling(fb->modifier); /* XXX Enabling the panel-fitter across page-flip is so far * untested on non-native modes, so ignore it for now. @@ -11822,7 +10179,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev, */ pf = 0; pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; - intel_ring_emit(ring, pf | pipesrc); + *cs++ = pf | pipesrc; return 0; } @@ -11834,21 +10191,17 @@ static int intel_gen6_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - uint32_t pf, pipesrc; - int ret; + u32 pf, pipesrc, *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_DISPLAY_FLIP | - MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(ring, fb->pitches[0] | - intel_fb_modifier_to_tiling(fb->modifier)); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); + *cs++ = MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane); + *cs++ = fb->pitches[0] | intel_fb_modifier_to_tiling(fb->modifier); + *cs++ = intel_crtc->flip_work->gtt_offset; /* Contrary to the suggestions in the documentation, * "Enable Panel Fitter" does not seem to be required when page @@ -11858,7 +10211,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev, */ pf = 0; pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; - intel_ring_emit(ring, pf | pipesrc); + *cs++ = pf | pipesrc; return 0; } @@ -11871,9 +10224,8 @@ static int intel_gen7_queue_flip(struct drm_device *dev, uint32_t flags) { struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - uint32_t plane_bit = 0; + u32 *cs, plane_bit = 0; int len, ret; switch (intel_crtc->plane) { @@ -11917,9 +10269,9 @@ static int intel_gen7_queue_flip(struct drm_device *dev, if (ret) return ret; - ret = intel_ring_begin(req, len); - if (ret) - return ret; + cs = intel_ring_begin(req, len); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* Unmask the flip-done completion message. Note that the bspec says that * we should do this for both the BCS and RCS, and that we must not unmask @@ -11931,31 +10283,28 @@ static int intel_gen7_queue_flip(struct drm_device *dev, * to zero does lead to lockups within MI_DISPLAY_FLIP. */ if (req->engine->id == RCS) { - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, DERRMR); - intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE | - DERRMR_PIPEB_PRI_FLIP_DONE | - DERRMR_PIPEC_PRI_FLIP_DONE)); + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(DERRMR); + *cs++ = ~(DERRMR_PIPEA_PRI_FLIP_DONE | + DERRMR_PIPEB_PRI_FLIP_DONE | + DERRMR_PIPEC_PRI_FLIP_DONE); if (IS_GEN8(dev_priv)) - intel_ring_emit(ring, MI_STORE_REGISTER_MEM_GEN8 | - MI_SRM_LRM_GLOBAL_GTT); + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | + MI_SRM_LRM_GLOBAL_GTT; else - intel_ring_emit(ring, MI_STORE_REGISTER_MEM | - MI_SRM_LRM_GLOBAL_GTT); - intel_ring_emit_reg(ring, DERRMR); - intel_ring_emit(ring, - i915_ggtt_offset(req->engine->scratch) + 256); + *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + *cs++ = i915_mmio_reg_offset(DERRMR); + *cs++ = i915_ggtt_offset(req->engine->scratch) + 256; if (IS_GEN8(dev_priv)) { - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); + *cs++ = 0; + *cs++ = MI_NOOP; } } - intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit); - intel_ring_emit(ring, fb->pitches[0] | - intel_fb_modifier_to_tiling(fb->modifier)); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(ring, (MI_NOOP)); + *cs++ = MI_DISPLAY_FLIP_I915 | plane_bit; + *cs++ = fb->pitches[0] | intel_fb_modifier_to_tiling(fb->modifier); + *cs++ = intel_crtc->flip_work->gtt_offset; + *cs++ = MI_NOOP; return 0; } @@ -12147,6 +10496,7 @@ void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe) spin_unlock(&dev->event_lock); } +__maybe_unused static int intel_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_pending_vblank_event *event, @@ -12441,11 +10791,11 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, struct intel_crtc_state *pipe_config = to_intel_crtc_state(crtc_state); struct drm_crtc *crtc = crtc_state->crtc; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct drm_plane *plane = plane_state->plane; + struct intel_plane *plane = to_intel_plane(plane_state->plane); struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_plane_state *old_plane_state = - to_intel_plane_state(plane->state); + to_intel_plane_state(plane->base.state); bool mode_changed = needs_modeset(crtc_state); bool was_crtc_enabled = crtc->state->active; bool is_crtc_enabled = crtc_state->active; @@ -12453,7 +10803,7 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, struct drm_framebuffer *fb = plane_state->fb; int ret; - if (INTEL_GEN(dev_priv) >= 9 && plane->type != DRM_PLANE_TYPE_CURSOR) { + if (INTEL_GEN(dev_priv) >= 9 && plane->id != PLANE_CURSOR) { ret = skl_update_scaler_plane( to_intel_crtc_state(crtc_state), to_intel_plane_state(plane_state)); @@ -12477,8 +10827,10 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, * per-plane wm computation to the .check_plane() hook, and * only combine the results from all planes in the current place? */ - if (!is_crtc_enabled) + if (!is_crtc_enabled) { plane_state->visible = visible = false; + to_intel_crtc_state(crtc_state)->active_planes &= ~BIT(plane->id); + } if (!was_visible && !visible) return 0; @@ -12490,41 +10842,39 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, turn_on = visible && (!was_visible || mode_changed); DRM_DEBUG_ATOMIC("[CRTC:%d:%s] has [PLANE:%d:%s] with fb %i\n", - intel_crtc->base.base.id, - intel_crtc->base.name, - plane->base.id, plane->name, + intel_crtc->base.base.id, intel_crtc->base.name, + plane->base.base.id, plane->base.name, fb ? fb->base.id : -1); DRM_DEBUG_ATOMIC("[PLANE:%d:%s] visible %i -> %i, off %i, on %i, ms %i\n", - plane->base.id, plane->name, + plane->base.base.id, plane->base.name, was_visible, visible, turn_off, turn_on, mode_changed); if (turn_on) { - pipe_config->update_wm_pre = true; + if (INTEL_GEN(dev_priv) < 5) + pipe_config->update_wm_pre = true; /* must disable cxsr around plane enable/disable */ - if (plane->type != DRM_PLANE_TYPE_CURSOR) + if (plane->id != PLANE_CURSOR) pipe_config->disable_cxsr = true; } else if (turn_off) { - pipe_config->update_wm_post = true; + if (INTEL_GEN(dev_priv) < 5) + pipe_config->update_wm_post = true; /* must disable cxsr around plane enable/disable */ - if (plane->type != DRM_PLANE_TYPE_CURSOR) + if (plane->id != PLANE_CURSOR) pipe_config->disable_cxsr = true; - } else if (intel_wm_need_update(plane, plane_state)) { - /* FIXME bollocks */ - pipe_config->update_wm_pre = true; - pipe_config->update_wm_post = true; + } else if (intel_wm_need_update(&plane->base, plane_state)) { + if (INTEL_GEN(dev_priv) < 5) { + /* FIXME bollocks */ + pipe_config->update_wm_pre = true; + pipe_config->update_wm_post = true; + } } - /* Pre-gen9 platforms need two-step watermark updates */ - if ((pipe_config->update_wm_pre || pipe_config->update_wm_post) && - INTEL_GEN(dev_priv) < 9 && dev_priv->display.optimize_watermarks) - to_intel_crtc_state(crtc_state)->wm.need_postvbl_update = true; - if (visible || was_visible) - pipe_config->fb_bits |= to_intel_plane(plane)->frontbuffer_bit; + pipe_config->fb_bits |= plane->frontbuffer_bit; /* * WaCxSRDisabledForSpriteScaling:ivb @@ -12532,7 +10882,7 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, * cstate->update_wm was already set above, so this flag will * take effect when we commit and program watermarks. */ - if (plane->type == DRM_PLANE_TYPE_OVERLAY && IS_IVYBRIDGE(dev_priv) && + if (plane->id == PLANE_SPRITE0 && IS_IVYBRIDGE(dev_priv) && needs_scaling(to_intel_plane_state(plane_state)) && !needs_scaling(old_plane_state)) pipe_config->disable_lp_wm = true; @@ -12642,7 +10992,7 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc, ret = skl_update_scaler_crtc(pipe_config); if (!ret) - ret = intel_atomic_setup_scalers(dev, intel_crtc, + ret = intel_atomic_setup_scalers(dev_priv, intel_crtc, pipe_config); } @@ -12800,9 +11150,10 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc, DRM_DEBUG_KMS("adjusted mode:\n"); drm_mode_debug_printmodeline(&pipe_config->base.adjusted_mode); intel_dump_crtc_timings(&pipe_config->base.adjusted_mode); - DRM_DEBUG_KMS("port clock: %d, pipe src size: %dx%d\n", + DRM_DEBUG_KMS("port clock: %d, pipe src size: %dx%d, pixel rate %d\n", pipe_config->port_clock, - pipe_config->pipe_src_w, pipe_config->pipe_src_h); + pipe_config->pipe_src_w, pipe_config->pipe_src_h, + pipe_config->pixel_rate); if (INTEL_GEN(dev_priv) >= 9) DRM_DEBUG_KMS("num_scalers: %d, scaler_users: 0x%x, scaler_id: %d\n", @@ -12920,10 +11271,13 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state) static void clear_intel_crtc_state(struct intel_crtc_state *crtc_state) { + struct drm_i915_private *dev_priv = + to_i915(crtc_state->base.crtc->dev); struct drm_crtc_state tmp_state; struct intel_crtc_scaler_state scaler_state; struct intel_dpll_hw_state dpll_hw_state; struct intel_shared_dpll *shared_dpll; + struct intel_crtc_wm_state wm_state; bool force_thru; /* FIXME: before the switch to atomic started, a new pipe_config was @@ -12936,6 +11290,8 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state) shared_dpll = crtc_state->shared_dpll; dpll_hw_state = crtc_state->dpll_hw_state; force_thru = crtc_state->pch_pfit.force_thru; + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + wm_state = crtc_state->wm; memset(crtc_state, 0, sizeof *crtc_state); @@ -12944,6 +11300,8 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state) crtc_state->shared_dpll = shared_dpll; crtc_state->dpll_hw_state = dpll_hw_state; crtc_state->pch_pfit.force_thru = force_thru; + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + crtc_state->wm = wm_state; } static int @@ -13060,8 +11418,11 @@ encoder_retry: } /* Dithering seems to not pass-through bits correctly when it should, so - * only enable it on 6bpc panels. */ - pipe_config->dither = pipe_config->pipe_bpp == 6*3; + * only enable it on 6bpc panels and when its not a compliance + * test requesting 6bpc video pattern. + */ + pipe_config->dither = (pipe_config->pipe_bpp == 6*3) && + !pipe_config->dither_force_disable; DRM_DEBUG_KMS("hw max bpp: %i, pipe bpp: %i, dithering: %i\n", base_bpp, pipe_config->pipe_bpp, pipe_config->dither); @@ -13375,6 +11736,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, } PIPE_CONF_CHECK_I(scaler_state.scaler_id); + PIPE_CONF_CHECK_CLOCK_FUZZY(pixel_rate); } /* BDW+ don't expose a synchronous way to read the state */ @@ -13666,6 +12028,8 @@ verify_crtc_state(struct drm_crtc *crtc, } } + intel_crtc_compute_pixel_rate(pipe_config); + if (!new_crtc_state->active) return; @@ -13993,6 +12357,8 @@ static int intel_modeset_checks(struct drm_atomic_state *state) intel_state->modeset = true; intel_state->active_crtcs = dev_priv->active_crtcs; + intel_state->cdclk.logical = dev_priv->cdclk.logical; + intel_state->cdclk.actual = dev_priv->cdclk.actual; for_each_crtc_in_state(state, crtc, crtc_state, i) { if (crtc_state->active) @@ -14012,38 +12378,35 @@ static int intel_modeset_checks(struct drm_atomic_state *state) * adjusted_mode bits in the crtc directly. */ if (dev_priv->display.modeset_calc_cdclk) { - if (!intel_state->cdclk_pll_vco) - intel_state->cdclk_pll_vco = dev_priv->cdclk_pll.vco; - if (!intel_state->cdclk_pll_vco) - intel_state->cdclk_pll_vco = dev_priv->skl_preferred_vco_freq; - ret = dev_priv->display.modeset_calc_cdclk(state); if (ret < 0) return ret; /* - * Writes to dev_priv->atomic_cdclk_freq must protected by + * Writes to dev_priv->cdclk.logical must protected by * holding all the crtc locks, even if we don't end up * touching the hardware */ - if (intel_state->cdclk != dev_priv->atomic_cdclk_freq) { + if (!intel_cdclk_state_compare(&dev_priv->cdclk.logical, + &intel_state->cdclk.logical)) { ret = intel_lock_all_pipes(state); if (ret < 0) return ret; } /* All pipes must be switched off while we change the cdclk. */ - if (intel_state->dev_cdclk != dev_priv->cdclk_freq || - intel_state->cdclk_pll_vco != dev_priv->cdclk_pll.vco) { + if (!intel_cdclk_state_compare(&dev_priv->cdclk.actual, + &intel_state->cdclk.actual)) { ret = intel_modeset_all_pipes(state); if (ret < 0) return ret; } - DRM_DEBUG_KMS("New cdclk calculated to be atomic %u, actual %u\n", - intel_state->cdclk, intel_state->dev_cdclk); + DRM_DEBUG_KMS("New cdclk calculated to be logical %u kHz, actual %u kHz\n", + intel_state->cdclk.logical.cdclk, + intel_state->cdclk.actual.cdclk); } else { - to_intel_atomic_state(state)->cdclk = dev_priv->atomic_cdclk_freq; + to_intel_atomic_state(state)->cdclk.logical = dev_priv->cdclk.logical; } intel_modeset_clear_plls(state); @@ -14146,7 +12509,7 @@ static int intel_atomic_check(struct drm_device *dev, if (ret) return ret; } else { - intel_state->cdclk = dev_priv->atomic_cdclk_freq; + intel_state->cdclk.logical = dev_priv->cdclk.logical; } ret = drm_atomic_helper_check_planes(dev, state); @@ -14253,12 +12616,7 @@ static bool needs_vblank_wait(struct intel_crtc_state *crtc_state) if (crtc_state->update_wm_post) return true; - /* - * cxsr is re-enabled after vblank. - * This is already handled by crtc_state->update_wm_post, - * but added for clarity. - */ - if (crtc_state->disable_cxsr) + if (crtc_state->wm.need_postvbl_update) return true; return false; @@ -14380,6 +12738,24 @@ static void skl_update_crtcs(struct drm_atomic_state *state, } while (progress); } +static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv) +{ + struct intel_atomic_state *state, *next; + struct llist_node *freed; + + freed = llist_del_all(&dev_priv->atomic_helper.free_list); + llist_for_each_entry_safe(state, next, freed, freed) + drm_atomic_state_put(&state->base); +} + +static void intel_atomic_helper_free_state_worker(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), atomic_helper.free_work); + + intel_atomic_helper_free_state(dev_priv); +} + static void intel_atomic_commit_tail(struct drm_atomic_state *state) { struct drm_device *dev = state->dev; @@ -14389,7 +12765,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) struct drm_crtc *crtc; struct intel_crtc_state *intel_cstate; bool hw_check = intel_state->modeset; - unsigned long put_domains[I915_MAX_PIPES] = {}; + u64 put_domains[I915_MAX_PIPES] = {}; unsigned crtc_vblank_mask = 0; int i; @@ -14433,12 +12809,12 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) /* * Make sure we don't call initial_watermarks * for ILK-style watermark updates. + * + * No clue what this is supposed to achieve. */ - if (dev_priv->display.atomic_update_watermarks) + if (INTEL_GEN(dev_priv) >= 9) dev_priv->display.initial_watermarks(intel_state, to_intel_crtc_state(crtc->state)); - else - intel_update_watermarks(intel_crtc); } } } @@ -14450,10 +12826,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) if (intel_state->modeset) { drm_atomic_helper_update_legacy_modeset_state(state->dev, state); - if (dev_priv->display.modeset_commit_cdclk && - (intel_state->dev_cdclk != dev_priv->cdclk_freq || - intel_state->cdclk_pll_vco != dev_priv->cdclk_pll.vco)) - dev_priv->display.modeset_commit_cdclk(state); + intel_set_cdclk(dev_priv, &dev_priv->cdclk.actual); /* * SKL workaround: bspec recommends we disable the SAGV when we @@ -14546,6 +12919,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) * can happen also when the device is completely off. */ intel_uncore_arm_unclaimed_mmio_detection(dev_priv); + + intel_atomic_helper_free_state(dev_priv); } static void intel_atomic_commit_work(struct work_struct *work) @@ -14615,6 +12990,17 @@ static int intel_atomic_commit(struct drm_device *dev, struct drm_i915_private *dev_priv = to_i915(dev); int ret = 0; + /* + * The intel_legacy_cursor_update() fast path takes care + * of avoiding the vblank waits for simple cursor + * movement and flips. For cursor on/off and size changes, + * we want to perform the vblank waits so that watermark + * updates happen during the correct frames. Gen9+ have + * double buffered watermarks and so shouldn't need this. + */ + if (INTEL_GEN(dev_priv) < 9) + state->legacy_cursor_update = false; + ret = drm_atomic_helper_setup_commit(state, nonblock); if (ret) return ret; @@ -14639,7 +13025,8 @@ static int intel_atomic_commit(struct drm_device *dev, memcpy(dev_priv->min_pixclk, intel_state->min_pixclk, sizeof(intel_state->min_pixclk)); dev_priv->active_crtcs = intel_state->active_crtcs; - dev_priv->atomic_cdclk_freq = intel_state->cdclk; + dev_priv->cdclk.logical = intel_state->cdclk.logical; + dev_priv->cdclk.actual = intel_state->cdclk.actual; } drm_atomic_state_get(state); @@ -14739,7 +13126,7 @@ static const struct drm_crtc_funcs intel_crtc_funcs = { .set_config = drm_atomic_helper_set_config, .set_property = drm_atomic_helper_crtc_set_property, .destroy = intel_crtc_destroy, - .page_flip = intel_crtc_page_flip, + .page_flip = drm_atomic_helper_page_flip, .atomic_duplicate_state = intel_crtc_duplicate_state, .atomic_destroy_state = intel_crtc_destroy_state, .set_crc_source = intel_crtc_set_crc_source, @@ -14771,6 +13158,29 @@ intel_prepare_plane_fb(struct drm_plane *plane, struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb); int ret; + if (obj) { + if (plane->type == DRM_PLANE_TYPE_CURSOR && + INTEL_INFO(dev_priv)->cursor_needs_physical) { + const int align = IS_I830(dev_priv) ? 16 * 1024 : 256; + + ret = i915_gem_object_attach_phys(obj, align); + if (ret) { + DRM_DEBUG_KMS("failed to attach phys object\n"); + return ret; + } + } else { + struct i915_vma *vma; + + vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation); + if (IS_ERR(vma)) { + DRM_DEBUG_KMS("failed to pin object\n"); + return PTR_ERR(vma); + } + + to_intel_plane_state(new_state)->vma = vma; + } + } + if (!obj && !old_obj) return 0; @@ -14823,26 +13233,6 @@ intel_prepare_plane_fb(struct drm_plane *plane, i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY); } - if (plane->type == DRM_PLANE_TYPE_CURSOR && - INTEL_INFO(dev_priv)->cursor_needs_physical) { - int align = IS_I830(dev_priv) ? 16 * 1024 : 256; - ret = i915_gem_object_attach_phys(obj, align); - if (ret) { - DRM_DEBUG_KMS("failed to attach phys object\n"); - return ret; - } - } else { - struct i915_vma *vma; - - vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation); - if (IS_ERR(vma)) { - DRM_DEBUG_KMS("failed to pin object\n"); - return PTR_ERR(vma); - } - - to_intel_plane_state(new_state)->vma = vma; - } - return 0; } @@ -14870,16 +13260,22 @@ intel_cleanup_plane_fb(struct drm_plane *plane, int skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state) { + struct drm_i915_private *dev_priv; int max_scale; - int crtc_clock, cdclk; + int crtc_clock, max_dotclk; if (!intel_crtc || !crtc_state->base.enable) return DRM_PLANE_HELPER_NO_SCALING; + dev_priv = to_i915(intel_crtc->base.dev); + crtc_clock = crtc_state->base.adjusted_mode.crtc_clock; - cdclk = to_intel_atomic_state(crtc_state->base.state)->cdclk; + max_dotclk = to_intel_atomic_state(crtc_state->base.state)->cdclk.logical.cdclk; - if (WARN_ON_ONCE(!crtc_clock || cdclk < crtc_clock)) + if (IS_GEMINILAKE(dev_priv)) + max_dotclk *= 2; + + if (WARN_ON_ONCE(!crtc_clock || max_dotclk < crtc_clock)) return DRM_PLANE_HELPER_NO_SCALING; /* @@ -14888,7 +13284,8 @@ skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state * or * cdclk/crtc_clock */ - max_scale = min((1 << 16) * 3 - 1, (1 << 8) * ((cdclk << 8) / crtc_clock)); + max_scale = min((1 << 16) * 3 - 1, + (1 << 8) * ((max_dotclk << 8) / crtc_clock)); return max_scale; } @@ -15038,8 +13435,7 @@ intel_legacy_cursor_update(struct drm_plane *plane, old_plane_state->src_h != src_h || old_plane_state->crtc_w != crtc_w || old_plane_state->crtc_h != crtc_h || - !old_plane_state->visible || - old_plane_state->fb->modifier != fb->modifier) + !old_plane_state->fb != !fb) goto slow; new_plane_state = intel_plane_duplicate_state(plane); @@ -15062,10 +13458,6 @@ intel_legacy_cursor_update(struct drm_plane *plane, if (ret) goto out_free; - /* Visibility changed, must take slowpath. */ - if (!new_plane_state->visible) - goto slow_free; - ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex); if (ret) goto out_free; @@ -15105,9 +13497,15 @@ intel_legacy_cursor_update(struct drm_plane *plane, new_plane_state->fb = old_fb; to_intel_plane_state(new_plane_state)->vma = old_vma; - intel_plane->update_plane(plane, - to_intel_crtc_state(crtc->state), - to_intel_plane_state(plane->state)); + if (plane->state->visible) { + trace_intel_update_plane(plane, to_intel_crtc(crtc)); + intel_plane->update_plane(plane, + to_intel_crtc_state(crtc->state), + to_intel_plane_state(plane->state)); + } else { + trace_intel_disable_plane(plane, to_intel_crtc(crtc)); + intel_plane->disable_plane(plane, crtc); + } intel_cleanup_plane_fb(plane, new_plane_state); @@ -15117,8 +13515,6 @@ out_free: intel_plane_destroy_state(plane, new_plane_state); return ret; -slow_free: - intel_plane_destroy_state(plane, new_plane_state); slow: return drm_atomic_helper_update_plane(plane, crtc, fb, crtc_x, crtc_y, crtc_w, crtc_h, @@ -15492,8 +13888,6 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe) intel_crtc->cursor_cntl = ~0; intel_crtc->cursor_size = ~0; - intel_crtc->wm.cxsr_allowed = true; - /* initialize shared scalers */ intel_crtc_init_scalers(intel_crtc, crtc_state); @@ -15681,7 +14075,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) */ found = I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_INIT_DISPLAY_DETECTED; /* WaIgnoreDDIAStrap: skl */ - if (found || IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (found || IS_GEN9_BC(dev_priv)) intel_ddi_init(dev_priv, PORT_A); /* DDI B, C and D detection is indicated by the SFUSE_STRAP @@ -15697,7 +14091,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) /* * On SKL we don't have a way to detect DDI-E so we rely on VBT. */ - if ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) && + if (IS_GEN9_BC(dev_priv) && (dev_priv->vbt.ddi_port_info[PORT_E].supports_dp || dev_priv->vbt.ddi_port_info[PORT_E].supports_dvi || dev_priv->vbt.ddi_port_info[PORT_E].supports_hdmi)) @@ -15830,14 +14224,16 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb) { - struct drm_device *dev = fb->dev; struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); drm_framebuffer_cleanup(fb); - mutex_lock(&dev->struct_mutex); + + i915_gem_object_lock(intel_fb->obj); WARN_ON(!intel_fb->obj->framebuffer_references--); + i915_gem_object_unlock(intel_fb->obj); + i915_gem_object_put(intel_fb->obj); - mutex_unlock(&dev->struct_mutex); + kfree(intel_fb); } @@ -15862,15 +14258,10 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb, struct drm_clip_rect *clips, unsigned num_clips) { - struct drm_device *dev = fb->dev; - struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); - struct drm_i915_gem_object *obj = intel_fb->obj; + struct drm_i915_gem_object *obj = intel_fb_obj(fb); - mutex_lock(&dev->struct_mutex); - if (obj->pin_display && obj->cache_dirty) - i915_gem_clflush_object(obj, true); - intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB); - mutex_unlock(&dev->struct_mutex); + i915_gem_object_flush_if_display(obj); + intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); return 0; } @@ -15885,7 +14276,7 @@ static u32 intel_fb_pitch_limit(struct drm_i915_private *dev_priv, uint64_t fb_modifier, uint32_t pixel_format) { - u32 gen = INTEL_INFO(dev_priv)->gen; + u32 gen = INTEL_GEN(dev_priv); if (gen >= 9) { int cpp = drm_format_plane_cpp(pixel_format, 0); @@ -15894,8 +14285,7 @@ u32 intel_fb_pitch_limit(struct drm_i915_private *dev_priv, * pixels and 32K bytes." */ return min(8192 * cpp, 32768); - } else if (gen >= 5 && !IS_VALLEYVIEW(dev_priv) && - !IS_CHERRYVIEW(dev_priv)) { + } else if (gen >= 5 && !HAS_GMCH_DISPLAY(dev_priv)) { return 32*1024; } else if (gen >= 4) { if (fb_modifier == I915_FORMAT_MOD_X_TILED) @@ -15913,18 +14303,21 @@ u32 intel_fb_pitch_limit(struct drm_i915_private *dev_priv, } } -static int intel_framebuffer_init(struct drm_device *dev, - struct intel_framebuffer *intel_fb, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj) +static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, + struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd) { - struct drm_i915_private *dev_priv = to_i915(dev); - unsigned int tiling = i915_gem_object_get_tiling(obj); - int ret; - u32 pitch_limit, stride_alignment; + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct drm_format_name_buf format_name; + u32 pitch_limit, stride_alignment; + unsigned int tiling, stride; + int ret = -EINVAL; - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + i915_gem_object_lock(obj); + obj->framebuffer_references++; + tiling = i915_gem_object_get_tiling(obj); + stride = i915_gem_object_get_stride(obj); + i915_gem_object_unlock(obj); if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) { /* @@ -15934,14 +14327,14 @@ static int intel_framebuffer_init(struct drm_device *dev, if (tiling != I915_TILING_NONE && tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) { DRM_DEBUG("tiling_mode doesn't match fb modifier\n"); - return -EINVAL; + goto err; } } else { if (tiling == I915_TILING_X) { mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED; } else if (tiling == I915_TILING_Y) { DRM_DEBUG("No Y tiling for legacy addfb\n"); - return -EINVAL; + goto err; } } @@ -15952,7 +14345,7 @@ static int intel_framebuffer_init(struct drm_device *dev, if (INTEL_GEN(dev_priv) < 9) { DRM_DEBUG("Unsupported tiling 0x%llx!\n", mode_cmd->modifier[0]); - return -EINVAL; + goto err; } case DRM_FORMAT_MOD_NONE: case I915_FORMAT_MOD_X_TILED: @@ -15960,7 +14353,7 @@ static int intel_framebuffer_init(struct drm_device *dev, default: DRM_DEBUG("Unsupported fb modifier 0x%llx!\n", mode_cmd->modifier[0]); - return -EINVAL; + goto err; } /* @@ -15970,7 +14363,7 @@ static int intel_framebuffer_init(struct drm_device *dev, if (INTEL_INFO(dev_priv)->gen < 4 && tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) { DRM_DEBUG("tiling_mode must match fb modifier exactly on gen2/3\n"); - return -EINVAL; + goto err; } stride_alignment = intel_fb_stride_alignment(dev_priv, @@ -15979,7 +14372,7 @@ static int intel_framebuffer_init(struct drm_device *dev, if (mode_cmd->pitches[0] & (stride_alignment - 1)) { DRM_DEBUG("pitch (%d) must be at least %u byte aligned\n", mode_cmd->pitches[0], stride_alignment); - return -EINVAL; + goto err; } pitch_limit = intel_fb_pitch_limit(dev_priv, mode_cmd->modifier[0], @@ -15989,19 +14382,17 @@ static int intel_framebuffer_init(struct drm_device *dev, mode_cmd->modifier[0] != DRM_FORMAT_MOD_NONE ? "tiled" : "linear", mode_cmd->pitches[0], pitch_limit); - return -EINVAL; + goto err; } /* * If there's a fence, enforce that * the fb pitch and fence stride match. */ - if (tiling != I915_TILING_NONE && - mode_cmd->pitches[0] != i915_gem_object_get_stride(obj)) { + if (tiling != I915_TILING_NONE && mode_cmd->pitches[0] != stride) { DRM_DEBUG("pitch (%d) must match tiling stride (%d)\n", - mode_cmd->pitches[0], - i915_gem_object_get_stride(obj)); - return -EINVAL; + mode_cmd->pitches[0], stride); + goto err; } /* Reject formats not supported by any plane early. */ @@ -16015,7 +14406,7 @@ static int intel_framebuffer_init(struct drm_device *dev, if (INTEL_GEN(dev_priv) > 3) { DRM_DEBUG("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format, &format_name)); - return -EINVAL; + goto err; } break; case DRM_FORMAT_ABGR8888: @@ -16023,7 +14414,7 @@ static int intel_framebuffer_init(struct drm_device *dev, INTEL_GEN(dev_priv) < 9) { DRM_DEBUG("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format, &format_name)); - return -EINVAL; + goto err; } break; case DRM_FORMAT_XBGR8888: @@ -16032,14 +14423,14 @@ static int intel_framebuffer_init(struct drm_device *dev, if (INTEL_GEN(dev_priv) < 4) { DRM_DEBUG("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format, &format_name)); - return -EINVAL; + goto err; } break; case DRM_FORMAT_ABGR2101010: if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) { DRM_DEBUG("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format, &format_name)); - return -EINVAL; + goto err; } break; case DRM_FORMAT_YUYV: @@ -16049,35 +14440,42 @@ static int intel_framebuffer_init(struct drm_device *dev, if (INTEL_GEN(dev_priv) < 5) { DRM_DEBUG("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format, &format_name)); - return -EINVAL; + goto err; } break; default: DRM_DEBUG("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format, &format_name)); - return -EINVAL; + goto err; } /* FIXME need to adjust LINOFF/TILEOFF accordingly. */ if (mode_cmd->offsets[0] != 0) - return -EINVAL; + goto err; - drm_helper_mode_fill_fb_struct(dev, &intel_fb->base, mode_cmd); + drm_helper_mode_fill_fb_struct(&dev_priv->drm, + &intel_fb->base, mode_cmd); intel_fb->obj = obj; ret = intel_fill_fb_info(dev_priv, &intel_fb->base); if (ret) - return ret; + goto err; - ret = drm_framebuffer_init(dev, &intel_fb->base, &intel_fb_funcs); + ret = drm_framebuffer_init(obj->base.dev, + &intel_fb->base, + &intel_fb_funcs); if (ret) { DRM_ERROR("framebuffer init failed %d\n", ret); - return ret; + goto err; } - intel_fb->obj->framebuffer_references++; - return 0; + +err: + i915_gem_object_lock(obj); + obj->framebuffer_references--; + i915_gem_object_unlock(obj); + return ret; } static struct drm_framebuffer * @@ -16093,7 +14491,7 @@ intel_user_framebuffer_create(struct drm_device *dev, if (!obj) return ERR_PTR(-ENOENT); - fb = intel_framebuffer_create(dev, &mode_cmd, obj); + fb = intel_framebuffer_create(obj, &mode_cmd); if (IS_ERR(fb)) i915_gem_object_put(obj); @@ -16127,6 +14525,8 @@ static const struct drm_mode_config_funcs intel_mode_funcs = { */ void intel_init_display_hooks(struct drm_i915_private *dev_priv) { + intel_init_cdclk_hooks(dev_priv); + if (INTEL_INFO(dev_priv)->gen >= 9) { dev_priv->display.get_pipe_config = haswell_get_pipe_config; dev_priv->display.get_initial_plane_config = @@ -16195,62 +14595,6 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) dev_priv->display.crtc_disable = i9xx_crtc_disable; } - /* Returns the core display clock speed */ - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) - dev_priv->display.get_display_clock_speed = - skylake_get_display_clock_speed; - else if (IS_GEN9_LP(dev_priv)) - dev_priv->display.get_display_clock_speed = - broxton_get_display_clock_speed; - else if (IS_BROADWELL(dev_priv)) - dev_priv->display.get_display_clock_speed = - broadwell_get_display_clock_speed; - else if (IS_HASWELL(dev_priv)) - dev_priv->display.get_display_clock_speed = - haswell_get_display_clock_speed; - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - dev_priv->display.get_display_clock_speed = - valleyview_get_display_clock_speed; - else if (IS_GEN5(dev_priv)) - dev_priv->display.get_display_clock_speed = - ilk_get_display_clock_speed; - else if (IS_I945G(dev_priv) || IS_I965G(dev_priv) || - IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv)) - dev_priv->display.get_display_clock_speed = - i945_get_display_clock_speed; - else if (IS_GM45(dev_priv)) - dev_priv->display.get_display_clock_speed = - gm45_get_display_clock_speed; - else if (IS_I965GM(dev_priv)) - dev_priv->display.get_display_clock_speed = - i965gm_get_display_clock_speed; - else if (IS_PINEVIEW(dev_priv)) - dev_priv->display.get_display_clock_speed = - pnv_get_display_clock_speed; - else if (IS_G33(dev_priv) || IS_G4X(dev_priv)) - dev_priv->display.get_display_clock_speed = - g33_get_display_clock_speed; - else if (IS_I915G(dev_priv)) - dev_priv->display.get_display_clock_speed = - i915_get_display_clock_speed; - else if (IS_I945GM(dev_priv) || IS_I845G(dev_priv)) - dev_priv->display.get_display_clock_speed = - i9xx_misc_get_display_clock_speed; - else if (IS_I915GM(dev_priv)) - dev_priv->display.get_display_clock_speed = - i915gm_get_display_clock_speed; - else if (IS_I865G(dev_priv)) - dev_priv->display.get_display_clock_speed = - i865_get_display_clock_speed; - else if (IS_I85X(dev_priv)) - dev_priv->display.get_display_clock_speed = - i85x_get_display_clock_speed; - else { /* 830 */ - WARN(!IS_I830(dev_priv), "Unknown platform. Assuming 133 MHz CDCLK\n"); - dev_priv->display.get_display_clock_speed = - i830_get_display_clock_speed; - } - if (IS_GEN5(dev_priv)) { dev_priv->display.fdi_link_train = ironlake_fdi_link_train; } else if (IS_GEN6(dev_priv)) { @@ -16262,28 +14606,6 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) dev_priv->display.fdi_link_train = hsw_fdi_link_train; } - if (IS_BROADWELL(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - broadwell_modeset_commit_cdclk; - dev_priv->display.modeset_calc_cdclk = - broadwell_modeset_calc_cdclk; - } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - valleyview_modeset_commit_cdclk; - dev_priv->display.modeset_calc_cdclk = - valleyview_modeset_calc_cdclk; - } else if (IS_GEN9_LP(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - bxt_modeset_commit_cdclk; - dev_priv->display.modeset_calc_cdclk = - bxt_modeset_calc_cdclk; - } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - skl_modeset_commit_cdclk; - dev_priv->display.modeset_calc_cdclk = - skl_modeset_calc_cdclk; - } - if (dev_priv->info.gen >= 9) dev_priv->display.update_crtcs = skl_update_crtcs; else @@ -16510,8 +14832,7 @@ void intel_modeset_init_hw(struct drm_device *dev) struct drm_i915_private *dev_priv = to_i915(dev); intel_update_cdclk(dev_priv); - - dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq; + dev_priv->cdclk.logical = dev_priv->cdclk.actual = dev_priv->cdclk.hw; intel_init_clock_gating(dev_priv); } @@ -16566,7 +14887,8 @@ retry: * intermediate watermarks (since we don't trust the current * watermarks). */ - intel_state->skip_intermediate_wm = true; + if (!HAS_GMCH_DISPLAY(dev_priv)) + intel_state->skip_intermediate_wm = true; ret = intel_atomic_check(dev, state); if (ret) { @@ -16600,18 +14922,6 @@ fail: drm_modeset_acquire_fini(&ctx); } -static void intel_atomic_helper_free_state(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), atomic_helper.free_work); - struct intel_atomic_state *state, *next; - struct llist_node *freed; - - freed = llist_del_all(&dev_priv->atomic_helper.free_list); - llist_for_each_entry_safe(state, next, freed, freed) - drm_atomic_state_put(&state->base); -} - int intel_modeset_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -16632,7 +14942,7 @@ int intel_modeset_init(struct drm_device *dev) dev->mode_config.funcs = &intel_mode_funcs; INIT_WORK(&dev_priv->atomic_helper.free_work, - intel_atomic_helper_free_state); + intel_atomic_helper_free_state_worker); intel_init_quirks(dev); @@ -16697,12 +15007,11 @@ int intel_modeset_init(struct drm_device *dev) } } - intel_update_czclk(dev_priv); - intel_update_cdclk(dev_priv); - dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq; - intel_shared_dpll_init(dev); + intel_update_czclk(dev_priv); + intel_modeset_init_hw(dev); + if (dev_priv->max_cdclk_freq == 0) intel_update_max_cdclk(dev_priv); @@ -16742,7 +15051,8 @@ int intel_modeset_init(struct drm_device *dev) * Note that we need to do this after reconstructing the BIOS fb's * since the watermark calculation done here will use pstate->fb. */ - sanitize_watermarks(dev); + if (!HAS_GMCH_DISPLAY(dev_priv)) + sanitize_watermarks(dev); return 0; } @@ -16844,6 +15154,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) if (plane->base.type == DRM_PLANE_TYPE_PRIMARY) continue; + trace_intel_disable_plane(&plane->base, crtc); plane->disable_plane(&plane->base, &crtc->base); } } @@ -16990,15 +15301,14 @@ static bool primary_get_hw_state(struct intel_plane *plane) /* FIXME read out full plane state for all planes */ static void readout_plane_state(struct intel_crtc *crtc) { - struct drm_plane *primary = crtc->base.primary; - struct intel_plane_state *plane_state = - to_intel_plane_state(primary->state); + struct intel_plane *primary = to_intel_plane(crtc->base.primary); + bool visible; - plane_state->base.visible = crtc->active && - primary_get_hw_state(to_intel_plane(primary)); + visible = crtc->active && primary_get_hw_state(primary); - if (plane_state->base.visible) - crtc->base.state->plane_mask |= 1 << drm_plane_index(primary); + intel_set_plane_visible(to_intel_crtc_state(crtc->base.state), + to_intel_plane_state(primary->base.state), + visible); } static void intel_modeset_readout_hw_state(struct drm_device *dev) @@ -17131,10 +15441,11 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) */ crtc_state->base.mode.private_flags = I915_MODE_FLAG_INHERITED; - if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) - pixclk = ilk_pipe_pixel_rate(crtc_state); - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - pixclk = crtc_state->base.adjusted_mode.crtc_clock; + intel_crtc_compute_pixel_rate(crtc_state); + + if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv) || + IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + pixclk = crtc_state->pixel_rate; else WARN_ON(dev_priv->display.modeset_calc_cdclk); @@ -17152,6 +15463,24 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) } } +static void +get_encoder_power_domains(struct drm_i915_private *dev_priv) +{ + struct intel_encoder *encoder; + + for_each_intel_encoder(&dev_priv->drm, encoder) { + u64 get_domains; + enum intel_display_power_domain domain; + + if (!encoder->get_power_domains) + continue; + + get_domains = encoder->get_power_domains(encoder); + for_each_power_domain(domain, get_domains) + intel_display_power_get(dev_priv, domain); + } +} + /* Scan out the current hw modeset state, * and sanitizes it to the current state */ @@ -17167,6 +15496,8 @@ intel_modeset_setup_hw_state(struct drm_device *dev) intel_modeset_readout_hw_state(dev); /* HW state is read out, now we need to sanitize this mess. */ + get_encoder_power_domains(dev_priv); + for_each_intel_encoder(dev, encoder) { intel_sanitize_encoder(encoder); } @@ -17193,15 +15524,17 @@ intel_modeset_setup_hw_state(struct drm_device *dev) pll->on = false; } - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { vlv_wm_get_hw_state(dev); - else if (IS_GEN9(dev_priv)) + vlv_wm_sanitize(dev_priv); + } else if (IS_GEN9(dev_priv)) { skl_wm_get_hw_state(dev); - else if (HAS_PCH_SPLIT(dev_priv)) + } else if (HAS_PCH_SPLIT(dev_priv)) { ilk_wm_get_hw_state(dev); + } for_each_intel_crtc(dev, crtc) { - unsigned long put_domains; + u64 put_domains; put_domains = modeset_get_crtc_power_domains(&crtc->base, crtc->config); if (WARN_ON(put_domains)) @@ -17209,6 +15542,8 @@ intel_modeset_setup_hw_state(struct drm_device *dev) } intel_display_set_init_power(dev_priv, false); + intel_power_domains_verify_state(dev_priv); + intel_fbc_init_pipe_state(dev_priv); } @@ -17259,8 +15594,6 @@ void intel_modeset_gem_init(struct drm_device *dev) intel_init_gt_powersave(dev_priv); - intel_modeset_init_hw(dev); - intel_setup_overlay(dev_priv); } @@ -17492,9 +15825,9 @@ intel_display_capture_error_state(struct drm_i915_private *dev_priv) void intel_display_print_error_state(struct drm_i915_error_state_buf *m, - struct drm_i915_private *dev_priv, struct intel_display_error_state *error) { + struct drm_i915_private *dev_priv = m->i915; int i; if (!error) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index d1670b8afbf5..fd96a6cf7326 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -28,8 +28,10 @@ #include <linux/i2c.h> #include <linux/slab.h> #include <linux/export.h> +#include <linux/types.h> #include <linux/notifier.h> #include <linux/reboot.h> +#include <asm/byteorder.h> #include <drm/drmP.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_crtc.h> @@ -226,7 +228,7 @@ intel_dp_source_rates(struct intel_dp *intel_dp, const int **source_rates) if (IS_GEN9_LP(dev_priv)) { *source_rates = bxt_rates; size = ARRAY_SIZE(bxt_rates); - } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + } else if (IS_GEN9_BC(dev_priv)) { *source_rates = skl_rates; size = ARRAY_SIZE(skl_rates); } else { @@ -394,14 +396,12 @@ static void pps_lock(struct intel_dp *intel_dp) struct intel_encoder *encoder = &intel_dig_port->base; struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; /* * See vlv_power_sequencer_reset() why we need * a power domain reference here. */ - power_domain = intel_display_port_aux_power_domain(encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); mutex_lock(&dev_priv->pps_mutex); } @@ -412,12 +412,10 @@ static void pps_unlock(struct intel_dp *intel_dp) struct intel_encoder *encoder = &intel_dig_port->base; struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; mutex_unlock(&dev_priv->pps_mutex); - power_domain = intel_display_port_aux_power_domain(encoder); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); } static void @@ -916,7 +914,7 @@ static uint32_t ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) * divide by 2000 and use that */ if (intel_dig_port->port == PORT_A) - return DIV_ROUND_CLOSEST(dev_priv->cdclk_freq, 2000); + return DIV_ROUND_CLOSEST(dev_priv->cdclk.hw.cdclk, 2000); else return DIV_ROUND_CLOSEST(dev_priv->rawclk_freq, 2000); } @@ -1593,6 +1591,13 @@ static int intel_dp_compute_bpp(struct intel_dp *intel_dp, if (bpc > 0) bpp = min(bpp, 3*bpc); + /* For DP Compliance we override the computed bpp for the pipe */ + if (intel_dp->compliance.test_data.bpc != 0) { + pipe_config->pipe_bpp = 3*intel_dp->compliance.test_data.bpc; + pipe_config->dither_force_disable = pipe_config->pipe_bpp == 6*3; + DRM_DEBUG_KMS("Setting pipe_bpp to %d\n", + pipe_config->pipe_bpp); + } return bpp; } @@ -1613,6 +1618,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, /* Conveniently, the link BW constants become indices with a shift...*/ int min_clock = 0; int max_clock; + int link_rate_index; int bpp, mode_rate; int link_avail, link_clock; int common_rates[DP_MAX_SUPPORTED_RATES] = {}; @@ -1654,6 +1660,15 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) return false; + /* Use values requested by Compliance Test Request */ + if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { + link_rate_index = intel_dp_link_rate_index(intel_dp, + common_rates, + intel_dp->compliance.test_link_rate); + if (link_rate_index >= 0) + min_clock = max_clock = link_rate_index; + min_lane_count = max_lane_count = intel_dp->compliance.test_lane_count; + } DRM_DEBUG_KMS("DP link computation with max lane count %i " "max bw %d pixel clock %iKHz\n", max_lane_count, common_rates[max_clock], @@ -1753,8 +1768,7 @@ found: * DPLL0 VCO may need to be adjusted to get the correct * clock for eDP. This will affect cdclk as well. */ - if (is_edp(intel_dp) && - (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))) { + if (is_edp(intel_dp) && IS_GEN9_BC(dev_priv)) { int vco; switch (pipe_config->port_clock / 2) { @@ -1767,7 +1781,7 @@ found: break; } - to_intel_atomic_state(pipe_config->base.state)->cdclk_pll_vco = vco; + to_intel_atomic_state(pipe_config->base.state)->cdclk.logical.vco = vco; } if (!HAS_DDI(dev_priv)) @@ -1987,9 +2001,7 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) { struct drm_device *dev = intel_dp_to_dev(intel_dp); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; u32 pp; i915_reg_t pp_stat_reg, pp_ctrl_reg; bool need_to_disable = !intel_dp->want_panel_vdd; @@ -2005,8 +2017,7 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) if (edp_have_panel_vdd(intel_dp)) return need_to_disable; - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); DRM_DEBUG_KMS("Turning eDP port %c VDD on\n", port_name(intel_dig_port->port)); @@ -2064,8 +2075,6 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = to_i915(dev); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *intel_encoder = &intel_dig_port->base; - enum intel_display_power_domain power_domain; u32 pp; i915_reg_t pp_stat_reg, pp_ctrl_reg; @@ -2095,8 +2104,7 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) if ((pp & PANEL_POWER_ON) == 0) intel_dp->panel_power_off_time = ktime_get_boottime(); - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); } static void edp_panel_vdd_work(struct work_struct *__work) @@ -2209,11 +2217,8 @@ void intel_edp_panel_on(struct intel_dp *intel_dp) static void edp_panel_off(struct intel_dp *intel_dp) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_device *dev = intel_dp_to_dev(intel_dp); struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; u32 pp; i915_reg_t pp_ctrl_reg; @@ -2245,8 +2250,7 @@ static void edp_panel_off(struct intel_dp *intel_dp) wait_panel_off(intel_dp); /* We got a reference when we enabled the VDD. */ - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); } void intel_edp_panel_off(struct intel_dp *intel_dp) @@ -2492,12 +2496,11 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder, enum port port = dp_to_dig_port(intel_dp)->port; struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; u32 tmp; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -2533,7 +2536,7 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder, ret = true; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } @@ -3080,9 +3083,8 @@ intel_dp_voltage_max(struct intel_dp *intel_dp) if (IS_GEN9_LP(dev_priv)) return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; else if (INTEL_GEN(dev_priv) >= 9) { - if (dev_priv->vbt.edp.low_vswing && port == PORT_A) - return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; - return DP_TRAIN_VOLTAGE_SWING_LEVEL_2; + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + return intel_ddi_dp_voltage_max(encoder); } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; else if (IS_GEN7(dev_priv) && port == PORT_A) @@ -3922,19 +3924,112 @@ intel_dp_get_sink_irq_esi(struct intel_dp *intel_dp, u8 *sink_irq_vector) static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp) { - uint8_t test_result = DP_TEST_ACK; - return test_result; + int status = 0; + int min_lane_count = 1; + int common_rates[DP_MAX_SUPPORTED_RATES] = {}; + int link_rate_index, test_link_rate; + uint8_t test_lane_count, test_link_bw; + /* (DP CTS 1.2) + * 4.3.1.11 + */ + /* Read the TEST_LANE_COUNT and TEST_LINK_RTAE fields (DP CTS 3.1.4) */ + status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_LANE_COUNT, + &test_lane_count); + + if (status <= 0) { + DRM_DEBUG_KMS("Lane count read failed\n"); + return DP_TEST_NAK; + } + test_lane_count &= DP_MAX_LANE_COUNT_MASK; + /* Validate the requested lane count */ + if (test_lane_count < min_lane_count || + test_lane_count > intel_dp->max_sink_lane_count) + return DP_TEST_NAK; + + status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_LINK_RATE, + &test_link_bw); + if (status <= 0) { + DRM_DEBUG_KMS("Link Rate read failed\n"); + return DP_TEST_NAK; + } + /* Validate the requested link rate */ + test_link_rate = drm_dp_bw_code_to_link_rate(test_link_bw); + link_rate_index = intel_dp_link_rate_index(intel_dp, + common_rates, + test_link_rate); + if (link_rate_index < 0) + return DP_TEST_NAK; + + intel_dp->compliance.test_lane_count = test_lane_count; + intel_dp->compliance.test_link_rate = test_link_rate; + + return DP_TEST_ACK; } static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp) { - uint8_t test_result = DP_TEST_NAK; - return test_result; + uint8_t test_pattern; + uint16_t test_misc; + __be16 h_width, v_height; + int status = 0; + + /* Read the TEST_PATTERN (DP CTS 3.1.5) */ + status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_PATTERN, + &test_pattern, 1); + if (status <= 0) { + DRM_DEBUG_KMS("Test pattern read failed\n"); + return DP_TEST_NAK; + } + if (test_pattern != DP_COLOR_RAMP) + return DP_TEST_NAK; + + status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_H_WIDTH_HI, + &h_width, 2); + if (status <= 0) { + DRM_DEBUG_KMS("H Width read failed\n"); + return DP_TEST_NAK; + } + + status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_V_HEIGHT_HI, + &v_height, 2); + if (status <= 0) { + DRM_DEBUG_KMS("V Height read failed\n"); + return DP_TEST_NAK; + } + + status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_MISC0, + &test_misc, 1); + if (status <= 0) { + DRM_DEBUG_KMS("TEST MISC read failed\n"); + return DP_TEST_NAK; + } + if ((test_misc & DP_TEST_COLOR_FORMAT_MASK) != DP_COLOR_FORMAT_RGB) + return DP_TEST_NAK; + if (test_misc & DP_TEST_DYNAMIC_RANGE_CEA) + return DP_TEST_NAK; + switch (test_misc & DP_TEST_BIT_DEPTH_MASK) { + case DP_TEST_BIT_DEPTH_6: + intel_dp->compliance.test_data.bpc = 6; + break; + case DP_TEST_BIT_DEPTH_8: + intel_dp->compliance.test_data.bpc = 8; + break; + default: + return DP_TEST_NAK; + } + + intel_dp->compliance.test_data.video_pattern = test_pattern; + intel_dp->compliance.test_data.hdisplay = be16_to_cpu(h_width); + intel_dp->compliance.test_data.vdisplay = be16_to_cpu(v_height); + /* Set test active flag here so userspace doesn't interrupt things */ + intel_dp->compliance.test_active = 1; + + return DP_TEST_ACK; } static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp) { - uint8_t test_result = DP_TEST_NAK; + uint8_t test_result = DP_TEST_ACK; struct intel_connector *intel_connector = intel_dp->attached_connector; struct drm_connector *connector = &intel_connector->base; @@ -3969,7 +4064,7 @@ static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp) DRM_DEBUG_KMS("Failed to write EDID checksum\n"); test_result = DP_TEST_ACK | DP_TEST_EDID_CHECKSUM_WRITE; - intel_dp->compliance.test_data.edid = INTEL_DP_RESOLUTION_STANDARD; + intel_dp->compliance.test_data.edid = INTEL_DP_RESOLUTION_PREFERRED; } /* Set test active flag here so userspace doesn't interrupt things */ @@ -3987,45 +4082,42 @@ static uint8_t intel_dp_autotest_phy_pattern(struct intel_dp *intel_dp) static void intel_dp_handle_test_request(struct intel_dp *intel_dp) { uint8_t response = DP_TEST_NAK; - uint8_t rxdata = 0; - int status = 0; + uint8_t request = 0; + int status; - status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_REQUEST, &rxdata, 1); + status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_REQUEST, &request); if (status <= 0) { DRM_DEBUG_KMS("Could not read test request from sink\n"); goto update_status; } - switch (rxdata) { + switch (request) { case DP_TEST_LINK_TRAINING: DRM_DEBUG_KMS("LINK_TRAINING test requested\n"); - intel_dp->compliance.test_type = DP_TEST_LINK_TRAINING; response = intel_dp_autotest_link_training(intel_dp); break; case DP_TEST_LINK_VIDEO_PATTERN: DRM_DEBUG_KMS("TEST_PATTERN test requested\n"); - intel_dp->compliance.test_type = DP_TEST_LINK_VIDEO_PATTERN; response = intel_dp_autotest_video_pattern(intel_dp); break; case DP_TEST_LINK_EDID_READ: DRM_DEBUG_KMS("EDID test requested\n"); - intel_dp->compliance.test_type = DP_TEST_LINK_EDID_READ; response = intel_dp_autotest_edid(intel_dp); break; case DP_TEST_LINK_PHY_TEST_PATTERN: DRM_DEBUG_KMS("PHY_PATTERN test requested\n"); - intel_dp->compliance.test_type = DP_TEST_LINK_PHY_TEST_PATTERN; response = intel_dp_autotest_phy_pattern(intel_dp); break; default: - DRM_DEBUG_KMS("Invalid test request '%02x'\n", rxdata); + DRM_DEBUG_KMS("Invalid test request '%02x'\n", request); break; } + if (response & DP_TEST_ACK) + intel_dp->compliance.test_type = request; + update_status: - status = drm_dp_dpcd_write(&intel_dp->aux, - DP_TEST_RESPONSE, - &response, 1); + status = drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_RESPONSE, response); if (status <= 0) DRM_DEBUG_KMS("Could not write test response to sink\n"); } @@ -4137,9 +4229,8 @@ intel_dp_check_link_status(struct intel_dp *intel_dp) if (!intel_dp->lane_count) return; - /* if link training is requested we should perform it always */ - if ((intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) || - (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count))) { + /* Retrain if Channel EQ or CR not ok */ + if (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) { DRM_DEBUG_KMS("%s: channel EQ not ok, retraining\n", intel_encoder->base.name); @@ -4164,6 +4255,7 @@ static bool intel_dp_short_pulse(struct intel_dp *intel_dp) { struct drm_device *dev = intel_dp_to_dev(intel_dp); + struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base; u8 sink_irq_vector = 0; u8 old_sink_count = intel_dp->sink_count; bool ret; @@ -4197,7 +4289,7 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) sink_irq_vector); if (sink_irq_vector & DP_AUTOMATED_TEST_REQUEST) - DRM_DEBUG_DRIVER("Test request in short pulse not handled\n"); + intel_dp_handle_test_request(intel_dp); if (sink_irq_vector & (DP_CP_IRQ | DP_SINK_SPECIFIC_IRQ)) DRM_DEBUG_DRIVER("CP or sink specific irq unhandled\n"); } @@ -4205,6 +4297,11 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); intel_dp_check_link_status(intel_dp); drm_modeset_unlock(&dev->mode_config.connection_mutex); + if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { + DRM_DEBUG_KMS("Link Training Compliance Test requested\n"); + /* Send a Hotplug Uevent to userspace to start modeset */ + drm_kms_helper_hotplug_event(intel_encoder->base.dev); + } return true; } @@ -4213,9 +4310,13 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) static enum drm_connector_status intel_dp_detect_dpcd(struct intel_dp *intel_dp) { + struct intel_lspcon *lspcon = dp_to_lspcon(intel_dp); uint8_t *dpcd = intel_dp->dpcd; uint8_t type; + if (lspcon->active) + lspcon_resume(lspcon); + if (!intel_dp_get_dpcd(intel_dp)) return connector_status_disconnected; @@ -4474,11 +4575,9 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_device *dev = connector->dev; enum drm_connector_status status; - enum intel_display_power_domain power_domain; u8 sink_irq_vector = 0; - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_get(to_i915(dev), power_domain); + intel_display_power_get(to_i915(dev), intel_dp->aux_power_domain); /* Can't disconnect eDP, but you can close the lid... */ if (is_edp(intel_dp)) @@ -4511,11 +4610,15 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) yesno(intel_dp_source_supports_hbr2(intel_dp)), yesno(drm_dp_tps3_supported(intel_dp->dpcd))); - /* Set the max lane count for sink */ - intel_dp->max_sink_lane_count = drm_dp_max_lane_count(intel_dp->dpcd); + if (intel_dp->reset_link_params) { + /* Set the max lane count for sink */ + intel_dp->max_sink_lane_count = drm_dp_max_lane_count(intel_dp->dpcd); + + /* Set the max link BW for sink */ + intel_dp->max_sink_link_bw = intel_dp_max_link_bw(intel_dp); - /* Set the max link BW for sink */ - intel_dp->max_sink_link_bw = intel_dp_max_link_bw(intel_dp); + intel_dp->reset_link_params = false; + } intel_dp_print_rates(intel_dp); @@ -4575,7 +4678,7 @@ out: if (status != connector_status_connected && !intel_dp->is_mst) intel_dp_unset_edid(intel_dp); - intel_display_power_put(to_i915(dev), power_domain); + intel_display_power_put(to_i915(dev), intel_dp->aux_power_domain); return status; } @@ -4603,7 +4706,6 @@ intel_dp_force(struct drm_connector *connector) struct intel_dp *intel_dp = intel_attached_dp(connector); struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base; struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); - enum intel_display_power_domain power_domain; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); @@ -4612,12 +4714,11 @@ intel_dp_force(struct drm_connector *connector) if (connector->status != connector_status_connected) return; - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); intel_dp_set_edid(intel_dp); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); if (intel_encoder->type != INTEL_OUTPUT_EDP) intel_encoder->type = INTEL_OUTPUT_DP; @@ -4852,7 +4953,6 @@ static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp) struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; lockdep_assert_held(&dev_priv->pps_mutex); @@ -4866,8 +4966,7 @@ static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp) * indefinitely. */ DRM_DEBUG_KMS("VDD left on by BIOS, adjusting state tracking\n"); - power_domain = intel_display_port_aux_power_domain(&intel_dig_port->base); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); edp_panel_vdd_schedule_off(intel_dp); } @@ -4897,6 +4996,8 @@ void intel_dp_encoder_reset(struct drm_encoder *encoder) if (lspcon->active) lspcon_resume(lspcon); + intel_dp->reset_link_params = true; + pps_lock(intel_dp); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) @@ -4939,10 +5040,8 @@ enum irqreturn intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) { struct intel_dp *intel_dp = &intel_dig_port->dp; - struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; enum irqreturn ret = IRQ_NONE; if (intel_dig_port->base.type != INTEL_OUTPUT_EDP && @@ -4966,12 +5065,12 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) long_hpd ? "long" : "short"); if (long_hpd) { + intel_dp->reset_link_params = true; intel_dp->detect_done = false; return IRQ_NONE; } - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); if (intel_dp->is_mst) { if (intel_dp_check_mst_status(intel_dp) == -EINVAL) { @@ -4999,7 +5098,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) ret = IRQ_HANDLED; put_power: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); return ret; } @@ -5790,6 +5889,41 @@ out_vdd_off: return false; } +/* Set up the hotplug pin and aux power domain. */ +static void +intel_dp_init_connector_port_info(struct intel_digital_port *intel_dig_port) +{ + struct intel_encoder *encoder = &intel_dig_port->base; + struct intel_dp *intel_dp = &intel_dig_port->dp; + + switch (intel_dig_port->port) { + case PORT_A: + encoder->hpd_pin = HPD_PORT_A; + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_A; + break; + case PORT_B: + encoder->hpd_pin = HPD_PORT_B; + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_B; + break; + case PORT_C: + encoder->hpd_pin = HPD_PORT_C; + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_C; + break; + case PORT_D: + encoder->hpd_pin = HPD_PORT_D; + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_D; + break; + case PORT_E: + encoder->hpd_pin = HPD_PORT_E; + + /* FIXME: Check VBT for actual wiring of PORT E */ + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_D; + break; + default: + MISSING_CASE(intel_dig_port->port); + } +} + bool intel_dp_init_connector(struct intel_digital_port *intel_dig_port, struct intel_connector *intel_connector) @@ -5807,6 +5941,7 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, intel_dig_port->max_lanes, port_name(port))) return false; + intel_dp->reset_link_params = true; intel_dp->pps_pipe = INVALID_PIPE; intel_dp->active_pipe = INVALID_PIPE; @@ -5863,6 +5998,8 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, connector->interlace_allowed = true; connector->doublescan_allowed = 0; + intel_dp_init_connector_port_info(intel_dig_port); + intel_dp_aux_init(intel_dp); INIT_DELAYED_WORK(&intel_dp->panel_vdd_work, @@ -5875,29 +6012,6 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, else intel_connector->get_hw_state = intel_connector_get_hw_state; - /* Set up the hotplug pin. */ - switch (port) { - case PORT_A: - intel_encoder->hpd_pin = HPD_PORT_A; - break; - case PORT_B: - intel_encoder->hpd_pin = HPD_PORT_B; - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - intel_encoder->hpd_pin = HPD_PORT_A; - break; - case PORT_C: - intel_encoder->hpd_pin = HPD_PORT_C; - break; - case PORT_D: - intel_encoder->hpd_pin = HPD_PORT_D; - break; - case PORT_E: - intel_encoder->hpd_pin = HPD_PORT_E; - break; - default: - BUG(); - } - /* init MST on ports that can support it */ if (HAS_DP_MST(dev_priv) && !is_edp(intel_dp) && (port == PORT_B || port == PORT_C || port == PORT_D)) @@ -5982,6 +6096,7 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, intel_dig_port->max_lanes = 4; intel_encoder->type = INTEL_OUTPUT_DP; + intel_encoder->power_domain = intel_port_to_power_domain(port); if (IS_CHERRYVIEW(dev_priv)) { if (port == PORT_D) intel_encoder->crtc_mask = 1 << 2; diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 38e3ca2f6f18..094cbdcbcd6d 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -47,6 +47,11 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, pipe_config->has_pch_encoder = false; bpp = 24; + if (intel_dp->compliance.test_data.bpc) { + bpp = intel_dp->compliance.test_data.bpc * 3; + DRM_DEBUG_KMS("Setting pipe bpp to %d\n", + bpp); + } /* * for MST we always configure max link bw - the spec doesn't * seem to suggest we should do otherwise. @@ -55,7 +60,7 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, pipe_config->lane_count = lane_count; - pipe_config->pipe_bpp = 24; + pipe_config->pipe_bpp = bpp; pipe_config->port_clock = intel_dp_max_link_rate(intel_dp); state = pipe_config->base.state; @@ -87,7 +92,6 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder, struct intel_dp *intel_dp = &intel_dig_port->dp; struct intel_connector *connector = to_intel_connector(old_conn_state->connector); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); int ret; DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links); @@ -98,10 +102,8 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder, if (ret) { DRM_ERROR("failed to update payload %d\n", ret); } - if (old_crtc_state->has_audio) { + if (old_crtc_state->has_audio) intel_audio_codec_disable(encoder); - intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO); - } } static void intel_mst_post_disable_dp(struct intel_encoder *encoder, @@ -157,23 +159,9 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links); - if (intel_dp->active_mst_links == 0) { - intel_ddi_clk_select(&intel_dig_port->base, - pipe_config->shared_dpll); - - intel_prepare_dp_ddi_buffers(&intel_dig_port->base); - intel_dp_set_link_params(intel_dp, - pipe_config->port_clock, - pipe_config->lane_count, - true); - - intel_ddi_init_dp_buf_reg(&intel_dig_port->base); - - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); - - intel_dp_start_link_train(intel_dp); - intel_dp_stop_link_train(intel_dp); - } + if (intel_dp->active_mst_links == 0) + intel_dig_port->base.pre_enable(&intel_dig_port->base, + pipe_config, NULL); ret = drm_dp_mst_allocate_vcpi(&intel_dp->mst_mgr, connector->port, @@ -214,10 +202,8 @@ static void intel_mst_enable_dp(struct intel_encoder *encoder, ret = drm_dp_check_act_status(&intel_dp->mst_mgr); ret = drm_dp_update_payload_part2(&intel_dp->mst_mgr); - if (pipe_config->has_audio) { - intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO); + if (pipe_config->has_audio) intel_audio_codec_enable(encoder, pipe_config, conn_state); - } } static bool intel_dp_mst_enc_get_hw_state(struct intel_encoder *encoder, @@ -548,6 +534,7 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum DRM_MODE_ENCODER_DPMST, "DP-MST %c", pipe_name(pipe)); intel_encoder->type = INTEL_OUTPUT_DP_MST; + intel_encoder->power_domain = intel_dig_port->base.power_domain; intel_encoder->port = intel_dig_port->port; intel_encoder->crtc_mask = 0x7; intel_encoder->cloneable = 0; diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index e59e43a9f3a6..b4de632f1158 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -42,44 +42,6 @@ * commit phase. */ -struct intel_shared_dpll * -skl_find_link_pll(struct drm_i915_private *dev_priv, int clock) -{ - struct intel_shared_dpll *pll = NULL; - struct intel_dpll_hw_state dpll_hw_state; - enum intel_dpll_id i; - bool found = false; - - if (!skl_ddi_dp_set_dpll_hw_state(clock, &dpll_hw_state)) - return pll; - - for (i = DPLL_ID_SKL_DPLL1; i <= DPLL_ID_SKL_DPLL3; i++) { - pll = &dev_priv->shared_dplls[i]; - - /* Only want to check enabled timings first */ - if (pll->state.crtc_mask == 0) - continue; - - if (memcmp(&dpll_hw_state, &pll->state.hw_state, - sizeof(pll->state.hw_state)) == 0) { - found = true; - break; - } - } - - /* Ok no matching timings, maybe there's a free one? */ - for (i = DPLL_ID_SKL_DPLL1; - ((found == false) && (i <= DPLL_ID_SKL_DPLL3)); i++) { - pll = &dev_priv->shared_dplls[i]; - if (pll->state.crtc_mask == 0) { - pll->state.hw_state = dpll_hw_state; - break; - } - } - - return pll; -} - static void intel_atomic_duplicate_dpll_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll_state *shared_dpll) @@ -811,8 +773,8 @@ static struct intel_shared_dpll *hsw_ddi_hdmi_get_dpll(int clock, return pll; } -struct intel_shared_dpll *hsw_ddi_dp_get_dpll(struct intel_encoder *encoder, - int clock) +static struct intel_shared_dpll * +hsw_ddi_dp_get_dpll(struct intel_encoder *encoder, int clock) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_shared_dpll *pll; @@ -1360,8 +1322,9 @@ static bool skl_ddi_hdmi_pll_dividers(struct intel_crtc *crtc, } -bool skl_ddi_dp_set_dpll_hw_state(int clock, - struct intel_dpll_hw_state *dpll_hw_state) +static bool +skl_ddi_dp_set_dpll_hw_state(int clock, + struct intel_dpll_hw_state *dpll_hw_state) { uint32_t ctrl1; @@ -1816,8 +1779,9 @@ static bool bxt_ddi_set_dpll_hw_state(int clock, return true; } -bool bxt_ddi_dp_set_dpll_hw_state(int clock, - struct intel_dpll_hw_state *dpll_hw_state) +static bool +bxt_ddi_dp_set_dpll_hw_state(int clock, + struct intel_dpll_hw_state *dpll_hw_state) { struct bxt_clk_div clk_div = {0}; @@ -2016,7 +1980,7 @@ void intel_shared_dpll_init(struct drm_device *dev) const struct dpll_info *dpll_info; int i; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) dpll_mgr = &skl_pll_mgr; else if (IS_GEN9_LP(dev_priv)) dpll_mgr = &bxt_pll_mgr; diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h b/drivers/gpu/drm/i915/intel_dpll_mgr.h index af1497eb4f9c..f8d13a947c13 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.h @@ -282,20 +282,4 @@ void intel_shared_dpll_init(struct drm_device *dev); void intel_dpll_dump_hw_state(struct drm_i915_private *dev_priv, struct intel_dpll_hw_state *hw_state); -/* BXT dpll related functions */ -bool bxt_ddi_dp_set_dpll_hw_state(int clock, - struct intel_dpll_hw_state *dpll_hw_state); - - -/* SKL dpll related functions */ -bool skl_ddi_dp_set_dpll_hw_state(int clock, - struct intel_dpll_hw_state *dpll_hw_state); -struct intel_shared_dpll *skl_find_link_pll(struct drm_i915_private *dev_priv, - int clock); - - -/* HSW dpll related functions */ -struct intel_shared_dpll *hsw_ddi_dp_get_dpll(struct intel_encoder *encoder, - int clock); - #endif /* _INTEL_DPLL_MGR_H_ */ diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 064582963ff6..0f766f83a31b 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -242,6 +242,9 @@ struct intel_encoder { * be set correctly before calling this function. */ void (*get_config)(struct intel_encoder *, struct intel_crtc_state *pipe_config); + /* Returns a mask of power domains that need to be referenced as part + * of the hardware state readout code. */ + u64 (*get_power_domains)(struct intel_encoder *encoder); /* * Called during system suspend after all pending requests for the * encoder are flushed (for example for DP AUX transactions) and @@ -250,6 +253,7 @@ struct intel_encoder { void (*suspend)(struct intel_encoder *); int crtc_mask; enum hpd_pin hpd_pin; + enum intel_display_power_domain power_domain; /* for communication with audio component; protected by av_mutex */ const struct drm_connector *audio_connector; }; @@ -334,13 +338,20 @@ struct dpll { struct intel_atomic_state { struct drm_atomic_state base; - unsigned int cdclk; - - /* - * Calculated device cdclk, can be different from cdclk - * only when all crtc's are DPMS off. - */ - unsigned int dev_cdclk; + struct { + /* + * Logical state of cdclk (used for all scaling, watermark, + * etc. calculations and checks). This is computed as if all + * enabled crtcs were active. + */ + struct intel_cdclk_state logical; + + /* + * Actual state of cdclk, can be different from the logical + * state only when all crtc's are DPMS off. + */ + struct intel_cdclk_state actual; + } cdclk; bool dpll_set, modeset; @@ -357,9 +368,6 @@ struct intel_atomic_state { unsigned int active_crtcs; unsigned int min_pixclk[I915_MAX_PIPES]; - /* SKL/KBL Only */ - unsigned int cdclk_pll_vco; - struct intel_shared_dpll_state shared_dpll[I915_NUM_PLLS]; /* @@ -485,6 +493,24 @@ struct skl_pipe_wm { uint32_t linetime; }; +enum vlv_wm_level { + VLV_WM_LEVEL_PM2, + VLV_WM_LEVEL_PM5, + VLV_WM_LEVEL_DDR_DVFS, + NUM_VLV_WM_LEVELS, +}; + +struct vlv_wm_state { + struct vlv_pipe_wm wm[NUM_VLV_WM_LEVELS]; + struct vlv_sr_wm sr[NUM_VLV_WM_LEVELS]; + uint8_t num_levels; + bool cxsr; +}; + +struct vlv_fifo_state { + u16 plane[I915_MAX_PLANES]; +}; + struct intel_crtc_wm_state { union { struct { @@ -509,6 +535,17 @@ struct intel_crtc_wm_state { struct skl_pipe_wm optimal; struct skl_ddb_entry ddb; } skl; + + struct { + /* "raw" watermarks (not inverted) */ + struct vlv_pipe_wm raw[NUM_VLV_WM_LEVELS]; + /* intermediate watermarks (inverted) */ + struct vlv_wm_state intermediate; + /* optimal watermarks (inverted) */ + struct vlv_wm_state optimal; + /* display FIFO split */ + struct vlv_fifo_state fifo_state; + } vlv; }; /* @@ -539,12 +576,19 @@ struct intel_crtc_state { bool disable_cxsr; bool update_wm_pre, update_wm_post; /* watermarks are updated */ bool fb_changed; /* fb on any of the planes is changed */ + bool fifo_changed; /* FIFO split is changed */ /* Pipe source size (ie. panel fitter input size) * All planes will be positioned inside this space, * and get clipped at the edges. */ int pipe_src_w, pipe_src_h; + /* + * Pipe pixel rate, adjusted for + * panel fitter/pipe scaler downscaling. + */ + unsigned int pixel_rate; + /* Whether to set up the PCH/FDI. Note that we never allow sharing * between pch encoders and cpu encoders. */ bool has_pch_encoder; @@ -581,6 +625,14 @@ struct intel_crtc_state { */ bool dither; + /* + * Dither gets enabled for 18bpp which causes CRC mismatch errors for + * compliance video pattern tests. + * Disable dither only if it is a compliance test request for + * 18bpp. + */ + bool dither_force_disable; + /* Controls for the clock computation, to override various stages. */ bool clock_set; @@ -674,15 +726,9 @@ struct intel_crtc_state { /* Gamma mode programmed on the pipe */ uint32_t gamma_mode; -}; -struct vlv_wm_state { - struct vlv_pipe_wm wm[3]; - struct vlv_sr_wm sr[3]; - uint8_t num_active_planes; - uint8_t num_levels; - uint8_t level; - bool cxsr; + /* bitmask of visible planes (enum plane_id) */ + u8 active_planes; }; struct intel_crtc { @@ -698,7 +744,7 @@ struct intel_crtc { bool active; bool lowfreq_avail; u8 plane_ids_mask; - unsigned long enabled_power_domains; + unsigned long long enabled_power_domains; struct intel_overlay *overlay; struct intel_flip_work *flip_work; @@ -730,10 +776,8 @@ struct intel_crtc { /* watermarks currently being used */ union { struct intel_pipe_wm ilk; + struct vlv_wm_state vlv; } active; - - /* allow CxSR on this pipe */ - bool cxsr_allowed; } wm; int scanline_offset; @@ -747,27 +791,6 @@ struct intel_crtc { /* scalers available on this crtc */ int num_scalers; - - struct vlv_wm_state wm_state; -}; - -struct intel_plane_wm_parameters { - uint32_t horiz_pixels; - uint32_t vert_pixels; - /* - * For packed pixel formats: - * bytes_per_pixel - holds bytes per pixel - * For planar pixel formats: - * bytes_per_pixel - holds bytes per pixel for uv-plane - * y_bytes_per_pixel - holds bytes per pixel for y-plane - */ - uint8_t bytes_per_pixel; - uint8_t y_bytes_per_pixel; - bool enabled; - bool scaled; - u64 tiling; - unsigned int rotation; - uint16_t fifo_size; }; struct intel_plane { @@ -779,13 +802,6 @@ struct intel_plane { int max_downscale; uint32_t frontbuffer_bit; - /* Since we need to change the watermarks before/after - * enabling/disabling the planes, we need to store the parameters here - * as the other pieces of the struct may not reflect the values we want - * for the watermark calculations. Currently only Haswell uses this. - */ - struct intel_plane_wm_parameters wm; - /* * NOTE: Do not place new plane state fields here (e.g., when adding * new plane properties). New runtime state should now be placed in @@ -891,12 +907,17 @@ struct intel_dp_desc { struct intel_dp_compliance_data { unsigned long edid; + uint8_t video_pattern; + uint16_t hdisplay, vdisplay; + uint8_t bpc; }; struct intel_dp_compliance { unsigned long test_type; struct intel_dp_compliance_data test_data; bool test_active; + int test_link_rate; + u8 test_lane_count; }; struct intel_dp { @@ -911,6 +932,7 @@ struct intel_dp { bool has_audio; bool detect_done; bool channel_eq_status; + bool reset_link_params; enum hdmi_force_audio force_audio; bool limited_color_range; bool color_range_auto; @@ -928,6 +950,7 @@ struct intel_dp { /* sink or branch descriptor */ struct intel_dp_desc desc; struct drm_dp_aux aux; + enum intel_display_power_domain aux_power_domain; uint8_t train_set[4]; int panel_power_up_delay; int panel_power_down_delay; @@ -990,7 +1013,6 @@ struct intel_dp { struct intel_lspcon { bool active; enum drm_lspcon_mode mode; - bool desc_valid; }; struct intel_digital_port { @@ -1003,6 +1025,7 @@ struct intel_digital_port { enum irqreturn (*hpd_pulse)(struct intel_digital_port *, bool); bool release_cl2_override; uint8_t max_lanes; + enum intel_display_power_domain ddi_io_power_domain; }; struct intel_dp_mst_encoder { @@ -1097,7 +1120,19 @@ intel_attached_encoder(struct drm_connector *connector) static inline struct intel_digital_port * enc_to_dig_port(struct drm_encoder *encoder) { - return container_of(encoder, struct intel_digital_port, base.base); + struct intel_encoder *intel_encoder = to_intel_encoder(encoder); + + switch (intel_encoder->type) { + case INTEL_OUTPUT_UNKNOWN: + WARN_ON(!HAS_DDI(to_i915(encoder->dev))); + case INTEL_OUTPUT_DP: + case INTEL_OUTPUT_EDP: + case INTEL_OUTPUT_HDMI: + return container_of(encoder, struct intel_digital_port, + base.base); + default: + return NULL; + } } static inline struct intel_dp_mst_encoder * @@ -1185,18 +1220,19 @@ void intel_ddi_fdi_post_disable(struct intel_encoder *intel_encoder, struct intel_crtc_state *old_crtc_state, struct drm_connector_state *old_conn_state); void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder); -void hsw_fdi_link_train(struct drm_crtc *crtc); +void hsw_fdi_link_train(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state); void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port); enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder); bool intel_ddi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe); -void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc); +void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state); void intel_ddi_disable_transcoder_func(struct drm_i915_private *dev_priv, enum transcoder cpu_transcoder); -void intel_ddi_enable_pipe_clock(struct intel_crtc *intel_crtc); -void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc); +void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state); +void intel_ddi_disable_pipe_clock(const struct intel_crtc_state *crtc_state); bool intel_ddi_pll_select(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state); -void intel_ddi_set_pipe_settings(struct drm_crtc *crtc); +void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state); void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp); bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector); bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv, @@ -1209,11 +1245,12 @@ intel_ddi_get_crtc_new_encoder(struct intel_crtc_state *crtc_state); void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder); void intel_ddi_clock_get(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); -void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state); +void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, + bool state); uint32_t ddi_signal_levels(struct intel_dp *intel_dp); -struct intel_shared_dpll *intel_ddi_get_link_dpll(struct intel_dp *intel_dp, - int clock); -unsigned int intel_fb_align_height(struct drm_device *dev, +u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder); + +unsigned int intel_fb_align_height(struct drm_i915_private *dev_priv, unsigned int height, uint32_t pixel_format, uint64_t fb_format_modifier); @@ -1231,12 +1268,24 @@ void i915_audio_component_cleanup(struct drm_i915_private *dev_priv); void intel_audio_init(struct drm_i915_private *dev_priv); void intel_audio_deinit(struct drm_i915_private *dev_priv); +/* intel_cdclk.c */ +void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv); +void intel_update_max_cdclk(struct drm_i915_private *dev_priv); +void intel_update_cdclk(struct drm_i915_private *dev_priv); +void intel_update_rawclk(struct drm_i915_private *dev_priv); +bool intel_cdclk_state_compare(const struct intel_cdclk_state *a, + const struct intel_cdclk_state *b); +void intel_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state); + /* intel_display.c */ enum transcoder intel_crtc_pch_transcoder(struct intel_crtc *crtc); -void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv, int vco); void intel_update_rawclk(struct drm_i915_private *dev_priv); +int vlv_get_hpll_vco(struct drm_i915_private *dev_priv); int vlv_get_cck_clock(struct drm_i915_private *dev_priv, const char *name, u32 reg, int ref_freq); +int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, + const char *name, u32 reg); void lpt_disable_pch_transcoder(struct drm_i915_private *dev_priv); void lpt_disable_iclkip(struct drm_i915_private *dev_priv); extern const struct drm_plane_funcs intel_plane_funcs; @@ -1311,9 +1360,8 @@ struct i915_vma * intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation); void intel_unpin_fb_vma(struct i915_vma *vma); struct drm_framebuffer * -__intel_framebuffer_create(struct drm_device *dev, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj); +intel_framebuffer_create(struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd); void intel_finish_page_flip_cs(struct drm_i915_private *dev_priv, int pipe); void intel_finish_page_flip_mmio(struct drm_i915_private *dev_priv, int pipe); void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe); @@ -1388,10 +1436,7 @@ int chv_calc_dpll_params(int refclk, struct dpll *pll_clock); bool intel_crtc_active(struct intel_crtc *crtc); void hsw_enable_ips(struct intel_crtc *crtc); void hsw_disable_ips(struct intel_crtc *crtc); -enum intel_display_power_domain -intel_display_port_power_domain(struct intel_encoder *intel_encoder); -enum intel_display_power_domain -intel_display_port_aux_power_domain(struct intel_encoder *intel_encoder); +enum intel_display_power_domain intel_port_to_power_domain(enum port port); void intel_mode_from_pipe_config(struct drm_display_mode *mode, struct intel_crtc_state *pipe_config); @@ -1664,6 +1709,7 @@ int intel_power_domains_init(struct drm_i915_private *); void intel_power_domains_fini(struct drm_i915_private *); void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume); void intel_power_domains_suspend(struct drm_i915_private *dev_priv); +void intel_power_domains_verify_state(struct drm_i915_private *dev_priv); void bxt_display_core_init(struct drm_i915_private *dev_priv, bool resume); void bxt_display_core_uninit(struct drm_i915_private *dev_priv); void intel_runtime_pm_enable(struct drm_i915_private *dev_priv); @@ -1692,10 +1738,8 @@ static inline void assert_rpm_wakelock_held(struct drm_i915_private *dev_priv) { assert_rpm_device_not_suspended(dev_priv); - /* FIXME: Needs to be converted back to WARN_ONCE, but currently causes - * too much noise. */ - if (!atomic_read(&dev_priv->pm.wakeref_count)) - DRM_DEBUG_DRIVER("RPM wakelock ref not held during HW access"); + WARN_ONCE(!atomic_read(&dev_priv->pm.wakeref_count), + "RPM wakelock ref not held during HW access"); } /** @@ -1783,6 +1827,7 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, struct skl_ddb_allocation *ddb /* out */); void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc, struct skl_pipe_wm *out); +void vlv_wm_sanitize(struct drm_i915_private *dev_priv); bool intel_can_enable_sagv(struct drm_atomic_state *state); int intel_enable_sagv(struct drm_i915_private *dev_priv); int intel_disable_sagv(struct drm_i915_private *dev_priv); @@ -1791,7 +1836,6 @@ bool skl_wm_level_equals(const struct skl_wm_level *l1, bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries, const struct skl_ddb_entry *ddb, int ignore); -uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config); bool ilk_disable_lp_wm(struct drm_device *dev); int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6); static inline int intel_enable_rc6(void) @@ -1865,9 +1909,9 @@ intel_atomic_get_existing_plane_state(struct drm_atomic_state *state, return to_intel_plane_state(plane_state); } -int intel_atomic_setup_scalers(struct drm_device *dev, - struct intel_crtc *intel_crtc, - struct intel_crtc_state *crtc_state); +int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, + struct intel_crtc *intel_crtc, + struct intel_crtc_state *crtc_state); /* intel_atomic_plane.c */ struct intel_plane_state *intel_create_plane_state(struct drm_plane *plane); diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 16732e7bc08e..323fc097c3ee 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -80,7 +80,7 @@ enum mipi_dsi_pixel_format pixel_format_from_register_bits(u32 fmt) } } -static void wait_for_dsi_fifo_empty(struct intel_dsi *intel_dsi, enum port port) +void wait_for_dsi_fifo_empty(struct intel_dsi *intel_dsi, enum port port) { struct drm_encoder *encoder = &intel_dsi->base.base; struct drm_device *dev = encoder->dev; @@ -357,41 +357,132 @@ static bool intel_dsi_compute_config(struct intel_encoder *encoder, return true; } -static void bxt_dsi_device_ready(struct intel_encoder *encoder) +static void glk_dsi_device_ready(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); enum port port; - u32 val; + u32 tmp, val; - DRM_DEBUG_KMS("\n"); + /* Set the MIPI mode + * If MIPI_Mode is off, then writing to LP_Wake bit is not reflecting. + * Power ON MIPI IO first and then write into IO reset and LP wake bits + */ + for_each_dsi_port(port, intel_dsi->ports) { + tmp = I915_READ(MIPI_CTRL(port)); + I915_WRITE(MIPI_CTRL(port), tmp | GLK_MIPIIO_ENABLE); + } + + /* Put the IO into reset */ + tmp = I915_READ(MIPI_CTRL(PORT_A)); + tmp &= ~GLK_MIPIIO_RESET_RELEASED; + I915_WRITE(MIPI_CTRL(PORT_A), tmp); - /* Exit Low power state in 4 steps*/ + /* Program LP Wake */ for_each_dsi_port(port, intel_dsi->ports) { + tmp = I915_READ(MIPI_CTRL(port)); + tmp |= GLK_LP_WAKE; + I915_WRITE(MIPI_CTRL(port), tmp); + } - /* 1. Enable MIPI PHY transparent latch */ - val = I915_READ(BXT_MIPI_PORT_CTRL(port)); - I915_WRITE(BXT_MIPI_PORT_CTRL(port), val | LP_OUTPUT_HOLD); - usleep_range(2000, 2500); + /* Wait for Pwr ACK */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), GLK_MIPIIO_PORT_POWERED, + GLK_MIPIIO_PORT_POWERED, 20)) + DRM_ERROR("MIPIO port is powergated\n"); + } + + /* Wait for MIPI PHY status bit to set */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), GLK_PHY_STATUS_PORT_READY, + GLK_PHY_STATUS_PORT_READY, 20)) + DRM_ERROR("PHY is not ON\n"); + } + + /* Get IO out of reset */ + tmp = I915_READ(MIPI_CTRL(PORT_A)); + I915_WRITE(MIPI_CTRL(PORT_A), tmp | GLK_MIPIIO_RESET_RELEASED); - /* 2. Enter ULPS */ + /* Get IO out of Low power state*/ + for_each_dsi_port(port, intel_dsi->ports) { + if (!(I915_READ(MIPI_DEVICE_READY(port)) & DEVICE_READY)) { + val = I915_READ(MIPI_DEVICE_READY(port)); + val &= ~ULPS_STATE_MASK; + val |= DEVICE_READY; + I915_WRITE(MIPI_DEVICE_READY(port), val); + usleep_range(10, 15); + } + + /* Enter ULPS */ val = I915_READ(MIPI_DEVICE_READY(port)); val &= ~ULPS_STATE_MASK; val |= (ULPS_STATE_ENTER | DEVICE_READY); I915_WRITE(MIPI_DEVICE_READY(port), val); - /* at least 2us - relaxed for hrtimer subsystem optimization */ - usleep_range(10, 50); - /* 3. Exit ULPS */ + /* Wait for ULPS Not active */ + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), GLK_ULPS_NOT_ACTIVE, + GLK_ULPS_NOT_ACTIVE, 20)) + DRM_ERROR("ULPS is still active\n"); + + /* Exit ULPS */ val = I915_READ(MIPI_DEVICE_READY(port)); val &= ~ULPS_STATE_MASK; val |= (ULPS_STATE_EXIT | DEVICE_READY); I915_WRITE(MIPI_DEVICE_READY(port), val); - usleep_range(1000, 1500); - /* Clear ULPS and set device ready */ + /* Enter Normal Mode */ + val = I915_READ(MIPI_DEVICE_READY(port)); + val &= ~ULPS_STATE_MASK; + val |= (ULPS_STATE_NORMAL_OPERATION | DEVICE_READY); + I915_WRITE(MIPI_DEVICE_READY(port), val); + + tmp = I915_READ(MIPI_CTRL(port)); + tmp &= ~GLK_LP_WAKE; + I915_WRITE(MIPI_CTRL(port), tmp); + } + + /* Wait for Stop state */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), GLK_DATA_LANE_STOP_STATE, + GLK_DATA_LANE_STOP_STATE, 20)) + DRM_ERROR("Date lane not in STOP state\n"); + } + + /* Wait for AFE LATCH */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + BXT_MIPI_PORT_CTRL(port), AFE_LATCHOUT, + AFE_LATCHOUT, 20)) + DRM_ERROR("D-PHY not entering LP-11 state\n"); + } +} + +static void bxt_dsi_device_ready(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; + u32 val; + + DRM_DEBUG_KMS("\n"); + + /* Enable MIPI PHY transparent latch */ + for_each_dsi_port(port, intel_dsi->ports) { + val = I915_READ(BXT_MIPI_PORT_CTRL(port)); + I915_WRITE(BXT_MIPI_PORT_CTRL(port), val | LP_OUTPUT_HOLD); + usleep_range(2000, 2500); + } + + /* Clear ULPS and set device ready */ + for_each_dsi_port(port, intel_dsi->ports) { val = I915_READ(MIPI_DEVICE_READY(port)); val &= ~ULPS_STATE_MASK; + I915_WRITE(MIPI_DEVICE_READY(port), val); + usleep_range(2000, 2500); val |= DEVICE_READY; I915_WRITE(MIPI_DEVICE_READY(port), val); } @@ -442,8 +533,121 @@ static void intel_dsi_device_ready(struct intel_encoder *encoder) if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) vlv_dsi_device_ready(encoder); - else if (IS_GEN9_LP(dev_priv)) + else if (IS_BROXTON(dev_priv)) bxt_dsi_device_ready(encoder); + else if (IS_GEMINILAKE(dev_priv)) + glk_dsi_device_ready(encoder); +} + +static void glk_dsi_enter_low_power_mode(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; + u32 val; + + /* Enter ULPS */ + for_each_dsi_port(port, intel_dsi->ports) { + val = I915_READ(MIPI_DEVICE_READY(port)); + val &= ~ULPS_STATE_MASK; + val |= (ULPS_STATE_ENTER | DEVICE_READY); + I915_WRITE(MIPI_DEVICE_READY(port), val); + } + + /* Wait for MIPI PHY status bit to unset */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), + GLK_PHY_STATUS_PORT_READY, 0, 20)) + DRM_ERROR("PHY is not turning OFF\n"); + } + + /* Wait for Pwr ACK bit to unset */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), + GLK_MIPIIO_PORT_POWERED, 0, 20)) + DRM_ERROR("MIPI IO Port is not powergated\n"); + } +} + +static void glk_dsi_disable_mipi_io(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; + u32 tmp; + + /* Put the IO into reset */ + tmp = I915_READ(MIPI_CTRL(PORT_A)); + tmp &= ~GLK_MIPIIO_RESET_RELEASED; + I915_WRITE(MIPI_CTRL(PORT_A), tmp); + + /* Wait for MIPI PHY status bit to unset */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), + GLK_PHY_STATUS_PORT_READY, 0, 20)) + DRM_ERROR("PHY is not turning OFF\n"); + } + + /* Clear MIPI mode */ + for_each_dsi_port(port, intel_dsi->ports) { + tmp = I915_READ(MIPI_CTRL(port)); + tmp &= ~GLK_MIPIIO_ENABLE; + I915_WRITE(MIPI_CTRL(port), tmp); + } +} + +static void glk_dsi_clear_device_ready(struct intel_encoder *encoder) +{ + glk_dsi_enter_low_power_mode(encoder); + glk_dsi_disable_mipi_io(encoder); +} + +static void vlv_dsi_clear_device_ready(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; + + DRM_DEBUG_KMS("\n"); + for_each_dsi_port(port, intel_dsi->ports) { + /* Common bit for both MIPI Port A & MIPI Port C on VLV/CHV */ + i915_reg_t port_ctrl = IS_GEN9_LP(dev_priv) ? + BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(PORT_A); + u32 val; + + I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY | + ULPS_STATE_ENTER); + usleep_range(2000, 2500); + + I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY | + ULPS_STATE_EXIT); + usleep_range(2000, 2500); + + I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY | + ULPS_STATE_ENTER); + usleep_range(2000, 2500); + + /* + * On VLV/CHV, wait till Clock lanes are in LP-00 state for MIPI + * Port A only. MIPI Port C has no similar bit for checking. + */ + if ((IS_GEN9_LP(dev_priv) || port == PORT_A) && + intel_wait_for_register(dev_priv, + port_ctrl, AFE_LATCHOUT, 0, + 30)) + DRM_ERROR("DSI LP not going Low\n"); + + /* Disable MIPI PHY transparent latch */ + val = I915_READ(port_ctrl); + I915_WRITE(port_ctrl, val & ~LP_OUTPUT_HOLD); + usleep_range(1000, 1500); + + I915_WRITE(MIPI_DEVICE_READY(port), 0x00); + usleep_range(2000, 2500); + } } static void intel_dsi_port_enable(struct intel_encoder *encoder) @@ -456,12 +660,21 @@ static void intel_dsi_port_enable(struct intel_encoder *encoder) if (intel_dsi->dual_link == DSI_DUAL_LINK_FRONT_BACK) { u32 temp; - - temp = I915_READ(VLV_CHICKEN_3); - temp &= ~PIXEL_OVERLAP_CNT_MASK | + if (IS_GEN9_LP(dev_priv)) { + for_each_dsi_port(port, intel_dsi->ports) { + temp = I915_READ(MIPI_CTRL(port)); + temp &= ~BXT_PIXEL_OVERLAP_CNT_MASK | + intel_dsi->pixel_overlap << + BXT_PIXEL_OVERLAP_CNT_SHIFT; + I915_WRITE(MIPI_CTRL(port), temp); + } + } else { + temp = I915_READ(VLV_CHICKEN_3); + temp &= ~PIXEL_OVERLAP_CNT_MASK | intel_dsi->pixel_overlap << PIXEL_OVERLAP_CNT_SHIFT; - I915_WRITE(VLV_CHICKEN_3, temp); + I915_WRITE(VLV_CHICKEN_3, temp); + } } for_each_dsi_port(port, intel_dsi->ports) { @@ -509,37 +722,57 @@ static void intel_dsi_port_disable(struct intel_encoder *encoder) } } -static void intel_dsi_enable(struct intel_encoder *encoder) -{ - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - enum port port; - - DRM_DEBUG_KMS("\n"); - - if (is_cmd_mode(intel_dsi)) { - for_each_dsi_port(port, intel_dsi->ports) - I915_WRITE(MIPI_MAX_RETURN_PKT_SIZE(port), 8 * 4); - } else { - msleep(20); /* XXX */ - for_each_dsi_port(port, intel_dsi->ports) - dpi_send_cmd(intel_dsi, TURN_ON, false, port); - msleep(100); - - drm_panel_enable(intel_dsi->panel); +static void intel_dsi_prepare(struct intel_encoder *intel_encoder, + struct intel_crtc_state *pipe_config); +static void intel_dsi_unprepare(struct intel_encoder *encoder); - for_each_dsi_port(port, intel_dsi->ports) - wait_for_dsi_fifo_empty(intel_dsi, port); +static void intel_dsi_msleep(struct intel_dsi *intel_dsi, int msec) +{ + struct drm_i915_private *dev_priv = to_i915(intel_dsi->base.base.dev); - intel_dsi_port_enable(encoder); - } + /* For v3 VBTs in vid-mode the delays are part of the VBT sequences */ + if (is_vid_mode(intel_dsi) && dev_priv->vbt.dsi.seq_version >= 3) + return; - intel_panel_enable_backlight(intel_dsi->attached_connector); + msleep(msec); } -static void intel_dsi_prepare(struct intel_encoder *intel_encoder, - struct intel_crtc_state *pipe_config); +/* + * Panel enable/disable sequences from the VBT spec. + * + * Note the spec has AssertReset / DeassertReset swapped from their + * usual naming. We use the normal names to avoid confusion (so below + * they are swapped compared to the spec). + * + * Steps starting with MIPI refer to VBT sequences, note that for v2 + * VBTs several steps which have a VBT in v2 are expected to be handled + * directly by the driver, by directly driving gpios for example. + * + * v2 video mode seq v3 video mode seq command mode seq + * - power on - MIPIPanelPowerOn - power on + * - wait t1+t2 - wait t1+t2 + * - MIPIDeassertResetPin - MIPIDeassertResetPin - MIPIDeassertResetPin + * - io lines to lp-11 - io lines to lp-11 - io lines to lp-11 + * - MIPISendInitialDcsCmds - MIPISendInitialDcsCmds - MIPISendInitialDcsCmds + * - MIPITearOn + * - MIPIDisplayOn + * - turn on DPI - turn on DPI - set pipe to dsr mode + * - MIPIDisplayOn - MIPIDisplayOn + * - wait t5 - wait t5 + * - backlight on - MIPIBacklightOn - backlight on + * ... ... ... issue mem cmds ... + * - backlight off - MIPIBacklightOff - backlight off + * - wait t6 - wait t6 + * - MIPIDisplayOff + * - turn off DPI - turn off DPI - disable pipe dsr mode + * - MIPITearOff + * - MIPIDisplayOff - MIPIDisplayOff + * - io lines to lp-00 - io lines to lp-00 - io lines to lp-00 + * - MIPIAssertResetPin - MIPIAssertResetPin - MIPIAssertResetPin + * - wait t3 - wait t3 + * - power off - MIPIPanelPowerOff - power off + * - wait t4 - wait t4 + */ static void intel_dsi_pre_enable(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, @@ -548,6 +781,7 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); enum port port; + u32 val; DRM_DEBUG_KMS("\n"); @@ -558,13 +792,16 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, intel_disable_dsi_pll(encoder); intel_enable_dsi_pll(encoder, pipe_config); - intel_dsi_prepare(encoder, pipe_config); - - /* Panel Enable over CRC PMIC */ - if (intel_dsi->gpio_panel) - gpiod_set_value_cansleep(intel_dsi->gpio_panel, 1); + if (IS_BROXTON(dev_priv)) { + /* Add MIPI IO reset programming for modeset */ + val = I915_READ(BXT_P_CR_GT_DISP_PWRON); + I915_WRITE(BXT_P_CR_GT_DISP_PWRON, + val | MIPIO_RST_CTRL); - msleep(intel_dsi->panel_on_delay); + /* Power up DSI regulator */ + I915_WRITE(BXT_P_DSI_REGULATOR_CFG, STAP_SELECT); + I915_WRITE(BXT_P_DSI_REGULATOR_TX_CTRL, 0); + } if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { u32 val; @@ -575,17 +812,43 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, I915_WRITE(DSPCLK_GATE_D, val); } - /* put device in ready state */ - intel_dsi_device_ready(encoder); + intel_dsi_prepare(encoder, pipe_config); + + /* Power on, try both CRC pmic gpio and VBT */ + if (intel_dsi->gpio_panel) + gpiod_set_value_cansleep(intel_dsi->gpio_panel, 1); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_ON); + intel_dsi_msleep(intel_dsi, intel_dsi->panel_on_delay); - drm_panel_prepare(intel_dsi->panel); + /* Deassert reset */ + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); - for_each_dsi_port(port, intel_dsi->ports) - wait_for_dsi_fifo_empty(intel_dsi, port); + /* Put device in ready state (LP-11) */ + intel_dsi_device_ready(encoder); + + /* Send initialization commands in LP mode */ + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_INIT_OTP); /* Enable port in pre-enable phase itself because as per hw team * recommendation, port should be enabled befor plane & pipe */ - intel_dsi_enable(encoder); + if (is_cmd_mode(intel_dsi)) { + for_each_dsi_port(port, intel_dsi->ports) + I915_WRITE(MIPI_MAX_RETURN_PKT_SIZE(port), 8 * 4); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_TEAR_ON); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); + } else { + msleep(20); /* XXX */ + for_each_dsi_port(port, intel_dsi->ports) + dpi_send_cmd(intel_dsi, TURN_ON, false, port); + intel_dsi_msleep(intel_dsi, 100); + + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); + + intel_dsi_port_enable(encoder); + } + + intel_panel_enable_backlight(intel_dsi->attached_connector); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON); } static void intel_dsi_enable_nop(struct intel_encoder *encoder, @@ -604,13 +867,30 @@ static void intel_dsi_pre_disable(struct intel_encoder *encoder, struct intel_crtc_state *old_crtc_state, struct drm_connector_state *old_conn_state) { + struct drm_device *dev = encoder->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); enum port port; DRM_DEBUG_KMS("\n"); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_OFF); intel_panel_disable_backlight(intel_dsi->attached_connector); + /* + * Disable Device ready before the port shutdown in order + * to avoid split screen + */ + if (IS_BROXTON(dev_priv)) { + for_each_dsi_port(port, intel_dsi->ports) + I915_WRITE(MIPI_DEVICE_READY(port), 0); + } + + /* + * According to the spec we should send SHUTDOWN before + * MIPI_SEQ_DISPLAY_OFF only for v3+ VBTs, but field testing + * has shown that the v3 sequence works for v2 VBTs too + */ if (is_vid_mode(intel_dsi)) { /* Send Shutdown command to the panel in LP mode */ for_each_dsi_port(port, intel_dsi->ports) @@ -619,13 +899,25 @@ static void intel_dsi_pre_disable(struct intel_encoder *encoder, } } -static void intel_dsi_disable(struct intel_encoder *encoder) +static void intel_dsi_clear_device_ready(struct intel_encoder *encoder) { - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv) || + IS_BROXTON(dev_priv)) + vlv_dsi_clear_device_ready(encoder); + else if (IS_GEMINILAKE(dev_priv)) + glk_dsi_clear_device_ready(encoder); +} + +static void intel_dsi_post_disable(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); enum port port; - u32 temp; + u32 val; DRM_DEBUG_KMS("\n"); @@ -634,85 +926,32 @@ static void intel_dsi_disable(struct intel_encoder *encoder) wait_for_dsi_fifo_empty(intel_dsi, port); intel_dsi_port_disable(encoder); - msleep(2); - } - - for_each_dsi_port(port, intel_dsi->ports) { - /* Panel commands can be sent when clock is in LP11 */ - I915_WRITE(MIPI_DEVICE_READY(port), 0x0); - - intel_dsi_reset_clocks(encoder, port); - I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); - - temp = I915_READ(MIPI_DSI_FUNC_PRG(port)); - temp &= ~VID_MODE_FORMAT_MASK; - I915_WRITE(MIPI_DSI_FUNC_PRG(port), temp); - - I915_WRITE(MIPI_DEVICE_READY(port), 0x1); + usleep_range(2000, 5000); } - /* if disable packets are sent before sending shutdown packet then in - * some next enable sequence send turn on packet error is observed */ - drm_panel_disable(intel_dsi->panel); - for_each_dsi_port(port, intel_dsi->ports) - wait_for_dsi_fifo_empty(intel_dsi, port); -} - -static void intel_dsi_clear_device_ready(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - enum port port; - - DRM_DEBUG_KMS("\n"); - for_each_dsi_port(port, intel_dsi->ports) { - /* Common bit for both MIPI Port A & MIPI Port C on VLV/CHV */ - i915_reg_t port_ctrl = IS_GEN9_LP(dev_priv) ? - BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(PORT_A); - u32 val; + intel_dsi_unprepare(encoder); - I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY | - ULPS_STATE_ENTER); - usleep_range(2000, 2500); - - I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY | - ULPS_STATE_EXIT); - usleep_range(2000, 2500); - - I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY | - ULPS_STATE_ENTER); - usleep_range(2000, 2500); + /* + * if disable packets are sent before sending shutdown packet then in + * some next enable sequence send turn on packet error is observed + */ + if (is_cmd_mode(intel_dsi)) + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_TEAR_OFF); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DISPLAY_OFF); - /* Wait till Clock lanes are in LP-00 state for MIPI Port A - * only. MIPI Port C has no similar bit for checking - */ - if (intel_wait_for_register(dev_priv, - port_ctrl, AFE_LATCHOUT, 0, - 30)) - DRM_ERROR("DSI LP not going Low\n"); + /* Transition to LP-00 */ + intel_dsi_clear_device_ready(encoder); - /* Disable MIPI PHY transparent latch */ - val = I915_READ(port_ctrl); - I915_WRITE(port_ctrl, val & ~LP_OUTPUT_HOLD); - usleep_range(1000, 1500); + if (IS_BROXTON(dev_priv)) { + /* Power down DSI regulator to save power */ + I915_WRITE(BXT_P_DSI_REGULATOR_CFG, STAP_SELECT); + I915_WRITE(BXT_P_DSI_REGULATOR_TX_CTRL, HS_IO_CTRL_SELECT); - I915_WRITE(MIPI_DEVICE_READY(port), 0x00); - usleep_range(2000, 2500); + /* Add MIPI IO reset programming for modeset */ + val = I915_READ(BXT_P_CR_GT_DISP_PWRON); + I915_WRITE(BXT_P_CR_GT_DISP_PWRON, + val & ~MIPIO_RST_CTRL); } -} - -static void intel_dsi_post_disable(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - - DRM_DEBUG_KMS("\n"); - - intel_dsi_disable(encoder); - - intel_dsi_clear_device_ready(encoder); intel_disable_dsi_pll(encoder); @@ -724,11 +963,12 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, I915_WRITE(DSPCLK_GATE_D, val); } - drm_panel_unprepare(intel_dsi->panel); - - msleep(intel_dsi->panel_off_delay); + /* Assert reset */ + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_ASSERT_RESET); - /* Panel Disable over CRC PMIC */ + /* Power off, try both CRC pmic gpio and VBT */ + intel_dsi_msleep(intel_dsi, intel_dsi->panel_off_delay); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_OFF); if (intel_dsi->gpio_panel) gpiod_set_value_cansleep(intel_dsi->gpio_panel, 0); @@ -736,7 +976,7 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, * FIXME As we do with eDP, just make a note of the time here * and perform the wait before the next panel power on. */ - msleep(intel_dsi->panel_pwr_cycle_delay); + intel_dsi_msleep(intel_dsi, intel_dsi->panel_pwr_cycle_delay); } static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, @@ -744,14 +984,13 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - enum intel_display_power_domain power_domain; enum port port; bool active = false; DRM_DEBUG_KMS("\n"); - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; /* @@ -807,7 +1046,7 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, } out_put_power: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return active; } @@ -1279,6 +1518,14 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, */ I915_WRITE(MIPI_LP_BYTECLK(port), intel_dsi->lp_byte_clk); + if (IS_GEMINILAKE(dev_priv)) { + I915_WRITE(MIPI_TLPX_TIME_COUNT(port), + intel_dsi->lp_byte_clk); + /* Shadow of DPHY reg */ + I915_WRITE(MIPI_CLK_LANE_TIMING(port), + intel_dsi->dphy_reg); + } + /* the bw essential for transmitting 16 long packets containing * 252 bytes meant for dcs write memory command is programmed in * this register in terms of byte clocks. based on dsi transfer @@ -1302,6 +1549,30 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, } } +static void intel_dsi_unprepare(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; + u32 val; + + if (!IS_GEMINILAKE(dev_priv)) { + for_each_dsi_port(port, intel_dsi->ports) { + /* Panel commands can be sent when clock is in LP11 */ + I915_WRITE(MIPI_DEVICE_READY(port), 0x0); + + intel_dsi_reset_clocks(encoder, port); + I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); + + val = I915_READ(MIPI_DSI_FUNC_PRG(port)); + val &= ~VID_MODE_FORMAT_MASK; + I915_WRITE(MIPI_DSI_FUNC_PRG(port), val); + + I915_WRITE(MIPI_DEVICE_READY(port), 0x1); + } + } +} + static int intel_dsi_get_modes(struct drm_connector *connector) { struct intel_connector *intel_connector = to_intel_connector(connector); @@ -1485,6 +1756,7 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) intel_connector->get_hw_state = intel_connector_get_hw_state; intel_encoder->port = port; + /* * On BYT/CHV, pipe A maps to MIPI DSI port A, pipe B maps to MIPI DSI * port C. BXT isn't limited like this. @@ -1560,7 +1832,8 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) * In case of BYT with CRC PMIC, we need to use GPIO for * Panel control. */ - if (dev_priv->vbt.dsi.config->pwm_blc == PPS_BLC_PMIC) { + if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && + (dev_priv->vbt.dsi.config->pwm_blc == PPS_BLC_PMIC)) { intel_dsi->gpio_panel = gpiod_get(dev->dev, "panel", GPIOD_OUT_HIGH); @@ -1571,6 +1844,7 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) } intel_encoder->type = INTEL_OUTPUT_DSI; + intel_encoder->power_domain = POWER_DOMAIN_PORT_DSI; intel_encoder->cloneable = 0; drm_connector_init(dev, connector, &intel_dsi_connector_funcs, DRM_MODE_CONNECTOR_DSI); diff --git a/drivers/gpu/drm/i915/intel_dsi.h b/drivers/gpu/drm/i915/intel_dsi.h index 5967ea6d6045..548649158abd 100644 --- a/drivers/gpu/drm/i915/intel_dsi.h +++ b/drivers/gpu/drm/i915/intel_dsi.h @@ -130,6 +130,11 @@ static inline struct intel_dsi *enc_to_intel_dsi(struct drm_encoder *encoder) return container_of(encoder, struct intel_dsi, base.base); } +void wait_for_dsi_fifo_empty(struct intel_dsi *intel_dsi, enum port port); + +void intel_dsi_exec_vbt_sequence(struct intel_dsi *intel_dsi, + enum mipi_seq seq_id); + bool intel_dsi_pll_is_enabled(struct drm_i915_private *dev_priv); int intel_compute_dsi_pll(struct intel_encoder *encoder, struct intel_crtc_state *config); diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c index 8f683b8b1816..ab922b6eb36a 100644 --- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c @@ -192,6 +192,8 @@ static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi, break; } + wait_for_dsi_fifo_empty(intel_dsi, port); + out: data += len; @@ -424,10 +426,9 @@ static const char *sequence_name(enum mipi_seq seq_id) return "(unknown)"; } -static void generic_exec_sequence(struct drm_panel *panel, enum mipi_seq seq_id) +void intel_dsi_exec_vbt_sequence(struct intel_dsi *intel_dsi, + enum mipi_seq seq_id) { - struct vbt_panel *vbt_panel = to_vbt_panel(panel); - struct intel_dsi *intel_dsi = vbt_panel->intel_dsi; struct drm_i915_private *dev_priv = to_i915(intel_dsi->base.base.dev); const u8 *data; fn_mipi_elem_exec mipi_elem_exec; @@ -491,40 +492,6 @@ static void generic_exec_sequence(struct drm_panel *panel, enum mipi_seq seq_id) } } -static int vbt_panel_prepare(struct drm_panel *panel) -{ - generic_exec_sequence(panel, MIPI_SEQ_ASSERT_RESET); - generic_exec_sequence(panel, MIPI_SEQ_POWER_ON); - generic_exec_sequence(panel, MIPI_SEQ_DEASSERT_RESET); - generic_exec_sequence(panel, MIPI_SEQ_INIT_OTP); - - return 0; -} - -static int vbt_panel_unprepare(struct drm_panel *panel) -{ - generic_exec_sequence(panel, MIPI_SEQ_ASSERT_RESET); - generic_exec_sequence(panel, MIPI_SEQ_POWER_OFF); - - return 0; -} - -static int vbt_panel_enable(struct drm_panel *panel) -{ - generic_exec_sequence(panel, MIPI_SEQ_DISPLAY_ON); - generic_exec_sequence(panel, MIPI_SEQ_BACKLIGHT_ON); - - return 0; -} - -static int vbt_panel_disable(struct drm_panel *panel) -{ - generic_exec_sequence(panel, MIPI_SEQ_BACKLIGHT_OFF); - generic_exec_sequence(panel, MIPI_SEQ_DISPLAY_OFF); - - return 0; -} - static int vbt_panel_get_modes(struct drm_panel *panel) { struct vbt_panel *vbt_panel = to_vbt_panel(panel); @@ -548,10 +515,6 @@ static int vbt_panel_get_modes(struct drm_panel *panel) } static const struct drm_panel_funcs vbt_panel_funcs = { - .disable = vbt_panel_disable, - .unprepare = vbt_panel_unprepare, - .prepare = vbt_panel_prepare, - .enable = vbt_panel_enable, .get_modes = vbt_panel_get_modes, }; @@ -571,6 +534,7 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) u32 tclk_prepare_clkzero, ths_prepare_hszero; u32 lp_to_hs_switch, hs_to_lp_switch; u32 pclk, computed_ddr; + u32 mul; u16 burst_mode_ratio; enum port port; @@ -674,11 +638,6 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) break; } - /* - * ui(s) = 1/f [f in hz] - * ui(ns) = 10^9 / (f*10^6) [f in Mhz] -> 10^3/f(Mhz) - */ - /* in Kbps */ ui_num = NS_KHZ_RATIO; ui_den = bitrate; @@ -692,21 +651,26 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) */ intel_dsi->lp_byte_clk = DIV_ROUND_UP(tlpx_ns * ui_den, 8 * ui_num); - /* count values in UI = (ns value) * (bitrate / (2 * 10^6)) - * - * Since txddrclkhs_i is 2xUI, all the count values programmed in - * DPHY param register are divided by 2 + /* DDR clock period = 2 * UI + * UI(sec) = 1/(bitrate * 10^3) (bitrate is in KHZ) + * UI(nsec) = 10^6 / bitrate + * DDR clock period (nsec) = 2 * UI = (2 * 10^6)/ bitrate + * DDR clock count = ns_value / DDR clock period * - * prepare count + * For GEMINILAKE dphy_param_reg will be programmed in terms of + * HS byte clock count for other platform in HS ddr clock count */ + mul = IS_GEMINILAKE(dev_priv) ? 8 : 2; ths_prepare_ns = max(mipi_config->ths_prepare, mipi_config->tclk_prepare); - prepare_cnt = DIV_ROUND_UP(ths_prepare_ns * ui_den, ui_num * 2); + + /* prepare count */ + prepare_cnt = DIV_ROUND_UP(ths_prepare_ns * ui_den, ui_num * mul); /* exit zero count */ exit_zero_cnt = DIV_ROUND_UP( (ths_prepare_hszero - ths_prepare_ns) * ui_den, - ui_num * 2 + ui_num * mul ); /* @@ -720,12 +684,12 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) /* clk zero count */ clk_zero_cnt = DIV_ROUND_UP( - (tclk_prepare_clkzero - ths_prepare_ns) - * ui_den, 2 * ui_num); + (tclk_prepare_clkzero - ths_prepare_ns) + * ui_den, ui_num * mul); /* trail count */ tclk_trail_ns = max(mipi_config->tclk_trail, mipi_config->ths_trail); - trail_cnt = DIV_ROUND_UP(tclk_trail_ns * ui_den, 2 * ui_num); + trail_cnt = DIV_ROUND_UP(tclk_trail_ns * ui_den, ui_num * mul); if (prepare_cnt > PREPARE_CNT_MAX || exit_zero_cnt > EXIT_ZERO_CNT_MAX || @@ -801,6 +765,19 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) 8); intel_dsi->clk_hs_to_lp_count += extra_byte_count; + DRM_DEBUG_KMS("Pclk %d\n", intel_dsi->pclk); + DRM_DEBUG_KMS("Pixel overlap %d\n", intel_dsi->pixel_overlap); + DRM_DEBUG_KMS("Lane count %d\n", intel_dsi->lane_count); + DRM_DEBUG_KMS("DPHY param reg 0x%x\n", intel_dsi->dphy_reg); + DRM_DEBUG_KMS("Video mode format %s\n", + intel_dsi->video_mode_format == VIDEO_MODE_NON_BURST_WITH_SYNC_PULSE ? + "non-burst with sync pulse" : + intel_dsi->video_mode_format == VIDEO_MODE_NON_BURST_WITH_SYNC_EVENTS ? + "non-burst with sync events" : + intel_dsi->video_mode_format == VIDEO_MODE_BURST ? + "burst" : "<unknown>"); + DRM_DEBUG_KMS("Burst mode ratio %d\n", intel_dsi->burst_mode_ratio); + DRM_DEBUG_KMS("Reset timer %d\n", intel_dsi->rst_timer_val); DRM_DEBUG_KMS("Eot %s\n", enableddisabled(intel_dsi->eotp_pkt)); DRM_DEBUG_KMS("Clockstop %s\n", enableddisabled(!intel_dsi->clock_stop)); DRM_DEBUG_KMS("Mode %s\n", intel_dsi->operation_mode ? "command" : "video"); diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c index 61440e5c2563..2ff2ee7f3b78 100644 --- a/drivers/gpu/drm/i915/intel_dsi_pll.c +++ b/drivers/gpu/drm/i915/intel_dsi_pll.c @@ -206,17 +206,24 @@ static bool bxt_dsi_pll_is_enabled(struct drm_i915_private *dev_priv) return false; /* - * Both dividers must be programmed with valid values even if only one - * of the PLL is used, see BSpec/Broxton Clocks. Check this here for + * Dividers must be programmed with valid values. As per BSEPC, for + * GEMINLAKE only PORT A divider values are checked while for BXT + * both divider values are validated. Check this here for * paranoia, since BIOS is known to misconfigure PLLs in this way at * times, and since accessing DSI registers with invalid dividers * causes a system hang. */ val = I915_READ(BXT_DSI_PLL_CTL); - if (!(val & BXT_DSIA_16X_MASK) || !(val & BXT_DSIC_16X_MASK)) { - DRM_DEBUG_DRIVER("PLL is enabled with invalid divider settings (%08x)\n", - val); - enabled = false; + if (IS_GEMINILAKE(dev_priv)) { + if (!(val & BXT_DSIA_16X_MASK)) { + DRM_DEBUG_DRIVER("Invalid PLL divider (%08x)\n", val); + enabled = false; + } + } else { + if (!(val & BXT_DSIA_16X_MASK) || !(val & BXT_DSIC_16X_MASK)) { + DRM_DEBUG_DRIVER("Invalid PLL divider (%08x)\n", val); + enabled = false; + } } return enabled; @@ -372,6 +379,53 @@ static void vlv_dsi_reset_clocks(struct intel_encoder *encoder, enum port port) ESCAPE_CLOCK_DIVIDER_SHIFT); } +static void glk_dsi_program_esc_clock(struct drm_device *dev, + const struct intel_crtc_state *config) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + u32 dsi_rate = 0; + u32 pll_ratio = 0; + u32 ddr_clk = 0; + u32 div1_value = 0; + u32 div2_value = 0; + u32 txesc1_div = 0; + u32 txesc2_div = 0; + + pll_ratio = config->dsi_pll.ctrl & BXT_DSI_PLL_RATIO_MASK; + + dsi_rate = (BXT_REF_CLOCK_KHZ * pll_ratio) / 2; + + ddr_clk = dsi_rate / 2; + + /* Variable divider value */ + div1_value = DIV_ROUND_CLOSEST(ddr_clk, 20000); + + /* Calculate TXESC1 divider */ + if (div1_value <= 10) + txesc1_div = div1_value; + else if ((div1_value > 10) && (div1_value <= 20)) + txesc1_div = DIV_ROUND_UP(div1_value, 2); + else if ((div1_value > 20) && (div1_value <= 30)) + txesc1_div = DIV_ROUND_UP(div1_value, 4); + else if ((div1_value > 30) && (div1_value <= 40)) + txesc1_div = DIV_ROUND_UP(div1_value, 6); + else if ((div1_value > 40) && (div1_value <= 50)) + txesc1_div = DIV_ROUND_UP(div1_value, 8); + else + txesc1_div = 10; + + /* Calculate TXESC2 divider */ + div2_value = DIV_ROUND_UP(div1_value, txesc1_div); + + if (div2_value < 10) + txesc2_div = div2_value; + else + txesc2_div = 10; + + I915_WRITE(MIPIO_TXESC_CLK_DIV1, txesc1_div & GLK_TX_ESC_CLK_DIV1_MASK); + I915_WRITE(MIPIO_TXESC_CLK_DIV2, txesc2_div & GLK_TX_ESC_CLK_DIV2_MASK); +} + /* Program BXT Mipi clocks and dividers */ static void bxt_dsi_program_clocks(struct drm_device *dev, enum port port, const struct intel_crtc_state *config) @@ -416,11 +470,7 @@ static void bxt_dsi_program_clocks(struct drm_device *dev, enum port port, rx_div_lower = rx_div & RX_DIVIDER_BIT_1_2; rx_div_upper = (rx_div & RX_DIVIDER_BIT_3_4) >> 2; - /* As per bpsec program the 8/3X clock divider to the below value */ - if (dev_priv->vbt.dsi.config->is_cmd_mode) - mipi_8by3_divider = 0x2; - else - mipi_8by3_divider = 0x3; + mipi_8by3_divider = 0x2; tmp |= BXT_MIPI_8X_BY3_DIVIDER(port, mipi_8by3_divider); tmp |= BXT_MIPI_TX_ESCLK_DIVIDER(port, tx_div); @@ -430,11 +480,12 @@ static void bxt_dsi_program_clocks(struct drm_device *dev, enum port port, I915_WRITE(BXT_MIPI_CLOCK_CTL, tmp); } -static int bxt_compute_dsi_pll(struct intel_encoder *encoder, +static int gen9lp_compute_dsi_pll(struct intel_encoder *encoder, struct intel_crtc_state *config) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - u8 dsi_ratio; + u8 dsi_ratio, dsi_ratio_min, dsi_ratio_max; u32 dsi_clk; dsi_clk = dsi_clk_from_pclk(intel_dsi->pclk, intel_dsi->pixel_format, @@ -446,11 +497,20 @@ static int bxt_compute_dsi_pll(struct intel_encoder *encoder, * round 'up' the result */ dsi_ratio = DIV_ROUND_UP(dsi_clk * 2, BXT_REF_CLOCK_KHZ); - if (dsi_ratio < BXT_DSI_PLL_RATIO_MIN || - dsi_ratio > BXT_DSI_PLL_RATIO_MAX) { + + if (IS_BROXTON(dev_priv)) { + dsi_ratio_min = BXT_DSI_PLL_RATIO_MIN; + dsi_ratio_max = BXT_DSI_PLL_RATIO_MAX; + } else { + dsi_ratio_min = GLK_DSI_PLL_RATIO_MIN; + dsi_ratio_max = GLK_DSI_PLL_RATIO_MAX; + } + + if (dsi_ratio < dsi_ratio_min || dsi_ratio > dsi_ratio_max) { DRM_ERROR("Cant get a suitable ratio from DSI PLL ratios\n"); return -ECHRNG; - } + } else + DRM_DEBUG_KMS("DSI PLL calculation is Done!!\n"); /* * Program DSI ratio and Select MIPIC and MIPIA PLL output as 8x @@ -462,13 +522,13 @@ static int bxt_compute_dsi_pll(struct intel_encoder *encoder, /* As per recommendation from hardware team, * Prog PVD ratio =1 if dsi ratio <= 50 */ - if (dsi_ratio <= 50) + if (IS_BROXTON(dev_priv) && dsi_ratio <= 50) config->dsi_pll.ctrl |= BXT_DSI_PLL_PVD_RATIO_1; return 0; } -static void bxt_enable_dsi_pll(struct intel_encoder *encoder, +static void gen9lp_enable_dsi_pll(struct intel_encoder *encoder, const struct intel_crtc_state *config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -483,8 +543,12 @@ static void bxt_enable_dsi_pll(struct intel_encoder *encoder, POSTING_READ(BXT_DSI_PLL_CTL); /* Program TX, RX, Dphy clocks */ - for_each_dsi_port(port, intel_dsi->ports) - bxt_dsi_program_clocks(encoder->base.dev, port, config); + if (IS_BROXTON(dev_priv)) { + for_each_dsi_port(port, intel_dsi->ports) + bxt_dsi_program_clocks(encoder->base.dev, port, config); + } else { + glk_dsi_program_esc_clock(encoder->base.dev, config); + } /* Enable DSI PLL */ val = I915_READ(BXT_DSI_PLL_ENABLE); @@ -522,7 +586,7 @@ int intel_compute_dsi_pll(struct intel_encoder *encoder, if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) return vlv_compute_dsi_pll(encoder, config); else if (IS_GEN9_LP(dev_priv)) - return bxt_compute_dsi_pll(encoder, config); + return gen9lp_compute_dsi_pll(encoder, config); return -ENODEV; } @@ -535,7 +599,7 @@ void intel_enable_dsi_pll(struct intel_encoder *encoder, if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) vlv_enable_dsi_pll(encoder, config); else if (IS_GEN9_LP(dev_priv)) - bxt_enable_dsi_pll(encoder, config); + gen9lp_enable_dsi_pll(encoder, config); } void intel_disable_dsi_pll(struct intel_encoder *encoder) @@ -548,19 +612,30 @@ void intel_disable_dsi_pll(struct intel_encoder *encoder) bxt_disable_dsi_pll(encoder); } -static void bxt_dsi_reset_clocks(struct intel_encoder *encoder, enum port port) +static void gen9lp_dsi_reset_clocks(struct intel_encoder *encoder, + enum port port) { u32 tmp; struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); /* Clear old configurations */ - tmp = I915_READ(BXT_MIPI_CLOCK_CTL); - tmp &= ~(BXT_MIPI_TX_ESCLK_FIXDIV_MASK(port)); - tmp &= ~(BXT_MIPI_RX_ESCLK_UPPER_FIXDIV_MASK(port)); - tmp &= ~(BXT_MIPI_8X_BY3_DIVIDER_MASK(port)); - tmp &= ~(BXT_MIPI_RX_ESCLK_LOWER_FIXDIV_MASK(port)); - I915_WRITE(BXT_MIPI_CLOCK_CTL, tmp); + if (IS_BROXTON(dev_priv)) { + tmp = I915_READ(BXT_MIPI_CLOCK_CTL); + tmp &= ~(BXT_MIPI_TX_ESCLK_FIXDIV_MASK(port)); + tmp &= ~(BXT_MIPI_RX_ESCLK_UPPER_FIXDIV_MASK(port)); + tmp &= ~(BXT_MIPI_8X_BY3_DIVIDER_MASK(port)); + tmp &= ~(BXT_MIPI_RX_ESCLK_LOWER_FIXDIV_MASK(port)); + I915_WRITE(BXT_MIPI_CLOCK_CTL, tmp); + } else { + tmp = I915_READ(MIPIO_TXESC_CLK_DIV1); + tmp &= ~GLK_TX_ESC_CLK_DIV1_MASK; + I915_WRITE(MIPIO_TXESC_CLK_DIV1, tmp); + + tmp = I915_READ(MIPIO_TXESC_CLK_DIV2); + tmp &= ~GLK_TX_ESC_CLK_DIV2_MASK; + I915_WRITE(MIPIO_TXESC_CLK_DIV2, tmp); + } I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); } @@ -569,7 +644,7 @@ void intel_dsi_reset_clocks(struct intel_encoder *encoder, enum port port) struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); if (IS_GEN9_LP(dev_priv)) - bxt_dsi_reset_clocks(encoder, port); + gen9lp_dsi_reset_clocks(encoder, port); else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) vlv_dsi_reset_clocks(encoder, port); } diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index 50da89dcb92b..6025839ed3b7 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -515,6 +515,7 @@ void intel_dvo_init(struct drm_i915_private *dev_priv) "DVO %c", port_name(port)); intel_encoder->type = INTEL_OUTPUT_DVO; + intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = port; intel_encoder->crtc_mask = (1 << 0) | (1 << 1); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 371acf109e34..73fe718516a5 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -28,8 +28,8 @@ static const struct engine_info { const char *name; - unsigned exec_id; - enum intel_engine_hw_id hw_id; + unsigned int exec_id; + unsigned int hw_id; u32 mmio_base; unsigned irq_shift; int (*init_legacy)(struct intel_engine_cs *engine); @@ -110,21 +110,20 @@ intel_engine_setup(struct drm_i915_private *dev_priv, } /** - * intel_engines_init() - allocate, populate and init the Engine Command Streamers + * intel_engines_init_early() - allocate the Engine Command Streamers * @dev_priv: i915 device private * * Return: non-zero if the initialization failed. */ -int intel_engines_init(struct drm_i915_private *dev_priv) +int intel_engines_init_early(struct drm_i915_private *dev_priv) { struct intel_device_info *device_info = mkwrite_device_info(dev_priv); unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask; unsigned int mask = 0; - int (*init)(struct intel_engine_cs *engine); struct intel_engine_cs *engine; enum intel_engine_id id; unsigned int i; - int ret; + int err; WARN_ON(ring_mask == 0); WARN_ON(ring_mask & @@ -134,20 +133,8 @@ int intel_engines_init(struct drm_i915_private *dev_priv) if (!HAS_ENGINE(dev_priv, i)) continue; - if (i915.enable_execlists) - init = intel_engines[i].init_execlists; - else - init = intel_engines[i].init_legacy; - - if (!init) - continue; - - ret = intel_engine_setup(dev_priv, i); - if (ret) - goto cleanup; - - ret = init(dev_priv->engine[i]); - if (ret) + err = intel_engine_setup(dev_priv, i); + if (err) goto cleanup; mask |= ENGINE_MASK(i); @@ -166,14 +153,67 @@ int intel_engines_init(struct drm_i915_private *dev_priv) return 0; cleanup: + for_each_engine(engine, dev_priv, id) + kfree(engine); + return err; +} + +/** + * intel_engines_init() - allocate, populate and init the Engine Command Streamers + * @dev_priv: i915 device private + * + * Return: non-zero if the initialization failed. + */ +int intel_engines_init(struct drm_i915_private *dev_priv) +{ + struct intel_device_info *device_info = mkwrite_device_info(dev_priv); + struct intel_engine_cs *engine; + enum intel_engine_id id, err_id; + unsigned int mask = 0; + int err = 0; + for_each_engine(engine, dev_priv, id) { + int (*init)(struct intel_engine_cs *engine); + if (i915.enable_execlists) - intel_logical_ring_cleanup(engine); + init = intel_engines[id].init_execlists; else - intel_engine_cleanup(engine); + init = intel_engines[id].init_legacy; + if (!init) { + kfree(engine); + dev_priv->engine[id] = NULL; + continue; + } + + err = init(engine); + if (err) { + err_id = id; + goto cleanup; + } + + mask |= ENGINE_MASK(id); } - return ret; + /* + * Catch failures to update intel_engines table when the new engines + * are added to the driver by a warning and disabling the forgotten + * engines. + */ + if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask)) + device_info->ring_mask = mask; + + device_info->num_rings = hweight32(mask); + + return 0; + +cleanup: + for_each_engine(engine, dev_priv, id) { + if (id >= err_id) + kfree(engine); + else + dev_priv->gt.cleanup_engine(engine); + } + return err; } void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) @@ -212,8 +252,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) engine->irq_seqno_barrier(engine); GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request)); - engine->timeline->last_submitted_seqno = seqno; - engine->hangcheck.seqno = seqno; /* After manually advancing the seqno, fake the interrupt in case @@ -482,3 +520,601 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, break; } } + +static int wa_add(struct drm_i915_private *dev_priv, + i915_reg_t addr, + const u32 mask, const u32 val) +{ + const u32 idx = dev_priv->workarounds.count; + + if (WARN_ON(idx >= I915_MAX_WA_REGS)) + return -ENOSPC; + + dev_priv->workarounds.reg[idx].addr = addr; + dev_priv->workarounds.reg[idx].value = val; + dev_priv->workarounds.reg[idx].mask = mask; + + dev_priv->workarounds.count++; + + return 0; +} + +#define WA_REG(addr, mask, val) do { \ + const int r = wa_add(dev_priv, (addr), (mask), (val)); \ + if (r) \ + return r; \ + } while (0) + +#define WA_SET_BIT_MASKED(addr, mask) \ + WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) + +#define WA_CLR_BIT_MASKED(addr, mask) \ + WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) + +#define WA_SET_FIELD_MASKED(addr, mask, value) \ + WA_REG(addr, mask, _MASKED_FIELD(mask, value)) + +#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask)) +#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask)) + +#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val) + +static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, + i915_reg_t reg) +{ + struct drm_i915_private *dev_priv = engine->i915; + struct i915_workarounds *wa = &dev_priv->workarounds; + const uint32_t index = wa->hw_whitelist_count[engine->id]; + + if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS)) + return -EINVAL; + + WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), + i915_mmio_reg_offset(reg)); + wa->hw_whitelist_count[engine->id]++; + + return 0; +} + +static int gen8_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); + + /* WaDisableAsyncFlipPerfMode:bdw,chv */ + WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); + + /* WaDisablePartialInstShootdown:bdw,chv */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + + /* Use Force Non-Coherent whenever executing a 3D context. This is a + * workaround for for a possible hang in the unlikely event a TLB + * invalidation occurs during a PSD flush. + */ + /* WaForceEnableNonCoherent:bdw,chv */ + /* WaHdcDisableFetchWhenMasked:bdw,chv */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_DONOT_FETCH_MEM_WHEN_MASKED | + HDC_FORCE_NON_COHERENT); + + /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: + * "The Hierarchical Z RAW Stall Optimization allows non-overlapping + * polygons in the same 8x4 pixel/sample area to be processed without + * stalling waiting for the earlier ones to write to Hierarchical Z + * buffer." + * + * This optimization is off by default for BDW and CHV; turn it on. + */ + WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); + + /* Wa4x4STCOptimizationDisable:bdw,chv */ + WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + WA_SET_FIELD_MASKED(GEN7_GT_MODE, + GEN6_WIZ_HASHING_MASK, + GEN6_WIZ_HASHING_16x4); + + return 0; +} + +static int bdw_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen8_init_workarounds(engine); + if (ret) + return ret; + + /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + + /* WaDisableDopClockGating:bdw + * + * Also see the related UCGTCL1 write in broadwell_init_clock_gating() + * to disable EUTC clock gating. + */ + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, + DOP_CLOCK_GATING_DISABLE); + + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); + + WA_SET_BIT_MASKED(HDC_CHICKEN0, + /* WaForceContextSaveRestoreNonCoherent:bdw */ + HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | + /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ + (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); + + return 0; +} + +static int chv_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen8_init_workarounds(engine); + if (ret) + return ret; + + /* WaDisableThreadStallDopClockGating:chv */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + + /* Improve HiZ throughput on CHV. */ + WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); + + return 0; +} + +static int gen9_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk */ + I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE)); + + /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk */ + I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | + GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); + + /* WaDisableKillLogic:bxt,skl,kbl */ + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | + ECOCHK_DIS_TLB); + + /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk */ + /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + FLOW_CONTROL_ENABLE | + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + + /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); + + /* WaDisableDgMirrorFixInHalfSliceChicken5:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) + WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, + GEN9_DG_MIRROR_FIX_ENABLE); + + /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { + WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1, + GEN9_RHWO_OPTIMIZATION_DISABLE); + /* + * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set + * but we do that in per ctx batchbuffer as there is an issue + * with this register not getting restored on ctx restore + */ + } + + /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */ + WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, + GEN9_ENABLE_GPGPU_PREEMPTION); + + /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk */ + /* WaDisablePartialResolveInVc:skl,bxt,kbl */ + WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE | + GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE)); + + /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk */ + WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, + GEN9_CCS_TLB_PREFETCH_ENABLE); + + /* WaDisableMaskBasedCammingInRCC:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) + WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0, + PIXEL_MASK_CAMMING_DISABLE); + + /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | + HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); + + /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are + * both tied to WaForceContextSaveRestoreNonCoherent + * in some hsds for skl. We keep the tie for all gen9. The + * documentation is a bit hazy and so we want to get common behaviour, + * even though there is no clear evidence we would need both on kbl/bxt. + * This area has been source of system hangs so we play it safe + * and mimic the skl regardless of what bspec says. + * + * Use Force Non-Coherent whenever executing a 3D context. This + * is a workaround for a possible hang in the unlikely event + * a TLB invalidation occurs during a PSD flush. + */ + + /* WaForceEnableNonCoherent:skl,bxt,kbl */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FORCE_NON_COHERENT); + + /* WaDisableHDCInvalidation:skl,bxt,kbl */ + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | + BDW_DISABLE_HDC_INVALIDATION); + + /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl */ + if (IS_SKYLAKE(dev_priv) || + IS_KABYLAKE(dev_priv) || + IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); + + /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk */ + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); + + /* WaOCLCoherentLineFlush:skl,bxt,kbl */ + I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | + GEN8_LQSC_FLUSH_COHERENT_LINES)); + + /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk */ + ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); + if (ret) + return ret; + + /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl */ + ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); + if (ret) + return ret; + + /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk */ + ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1); + if (ret) + return ret; + + return 0; +} + +static int skl_tune_iz_hashing(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + u8 vals[3] = { 0, 0, 0 }; + unsigned int i; + + for (i = 0; i < 3; i++) { + u8 ss; + + /* + * Only consider slices where one, and only one, subslice has 7 + * EUs + */ + if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i])) + continue; + + /* + * subslice_7eu[i] != 0 (because of the check above) and + * ss_max == 4 (maximum number of subslices possible per slice) + * + * -> 0 <= ss <= 3; + */ + ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1; + vals[i] = 3 - ss; + } + + if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) + return 0; + + /* Tune IZ hashing. See intel_device_info_runtime_init() */ + WA_SET_FIELD_MASKED(GEN7_GT_MODE, + GEN9_IZ_HASHING_MASK(2) | + GEN9_IZ_HASHING_MASK(1) | + GEN9_IZ_HASHING_MASK(0), + GEN9_IZ_HASHING(2, vals[2]) | + GEN9_IZ_HASHING(1, vals[1]) | + GEN9_IZ_HASHING(0, vals[0])); + + return 0; +} + +static int skl_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen9_init_workarounds(engine); + if (ret) + return ret; + + /* + * Actual WA is to disable percontext preemption granularity control + * until D0 which is the default case so this is equivalent to + * !WaDisablePerCtxtPreemptionGranularityControl:skl + */ + I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, + _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); + + /* WaEnableGapsTsvCreditFix:skl */ + I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | + GEN9_GAPS_TSV_CREDIT_DISABLE)); + + /* WaDisableGafsUnitClkGating:skl */ + WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + + /* WaInPlaceDecompressionHang:skl */ + if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER)) + WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + + /* WaDisableLSQCROPERFforOCL:skl */ + ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); + if (ret) + return ret; + + return skl_tune_iz_hashing(engine); +} + +static int bxt_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen9_init_workarounds(engine); + if (ret) + return ret; + + /* WaStoreMultiplePTEenable:bxt */ + /* This is a requirement according to Hardware specification */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) + I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF); + + /* WaSetClckGatingDisableMedia:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { + I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & + ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE)); + } + + /* WaDisableThreadStallDopClockGating:bxt */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + STALL_DOP_GATING_DISABLE); + + /* WaDisablePooledEuLoadBalancingFix:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) { + WA_SET_BIT_MASKED(FF_SLICE_CS_CHICKEN2, + GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE); + } + + /* WaDisableSbeCacheDispatchPortSharing:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) { + WA_SET_BIT_MASKED( + GEN7_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + } + + /* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */ + /* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */ + /* WaDisableObjectLevelPreemtionForInstanceId:bxt */ + /* WaDisableLSQCROPERFforOCL:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { + ret = wa_ring_whitelist_reg(engine, GEN9_CS_DEBUG_MODE1); + if (ret) + return ret; + + ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); + if (ret) + return ret; + } + + /* WaProgramL3SqcReg1DefaultForPerf:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) + I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) | + L3_HIGH_PRIO_CREDITS(2)); + + /* WaToEnableHwFixForPushConstHWBug:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + /* WaInPlaceDecompressionHang:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) + WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + + return 0; +} + +static int kbl_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen9_init_workarounds(engine); + if (ret) + return ret; + + /* WaEnableGapsTsvCreditFix:kbl */ + I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | + GEN9_GAPS_TSV_CREDIT_DISABLE)); + + /* WaDisableDynamicCreditSharing:kbl */ + if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) + WA_SET_BIT(GAMT_CHKN_BIT_REG, + GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); + + /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */ + if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0)) + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FENCE_DEST_SLM_DISABLE); + + /* WaToEnableHwFixForPushConstHWBug:kbl */ + if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + /* WaDisableGafsUnitClkGating:kbl */ + WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + + /* WaDisableSbeCacheDispatchPortSharing:kbl */ + WA_SET_BIT_MASKED( + GEN7_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + + /* WaInPlaceDecompressionHang:kbl */ + WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + + /* WaDisableLSQCROPERFforOCL:kbl */ + ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); + if (ret) + return ret; + + return 0; +} + +static int glk_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen9_init_workarounds(engine); + if (ret) + return ret; + + /* WaToEnableHwFixForPushConstHWBug:glk */ + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + return 0; +} + +int init_workarounds_ring(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int err; + + WARN_ON(engine->id != RCS); + + dev_priv->workarounds.count = 0; + dev_priv->workarounds.hw_whitelist_count[engine->id] = 0; + + if (IS_BROADWELL(dev_priv)) + err = bdw_init_workarounds(engine); + else if (IS_CHERRYVIEW(dev_priv)) + err = chv_init_workarounds(engine); + else if (IS_SKYLAKE(dev_priv)) + err = skl_init_workarounds(engine); + else if (IS_BROXTON(dev_priv)) + err = bxt_init_workarounds(engine); + else if (IS_KABYLAKE(dev_priv)) + err = kbl_init_workarounds(engine); + else if (IS_GEMINILAKE(dev_priv)) + err = glk_init_workarounds(engine); + else + err = 0; + if (err) + return err; + + DRM_DEBUG_DRIVER("%s: Number of context specific w/a: %d\n", + engine->name, dev_priv->workarounds.count); + return 0; +} + +int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) +{ + struct i915_workarounds *w = &req->i915->workarounds; + u32 *cs; + int ret, i; + + if (w->count == 0) + return 0; + + ret = req->engine->emit_flush(req, EMIT_BARRIER); + if (ret) + return ret; + + cs = intel_ring_begin(req, (w->count * 2 + 2)); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(w->count); + for (i = 0; i < w->count; i++) { + *cs++ = i915_mmio_reg_offset(w->reg[i].addr); + *cs++ = w->reg[i].value; + } + *cs++ = MI_NOOP; + + intel_ring_advance(req, cs); + + ret = req->engine->emit_flush(req, EMIT_BARRIER); + if (ret) + return ret; + + return 0; +} + +/** + * intel_engine_is_idle() - Report if the engine has finished process all work + * @engine: the intel_engine_cs + * + * Return true if there are no requests pending, nothing left to be submitted + * to hardware, and that the engine is idle. + */ +bool intel_engine_is_idle(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + /* Any inflight/incomplete requests? */ + if (!i915_seqno_passed(intel_engine_get_seqno(engine), + intel_engine_last_submit(engine))) + return false; + + /* Interrupt/tasklet pending? */ + if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) + return false; + + /* Both ports drained, no more ELSP submission? */ + if (engine->execlist_port[0].request) + return false; + + /* Ring stopped? */ + if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE)) + return false; + + return true; +} + +bool intel_engines_are_idle(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, dev_priv, id) { + if (!intel_engine_is_idle(engine)) + return false; + } + + return true; +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_engine.c" +#endif diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 89fe5c8464df..17d418b23d77 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -537,8 +537,7 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv, * reserved range size, so it always assumes the maximum (8mb) is used. * If we enable FBC using a CFB on that memory range we'll get FIFO * underruns, even if that range is not reserved by the BIOS. */ - if (IS_BROADWELL(dev_priv) || - IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv)) end = ggtt->stolen_size - 8 * 1024 * 1024; else end = U64_MAX; @@ -628,7 +627,8 @@ err_fb: kfree(compressed_llb); i915_gem_stolen_remove_node(dev_priv, &fbc->compressed_fb); err_llb: - pr_info_once("drm: not enough stolen space for compressed buffer (need %d more bytes), disabling. Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size); + if (drm_mm_initialized(&dev_priv->mm.stolen)) + pr_info_once("drm: not enough stolen space for compressed buffer (need %d more bytes), disabling. Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size); return -ENOSPC; } @@ -743,8 +743,7 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, cache->crtc.mode_flags = crtc_state->base.adjusted_mode.flags; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - cache->crtc.hsw_bdw_pixel_rate = - ilk_pipe_pixel_rate(crtc_state); + cache->crtc.hsw_bdw_pixel_rate = crtc_state->pixel_rate; cache->plane.rotation = plane_state->base.rotation; cache->plane.src_w = drm_rect_width(&plane_state->base.src) >> 16; @@ -819,7 +818,7 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) /* WaFbcExceedCdClockThreshold:hsw,bdw */ if ((IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) && - cache->crtc.hsw_bdw_pixel_rate >= dev_priv->cdclk_freq * 95 / 100) { + cache->crtc.hsw_bdw_pixel_rate >= dev_priv->cdclk.hw.cdclk * 95 / 100) { fbc->no_fbc_reason = "pixel rate is too big"; return false; } diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 281c5c48a84d..f7e9a4e69595 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -45,6 +45,14 @@ #include <drm/i915_drm.h> #include "i915_drv.h" +static void intel_fbdev_invalidate(struct intel_fbdev *ifbdev) +{ + struct drm_i915_gem_object *obj = ifbdev->fb->obj; + unsigned int origin = ifbdev->vma->fence ? ORIGIN_GTT : ORIGIN_CPU; + + intel_fb_obj_invalidate(obj, origin); +} + static int intel_fbdev_set_par(struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; @@ -53,12 +61,8 @@ static int intel_fbdev_set_par(struct fb_info *info) int ret; ret = drm_fb_helper_set_par(info); - - if (ret == 0) { - mutex_lock(&fb_helper->dev->struct_mutex); - intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT); - mutex_unlock(&fb_helper->dev->struct_mutex); - } + if (ret == 0) + intel_fbdev_invalidate(ifbdev); return ret; } @@ -71,12 +75,8 @@ static int intel_fbdev_blank(int blank, struct fb_info *info) int ret; ret = drm_fb_helper_blank(blank, info); - - if (ret == 0) { - mutex_lock(&fb_helper->dev->struct_mutex); - intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT); - mutex_unlock(&fb_helper->dev->struct_mutex); - } + if (ret == 0) + intel_fbdev_invalidate(ifbdev); return ret; } @@ -87,15 +87,11 @@ static int intel_fbdev_pan_display(struct fb_var_screeninfo *var, struct drm_fb_helper *fb_helper = info->par; struct intel_fbdev *ifbdev = container_of(fb_helper, struct intel_fbdev, helper); - int ret; - ret = drm_fb_helper_pan_display(var, info); - if (ret == 0) { - mutex_lock(&fb_helper->dev->struct_mutex); - intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT); - mutex_unlock(&fb_helper->dev->struct_mutex); - } + ret = drm_fb_helper_pan_display(var, info); + if (ret == 0) + intel_fbdev_invalidate(ifbdev); return ret; } @@ -121,7 +117,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper, struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; struct drm_mode_fb_cmd2 mode_cmd = {}; - struct drm_i915_gem_object *obj = NULL; + struct drm_i915_gem_object *obj; int size, ret; /* we don't do packed 24bpp */ @@ -136,14 +132,13 @@ static int intelfb_alloc(struct drm_fb_helper *helper, mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, sizes->surface_depth); - mutex_lock(&dev->struct_mutex); - size = mode_cmd.pitches[0] * mode_cmd.height; size = PAGE_ALIGN(size); /* If the FB is too big, just don't use it since fbdev is not very * important and we should probably use that space with FBC or other * features. */ + obj = NULL; if (size * 2 < ggtt->stolen_usable_size) obj = i915_gem_object_create_stolen(dev_priv, size); if (obj == NULL) @@ -151,24 +146,22 @@ static int intelfb_alloc(struct drm_fb_helper *helper, if (IS_ERR(obj)) { DRM_ERROR("failed to allocate framebuffer\n"); ret = PTR_ERR(obj); - goto out; + goto err; } - fb = __intel_framebuffer_create(dev, &mode_cmd, obj); + fb = intel_framebuffer_create(obj, &mode_cmd); if (IS_ERR(fb)) { - i915_gem_object_put(obj); ret = PTR_ERR(fb); - goto out; + goto err_obj; } - mutex_unlock(&dev->struct_mutex); - ifbdev->fb = to_intel_framebuffer(fb); return 0; -out: - mutex_unlock(&dev->struct_mutex); +err_obj: + i915_gem_object_put(obj); +err: return ret; } @@ -355,23 +348,23 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, bool *enabled, int width, int height) { struct drm_i915_private *dev_priv = to_i915(fb_helper->dev); - unsigned long conn_configured, mask; + unsigned long conn_configured, conn_seq, mask; unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG); int i, j; bool *save_enabled; bool fallback = true; int num_connectors_enabled = 0; int num_connectors_detected = 0; - int pass = 0; save_enabled = kcalloc(count, sizeof(bool), GFP_KERNEL); if (!save_enabled) return false; memcpy(save_enabled, enabled, count); - mask = BIT(count) - 1; + mask = GENMASK(count - 1, 0); conn_configured = 0; retry: + conn_seq = conn_configured; for (i = 0; i < count; i++) { struct drm_fb_helper_connector *fb_conn; struct drm_connector *connector; @@ -385,7 +378,7 @@ retry: if (conn_configured & BIT(i)) continue; - if (pass == 0 && !connector->has_tile) + if (conn_seq == 0 && !connector->has_tile) continue; if (connector->status == connector_status_connected) @@ -496,10 +489,8 @@ retry: conn_configured |= BIT(i); } - if ((conn_configured & mask) != mask) { - pass++; + if ((conn_configured & mask) != mask && conn_configured != conn_seq) goto retry; - } /* * If the BIOS didn't enable everything it could, fall back to have the @@ -628,7 +619,7 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, } cur_size = intel_crtc->config->base.adjusted_mode.crtc_vdisplay; - cur_size = intel_fb_align_height(dev, cur_size, + cur_size = intel_fb_align_height(to_i915(dev), cur_size, fb->base.format->format, fb->base.modifier); cur_size *= fb->base.pitches[0]; @@ -838,11 +829,6 @@ void intel_fbdev_restore_mode(struct drm_device *dev) if (!ifbdev->fb) return; - if (drm_fb_helper_restore_fbdev_mode_unlocked(&ifbdev->helper)) { - DRM_DEBUG("failed to restore crtc mode\n"); - } else { - mutex_lock(&dev->struct_mutex); - intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT); - mutex_unlock(&dev->struct_mutex); - } + if (drm_fb_helper_restore_fbdev_mode_unlocked(&ifbdev->helper) == 0) + intel_fbdev_invalidate(ifbdev); } diff --git a/drivers/gpu/drm/i915/intel_fifo_underrun.c b/drivers/gpu/drm/i915/intel_fifo_underrun.c index e660d8b4bbc3..966e255ca053 100644 --- a/drivers/gpu/drm/i915/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/intel_fifo_underrun.c @@ -54,7 +54,7 @@ static bool ivb_can_enable_err_int(struct drm_device *dev) struct intel_crtc *crtc; enum pipe pipe; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); for_each_pipe(dev_priv, pipe) { crtc = intel_get_crtc_for_pipe(dev_priv, pipe); @@ -72,7 +72,7 @@ static bool cpt_can_enable_serr_int(struct drm_device *dev) enum pipe pipe; struct intel_crtc *crtc; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); for_each_pipe(dev_priv, pipe) { crtc = intel_get_crtc_for_pipe(dev_priv, pipe); @@ -90,7 +90,7 @@ static void i9xx_check_fifo_underruns(struct intel_crtc *crtc) i915_reg_t reg = PIPESTAT(crtc->pipe); u32 pipestat = I915_READ(reg) & 0xffff0000; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if ((pipestat & PIPE_FIFO_UNDERRUN_STATUS) == 0) return; @@ -98,6 +98,7 @@ static void i9xx_check_fifo_underruns(struct intel_crtc *crtc) I915_WRITE(reg, pipestat | PIPE_FIFO_UNDERRUN_STATUS); POSTING_READ(reg); + trace_intel_cpu_fifo_underrun(dev_priv, crtc->pipe); DRM_ERROR("pipe %c underrun\n", pipe_name(crtc->pipe)); } @@ -109,7 +110,7 @@ static void i9xx_set_fifo_underrun_reporting(struct drm_device *dev, i915_reg_t reg = PIPESTAT(pipe); u32 pipestat = I915_READ(reg) & 0xffff0000; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if (enable) { I915_WRITE(reg, pipestat | PIPE_FIFO_UNDERRUN_STATUS); @@ -139,7 +140,7 @@ static void ivybridge_check_fifo_underruns(struct intel_crtc *crtc) enum pipe pipe = crtc->pipe; uint32_t err_int = I915_READ(GEN7_ERR_INT); - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if ((err_int & ERR_INT_FIFO_UNDERRUN(pipe)) == 0) return; @@ -147,6 +148,7 @@ static void ivybridge_check_fifo_underruns(struct intel_crtc *crtc) I915_WRITE(GEN7_ERR_INT, ERR_INT_FIFO_UNDERRUN(pipe)); POSTING_READ(GEN7_ERR_INT); + trace_intel_cpu_fifo_underrun(dev_priv, pipe); DRM_ERROR("fifo underrun on pipe %c\n", pipe_name(pipe)); } @@ -204,7 +206,7 @@ static void cpt_check_pch_fifo_underruns(struct intel_crtc *crtc) enum transcoder pch_transcoder = (enum transcoder) crtc->pipe; uint32_t serr_int = I915_READ(SERR_INT); - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if ((serr_int & SERR_INT_TRANS_FIFO_UNDERRUN(pch_transcoder)) == 0) return; @@ -212,6 +214,7 @@ static void cpt_check_pch_fifo_underruns(struct intel_crtc *crtc) I915_WRITE(SERR_INT, SERR_INT_TRANS_FIFO_UNDERRUN(pch_transcoder)); POSTING_READ(SERR_INT); + trace_intel_pch_fifo_underrun(dev_priv, pch_transcoder); DRM_ERROR("pch fifo underrun on pch transcoder %s\n", transcoder_name(pch_transcoder)); } @@ -248,7 +251,7 @@ static bool __intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev, struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); bool old; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); old = !crtc->cpu_fifo_underrun_disabled; crtc->cpu_fifo_underrun_disabled = !enable; @@ -368,9 +371,11 @@ void intel_cpu_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv, crtc->cpu_fifo_underrun_disabled) return; - if (intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false)) + if (intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false)) { + trace_intel_cpu_fifo_underrun(dev_priv, pipe); DRM_ERROR("CPU pipe %c FIFO underrun\n", pipe_name(pipe)); + } intel_fbc_handle_fifo_underrun_irq(dev_priv); } @@ -388,9 +393,11 @@ void intel_pch_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv, enum transcoder pch_transcoder) { if (intel_set_pch_fifo_underrun_reporting(dev_priv, pch_transcoder, - false)) + false)) { + trace_intel_pch_fifo_underrun(dev_priv, pch_transcoder); DRM_ERROR("PCH transcoder %s FIFO underrun\n", transcoder_name(pch_transcoder)); + } } /** diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c index 966de4c7c7a2..fcfc217e754e 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c @@ -114,13 +114,12 @@ static void intel_frontbuffer_flush(struct drm_i915_private *dev_priv, } void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, - bool retire, enum fb_op_origin origin, unsigned int frontbuffer_bits) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - if (retire) { + if (origin == ORIGIN_CS) { spin_lock(&dev_priv->fb_tracking.lock); /* Filter out new bits since rendering started. */ frontbuffer_bits &= dev_priv->fb_tracking.busy_bits; diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.h b/drivers/gpu/drm/i915/intel_frontbuffer.h index 7bab41218cf7..63cd9a753a72 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.h +++ b/drivers/gpu/drm/i915/intel_frontbuffer.h @@ -38,7 +38,6 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, enum fb_op_origin origin, unsigned int frontbuffer_bits); void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, - bool retire, enum fb_op_origin origin, unsigned int frontbuffer_bits); @@ -69,15 +68,12 @@ static inline bool intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, /** * intel_fb_obj_flush - flush frontbuffer object * @obj: GEM object to flush - * @retire: set when retiring asynchronous rendering * @origin: which operation caused the flush * * This function gets called every time rendering on the given object has - * completed and frontbuffer caching can be started again. If @retire is true - * then any delayed flushes will be unblocked. + * completed and frontbuffer caching can be started again. */ static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj, - bool retire, enum fb_op_origin origin) { unsigned int frontbuffer_bits; @@ -86,7 +82,7 @@ static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj, if (!frontbuffer_bits) return; - __intel_fb_obj_flush(obj, retire, origin, frontbuffer_bits); + __intel_fb_obj_flush(obj, origin, frontbuffer_bits); } #endif /* __INTEL_FRONTBUFFER_H__ */ diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 2f1cf9aea04e..9885f760f2ef 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -520,10 +520,6 @@ int intel_guc_setup(struct drm_i915_private *dev_priv) guc_fw->load_status = INTEL_UC_FIRMWARE_SUCCESS; - DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n", - intel_uc_fw_status_repr(guc_fw->fetch_status), - intel_uc_fw_status_repr(guc_fw->load_status)); - intel_guc_auth_huc(dev_priv); if (i915.enable_guc_submission) { @@ -536,6 +532,11 @@ int intel_guc_setup(struct drm_i915_private *dev_priv) guc_interrupts_capture(dev_priv); } + DRM_INFO("GuC %s (firmware %s [version %u.%u])\n", + i915.enable_guc_submission ? "submission enabled" : "loaded", + guc_fw->path, + guc_fw->major_ver_found, guc_fw->minor_ver_found); + return 0; fail: @@ -713,12 +714,9 @@ fail: DRM_DEBUG_DRIVER("uC fw fetch status FAIL; err %d, fw %p, obj %p\n", err, fw, uc_fw->obj); - mutex_lock(&dev_priv->drm.struct_mutex); - obj = uc_fw->obj; + obj = fetch_and_zero(&uc_fw->obj); if (obj) i915_gem_object_put(obj); - uc_fw->obj = NULL; - mutex_unlock(&dev_priv->drm.struct_mutex); release_firmware(fw); /* OK even if fw is NULL */ uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL; @@ -792,16 +790,17 @@ void intel_guc_init(struct drm_i915_private *dev_priv) void intel_guc_fini(struct drm_i915_private *dev_priv) { struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; + struct drm_i915_gem_object *obj; mutex_lock(&dev_priv->drm.struct_mutex); guc_interrupts_release(dev_priv); i915_guc_submission_disable(dev_priv); i915_guc_submission_fini(dev_priv); - - if (guc_fw->obj) - i915_gem_object_put(guc_fw->obj); - guc_fw->obj = NULL; mutex_unlock(&dev_priv->drm.struct_mutex); + obj = fetch_and_zero(&guc_fw->obj); + if (obj) + i915_gem_object_put(obj); + guc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; } diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index f05971f5586f..dce742243ba6 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -480,3 +480,7 @@ void intel_hangcheck_init(struct drm_i915_private *i915) INIT_DELAYED_WORK(&i915->gpu_error.hangcheck_work, i915_hangcheck_elapsed); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_hangcheck.c" +#endif diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index ebae2bd83918..c2184f755ec6 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -902,12 +902,11 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); - enum intel_display_power_domain power_domain; u32 tmp; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -927,7 +926,7 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder, ret = true; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } @@ -1869,14 +1868,7 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, switch (port) { case PORT_B: - /* - * On BXT A0/A1, sw needs to activate DDIA HPD logic and - * interrupts to check the external panel connection. - */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - intel_encoder->hpd_pin = HPD_PORT_A; - else - intel_encoder->hpd_pin = HPD_PORT_B; + intel_encoder->hpd_pin = HPD_PORT_B; break; case PORT_C: intel_encoder->hpd_pin = HPD_PORT_C; @@ -1988,6 +1980,7 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv, } intel_encoder->type = INTEL_OUTPUT_HDMI; + intel_encoder->power_domain = intel_port_to_power_domain(port); intel_encoder->port = port; if (IS_CHERRYVIEW(dev_priv)) { if (port == PORT_D) diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index b62e3f8ad415..ba763e7d7dcf 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -100,7 +100,6 @@ bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port) } #define HPD_STORM_DETECT_PERIOD 1000 -#define HPD_STORM_THRESHOLD 5 #define HPD_STORM_REENABLE_DELAY (2 * 60 * 1000) /** @@ -112,9 +111,13 @@ bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port) * storms. Only the pin specific stats and state are changed, the caller is * responsible for further action. * - * @HPD_STORM_THRESHOLD irqs are allowed within @HPD_STORM_DETECT_PERIOD ms, - * otherwise it's considered an irq storm, and the irq state is set to - * @HPD_MARK_DISABLED. + * The number of irqs that are allowed within @HPD_STORM_DETECT_PERIOD is + * stored in @dev_priv->hotplug.hpd_storm_threshold which defaults to + * @HPD_STORM_DEFAULT_THRESHOLD. If this threshold is exceeded, it's + * considered an irq storm and the irq state is set to @HPD_MARK_DISABLED. + * + * The HPD threshold can be controlled through i915_hpd_storm_ctl in debugfs, + * and should only be adjusted for automated hotplug testing. * * Return true if an irq storm was detected on @pin. */ @@ -123,13 +126,15 @@ static bool intel_hpd_irq_storm_detect(struct drm_i915_private *dev_priv, { unsigned long start = dev_priv->hotplug.stats[pin].last_jiffies; unsigned long end = start + msecs_to_jiffies(HPD_STORM_DETECT_PERIOD); + const int threshold = dev_priv->hotplug.hpd_storm_threshold; bool storm = false; if (!time_in_range(jiffies, start, end)) { dev_priv->hotplug.stats[pin].last_jiffies = jiffies; dev_priv->hotplug.stats[pin].count = 0; DRM_DEBUG_KMS("Received HPD interrupt on PIN %d - cnt: 0\n", pin); - } else if (dev_priv->hotplug.stats[pin].count > HPD_STORM_THRESHOLD) { + } else if (dev_priv->hotplug.stats[pin].count > threshold && + threshold) { dev_priv->hotplug.stats[pin].state = HPD_MARK_DISABLED; DRM_DEBUG_KMS("HPD interrupt storm detected on PIN %d\n", pin); storm = true; @@ -152,7 +157,7 @@ static void intel_hpd_irq_storm_disable(struct drm_i915_private *dev_priv) enum hpd_pin pin; bool hpd_disabled = false; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); list_for_each_entry(connector, &mode_config->connector_list, head) { if (connector->polled != DRM_CONNECTOR_POLL_HPD) @@ -219,7 +224,7 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work) } } } - if (dev_priv->display.hpd_irq_setup) + if (dev_priv->display_irqs_enabled && dev_priv->display.hpd_irq_setup) dev_priv->display.hpd_irq_setup(dev_priv); spin_unlock_irq(&dev_priv->irq_lock); @@ -425,7 +430,7 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, } } - if (storm_detected) + if (storm_detected && dev_priv->display_irqs_enabled) dev_priv->display.hpd_irq_setup(dev_priv); spin_unlock(&dev_priv->irq_lock); @@ -471,10 +476,12 @@ void intel_hpd_init(struct drm_i915_private *dev_priv) * Interrupt setup is already guaranteed to be single-threaded, this is * just to make the assert_spin_locked checks happy. */ - spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->display.hpd_irq_setup) - dev_priv->display.hpd_irq_setup(dev_priv); - spin_unlock_irq(&dev_priv->irq_lock); + if (dev_priv->display_irqs_enabled && dev_priv->display.hpd_irq_setup) { + spin_lock_irq(&dev_priv->irq_lock); + if (dev_priv->display_irqs_enabled) + dev_priv->display.hpd_irq_setup(dev_priv); + spin_unlock_irq(&dev_priv->irq_lock); + } } static void i915_hpd_poll_init_work(struct work_struct *work) diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index c144609425f6..e660109fc51e 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -181,12 +181,14 @@ void intel_huc_init(struct drm_i915_private *dev_priv) } huc_fw->path = fw_path; + + if (huc_fw->path == NULL) + return; + huc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING; DRM_DEBUG_DRIVER("HuC firmware pending, path %s\n", fw_path); - WARN(huc_fw->path == NULL, "HuC present but no fw path\n"); - intel_uc_fw_fetch(dev_priv, huc_fw); } @@ -274,12 +276,11 @@ fail: void intel_huc_fini(struct drm_i915_private *dev_priv) { struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; + struct drm_i915_gem_object *obj; - mutex_lock(&dev_priv->drm.struct_mutex); - if (huc_fw->obj) - i915_gem_object_put(huc_fw->obj); - huc_fw->obj = NULL; - mutex_unlock(&dev_priv->drm.struct_mutex); + obj = fetch_and_zero(&huc_fw->obj); + if (obj) + i915_gem_object_put(obj); huc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; } diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index bce1ba80f277..b6401e8f1bd6 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -74,7 +74,7 @@ static const struct gmbus_pin *get_gmbus_pin(struct drm_i915_private *dev_priv, { if (IS_GEN9_LP(dev_priv)) return &gmbus_pins_bxt[pin]; - else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + else if (IS_GEN9_BC(dev_priv)) return &gmbus_pins_skl[pin]; else if (IS_BROADWELL(dev_priv)) return &gmbus_pins_bdw[pin]; @@ -89,7 +89,7 @@ bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv, if (IS_GEN9_LP(dev_priv)) size = ARRAY_SIZE(gmbus_pins_bxt); - else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + else if (IS_GEN9_BC(dev_priv)) size = ARRAY_SIZE(gmbus_pins_skl); else if (IS_BROADWELL(dev_priv)) size = ARRAY_SIZE(gmbus_pins_bdw); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index ebf8023d21e6..89f38e7def9f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -190,13 +190,7 @@ #define CTX_R_PWR_CLK_STATE 0x42 #define CTX_GPGPU_CSR_BASE_ADDRESS 0x44 -#define GEN8_CTX_VALID (1<<0) -#define GEN8_CTX_FORCE_PD_RESTORE (1<<1) -#define GEN8_CTX_FORCE_RESTORE (1<<2) -#define GEN8_CTX_L3LLC_COHERENT (1<<5) -#define GEN8_CTX_PRIVILEGE (1<<8) - -#define ASSIGN_CTX_REG(reg_state, pos, reg, val) do { \ +#define CTX_REG(reg_state, pos, reg, val) do { \ (reg_state)[(pos)+0] = i915_mmio_reg_offset(reg); \ (reg_state)[(pos)+1] = (val); \ } while (0) @@ -212,14 +206,6 @@ reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \ } while (0) -enum { - FAULT_AND_HANG = 0, - FAULT_AND_HALT, /* Debug only */ - FAULT_AND_STREAM, - FAULT_AND_CONTINUE /* Unsupported */ -}; -#define GEN8_CTX_ID_SHIFT 32 -#define GEN8_CTX_ID_WIDTH 21 #define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17 #define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26 @@ -267,30 +253,6 @@ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enabl return 0; } -static void -logical_ring_init_platform_invariants(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - engine->disable_lite_restore_wa = - IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1) && - (engine->id == VCS || engine->id == VCS2); - - engine->ctx_desc_template = GEN8_CTX_VALID; - if (IS_GEN8(dev_priv)) - engine->ctx_desc_template |= GEN8_CTX_L3LLC_COHERENT; - engine->ctx_desc_template |= GEN8_CTX_PRIVILEGE; - - /* TODO: WaDisableLiteRestore when we start using semaphore - * signalling between Command Streamers */ - /* ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; */ - - /* WaEnableForceRestoreInCtxtDescForVCS:skl */ - /* WaEnableForceRestoreInCtxtDescForVCS:bxt */ - if (engine->disable_lite_restore_wa) - engine->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; -} - /** * intel_lr_context_descriptor_update() - calculate & cache the descriptor * descriptor for a pinned context @@ -304,7 +266,7 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine) * * This is what a descriptor looks like, from LSB to MSB:: * - * bits 0-11: flags, GEN8_CTX_* (cached in ctx_desc_template) + * bits 0-11: flags, GEN8_CTX_* (cached in ctx->desc_template) * bits 12-31: LRCA, GTT address of (the HWSP of) this context * bits 32-52: ctx ID, a globally unique tag * bits 53-54: mbz, reserved for use by hardware @@ -319,8 +281,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx, BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (1<<GEN8_CTX_ID_WIDTH)); - desc = ctx->desc_template; /* bits 3-4 */ - desc |= engine->ctx_desc_template; /* bits 0-11 */ + desc = ctx->desc_template; /* bits 0-11 */ desc |= i915_ggtt_offset(ce->state) + LRC_PPHWSP_PN * PAGE_SIZE; /* bits 12-31 */ desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ @@ -364,6 +325,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; u32 *reg_state = ce->lrc_reg_state; + GEM_BUG_ON(!IS_ALIGNED(rq->tail, 8)); reg_state[CTX_RING_TAIL+1] = rq->tail; /* True 32b PPGTT with dynamic page allocation: update PDP @@ -371,7 +333,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) * PML4 is allocated during ppgtt init, so this is not needed * in 48-bit mode. */ - if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) + if (ppgtt && !i915_vm_is_48bit(&ppgtt->base)) execlists_update_context_pdps(ppgtt, reg_state); return ce->lrc_desc; @@ -385,17 +347,20 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine)); u64 desc[2]; + GEM_BUG_ON(port[0].count > 1); if (!port[0].count) execlists_context_status_change(port[0].request, INTEL_CONTEXT_SCHEDULE_IN); desc[0] = execlists_update_context(port[0].request); - engine->preempt_wa = port[0].count++; /* bdw only? fixed on skl? */ + GEM_DEBUG_EXEC(port[0].context_id = upper_32_bits(desc[0])); + port[0].count++; if (port[1].request) { GEM_BUG_ON(port[1].count); execlists_context_status_change(port[1].request, INTEL_CONTEXT_SCHEDULE_IN); desc[1] = execlists_update_context(port[1].request); + GEM_DEBUG_EXEC(port[1].context_id = upper_32_bits(desc[1])); port[1].count = 1; } else { desc[1] = 0; @@ -514,6 +479,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) cursor->priotree.priority = INT_MAX; __i915_gem_request_submit(cursor); + trace_i915_gem_request_in(cursor, port - engine->execlist_port); last = cursor; submit = true; } @@ -532,37 +498,11 @@ static bool execlists_elsp_idle(struct intel_engine_cs *engine) return !engine->execlist_port[0].request; } -/** - * intel_execlists_idle() - Determine if all engine submission ports are idle - * @dev_priv: i915 device private - * - * Return true if there are no requests pending on any of the submission ports - * of any engines. - */ -bool intel_execlists_idle(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - if (!i915.enable_execlists) - return true; - - for_each_engine(engine, dev_priv, id) - if (!execlists_elsp_idle(engine)) - return false; - - return true; -} - -static bool execlists_elsp_ready(struct intel_engine_cs *engine) +static bool execlists_elsp_ready(const struct intel_engine_cs *engine) { - int port; - - port = 1; /* wait for a free slot */ - if (engine->disable_lite_restore_wa || engine->preempt_wa) - port = 0; /* wait for GPU to be idle before continuing */ + const struct execlist_port *port = engine->execlist_port; - return !engine->execlist_port[port].request; + return port[0].count + port[1].count < 2; } /* @@ -577,7 +517,7 @@ static void intel_lrc_irq_handler(unsigned long data) intel_uncore_forcewake_get(dev_priv, engine->fw_domains); - if (!execlists_elsp_idle(engine)) { + while (test_and_clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) { u32 __iomem *csb_mmio = dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)); u32 __iomem *buf = @@ -587,31 +527,55 @@ static void intel_lrc_irq_handler(unsigned long data) csb = readl(csb_mmio); head = GEN8_CSB_READ_PTR(csb); tail = GEN8_CSB_WRITE_PTR(csb); + if (head == tail) + break; + if (tail < head) tail += GEN8_CSB_ENTRIES; - while (head < tail) { + do { unsigned int idx = ++head % GEN8_CSB_ENTRIES; unsigned int status = readl(buf + 2 * idx); + /* We are flying near dragons again. + * + * We hold a reference to the request in execlist_port[] + * but no more than that. We are operating in softirq + * context and so cannot hold any mutex or sleep. That + * prevents us stopping the requests we are processing + * in port[] from being retired simultaneously (the + * breadcrumb will be complete before we see the + * context-switch). As we only hold the reference to the + * request, any pointer chasing underneath the request + * is subject to a potential use-after-free. Thus we + * store all of the bookkeeping within port[] as + * required, and avoid using unguarded pointers beneath + * request itself. The same applies to the atomic + * status notifier. + */ + if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) continue; + /* Check the context/desc id for this event matches */ + GEM_DEBUG_BUG_ON(readl(buf + 2 * idx + 1) != + port[0].context_id); + GEM_BUG_ON(port[0].count == 0); if (--port[0].count == 0) { GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); + GEM_BUG_ON(!i915_gem_request_completed(port[0].request)); execlists_context_status_change(port[0].request, INTEL_CONTEXT_SCHEDULE_OUT); + trace_i915_gem_request_out(port[0].request); i915_gem_request_put(port[0].request); port[0] = port[1]; memset(&port[1], 0, sizeof(port[1])); - - engine->preempt_wa = false; } GEM_BUG_ON(port[0].count == 0 && !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); - } + } while (head < tail); writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, GEN8_CSB_WRITE_PTR(csb) << 8), @@ -658,10 +622,11 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) /* Will be called from irq-context when using foreign fences. */ spin_lock_irqsave(&engine->timeline->lock, flags); - if (insert_request(&request->priotree, &engine->execlist_queue)) + if (insert_request(&request->priotree, &engine->execlist_queue)) { engine->execlist_first = &request->priotree.node; - if (execlists_elsp_idle(engine)) - tasklet_hi_schedule(&engine->irq_tasklet); + if (execlists_elsp_ready(engine)) + tasklet_hi_schedule(&engine->irq_tasklet); + } spin_unlock_irqrestore(&engine->timeline->lock, flags); } @@ -784,11 +749,9 @@ static int execlists_context_pin(struct intel_engine_cs *engine, } GEM_BUG_ON(!ce->state); - flags = PIN_GLOBAL; + flags = PIN_GLOBAL | PIN_HIGH; if (ctx->ggtt_offset_bias) flags |= PIN_OFFSET_BIAS | ctx->ggtt_offset_bias; - if (i915_gem_context_is_kernel(ctx)) - flags |= PIN_HIGH; ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN, flags); if (ret) @@ -847,6 +810,7 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) { struct intel_engine_cs *engine = request->engine; struct intel_context *ce = &request->ctx->engine[engine->id]; + u32 *cs; int ret; GEM_BUG_ON(!ce->pin_count); @@ -871,9 +835,11 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) goto err; } - ret = intel_ring_begin(request, 0); - if (ret) + cs = intel_ring_begin(request, 0); + if (IS_ERR(cs)) { + ret = PTR_ERR(cs); goto err_unreserve; + } if (!ce->initialised) { ret = engine->init_context(request); @@ -900,51 +866,6 @@ err: return ret; } -static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) -{ - int ret, i; - struct intel_ring *ring = req->ring; - struct i915_workarounds *w = &req->i915->workarounds; - - if (w->count == 0) - return 0; - - ret = req->engine->emit_flush(req, EMIT_BARRIER); - if (ret) - return ret; - - ret = intel_ring_begin(req, w->count * 2 + 2); - if (ret) - return ret; - - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count)); - for (i = 0; i < w->count; i++) { - intel_ring_emit_reg(ring, w->reg[i].addr); - intel_ring_emit(ring, w->reg[i].value); - } - intel_ring_emit(ring, MI_NOOP); - - intel_ring_advance(ring); - - ret = req->engine->emit_flush(req, EMIT_BARRIER); - if (ret) - return ret; - - return 0; -} - -#define wa_ctx_emit(batch, index, cmd) \ - do { \ - int __index = (index)++; \ - if (WARN_ON(__index >= (PAGE_SIZE / sizeof(uint32_t)))) { \ - return -ENOSPC; \ - } \ - batch[__index] = (cmd); \ - } while (0) - -#define wa_ctx_emit_reg(batch, index, reg) \ - wa_ctx_emit((batch), (index), i915_mmio_reg_offset(reg)) - /* * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after * PIPE_CONTROL instruction. This is required for the flush to happen correctly @@ -961,56 +882,29 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) * This WA is also required for Gen9 so extracting as a function avoids * code duplication. */ -static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, - uint32_t *batch, - uint32_t index) +static u32 * +gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) { - uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES); - - wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 | - MI_SRM_LRM_GLOBAL_GTT)); - wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); - wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256); - wa_ctx_emit(batch, index, 0); - - wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); - wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); - wa_ctx_emit(batch, index, l3sqc4_flush); - - wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); - wa_ctx_emit(batch, index, (PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_DC_FLUSH_ENABLE)); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - - wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 | - MI_SRM_LRM_GLOBAL_GTT)); - wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); - wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256); - wa_ctx_emit(batch, index, 0); - - return index; -} + *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; + *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); + *batch++ = i915_ggtt_offset(engine->scratch) + 256; + *batch++ = 0; -static inline uint32_t wa_ctx_start(struct i915_wa_ctx_bb *wa_ctx, - uint32_t offset, - uint32_t start_alignment) -{ - return wa_ctx->offset = ALIGN(offset, start_alignment); -} + *batch++ = MI_LOAD_REGISTER_IMM(1); + *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); + *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES; -static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx, - uint32_t offset, - uint32_t size_alignment) -{ - wa_ctx->size = offset - wa_ctx->offset; + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_DC_FLUSH_ENABLE, + 0); - WARN(wa_ctx->size % size_alignment, - "wa_ctx_bb failed sanity checks: size %d is not aligned to %d\n", - wa_ctx->size, size_alignment); - return 0; + *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; + *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); + *batch++ = i915_ggtt_offset(engine->scratch) + 256; + *batch++ = 0; + + return batch; } /* @@ -1028,42 +922,28 @@ static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx, * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together * makes a complete batch buffer. */ -static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, - struct i915_wa_ctx_bb *wa_ctx, - uint32_t *batch, - uint32_t *offset) +static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) { - uint32_t scratch_addr; - uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - /* WaDisableCtxRestoreArbitration:bdw,chv */ - wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE); + *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */ - if (IS_BROADWELL(engine->i915)) { - int rc = gen8_emit_flush_coherentl3_wa(engine, batch, index); - if (rc < 0) - return rc; - index = rc; - } + if (IS_BROADWELL(engine->i915)) + batch = gen8_emit_flush_coherentl3_wa(engine, batch); /* WaClearSlmSpaceAtContextSwitch:bdw,chv */ /* Actual scratch location is at 128 bytes offset */ - scratch_addr = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; - - wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); - wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE)); - wa_ctx_emit(batch, index, scratch_addr); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE, + i915_ggtt_offset(engine->scratch) + + 2 * CACHELINE_BYTES); /* Pad to end of cacheline */ - while (index % CACHELINE_DWORDS) - wa_ctx_emit(batch, index, MI_NOOP); + while ((unsigned long)batch % CACHELINE_BYTES) + *batch++ = MI_NOOP; /* * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because @@ -1071,7 +951,7 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, * in the register CTX_RCS_INDIRECT_CTX */ - return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS); + return batch; } /* @@ -1083,65 +963,40 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, * This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding * to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant. */ -static int gen8_init_perctx_bb(struct intel_engine_cs *engine, - struct i915_wa_ctx_bb *wa_ctx, - uint32_t *batch, - uint32_t *offset) +static u32 *gen8_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch) { - uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - /* WaDisableCtxRestoreArbitration:bdw,chv */ - wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE); - - wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END); + *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + *batch++ = MI_BATCH_BUFFER_END; - return wa_ctx_end(wa_ctx, *offset = index, 1); + return batch; } -static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, - struct i915_wa_ctx_bb *wa_ctx, - uint32_t *batch, - uint32_t *offset) +static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) { - int ret; - struct drm_i915_private *dev_priv = engine->i915; - uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); + /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ + batch = gen8_emit_flush_coherentl3_wa(engine, batch); - /* WaDisableCtxRestoreArbitration:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE); - - /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt */ - ret = gen8_emit_flush_coherentl3_wa(engine, batch, index); - if (ret < 0) - return ret; - index = ret; - - /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl */ - wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); - wa_ctx_emit_reg(batch, index, COMMON_SLICE_CHICKEN2); - wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE( - GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE)); - wa_ctx_emit(batch, index, MI_NOOP); + /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */ + *batch++ = MI_LOAD_REGISTER_IMM(1); + *batch++ = i915_mmio_reg_offset(COMMON_SLICE_CHICKEN2); + *batch++ = _MASKED_BIT_DISABLE( + GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE); + *batch++ = MI_NOOP; /* WaClearSlmSpaceAtContextSwitch:kbl */ /* Actual scratch location is at 128 bytes offset */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)) { - u32 scratch_addr = - i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; - - wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); - wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE)); - wa_ctx_emit(batch, index, scratch_addr); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); + if (IS_KBL_REVID(engine->i915, 0, KBL_REVID_A0)) { + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE, + i915_ggtt_offset(engine->scratch) + + 2 * CACHELINE_BYTES); } - /* WaMediaPoolStateCmdInWABB:bxt */ + /* WaMediaPoolStateCmdInWABB:bxt,glk */ if (HAS_POOLED_EU(engine->i915)) { /* * EU pool configuration is setup along with golden context @@ -1156,73 +1011,37 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, * possible configurations, to avoid duplication they are * not shown here again. */ - u32 eu_pool_config = 0x00777000; - wa_ctx_emit(batch, index, GEN9_MEDIA_POOL_STATE); - wa_ctx_emit(batch, index, GEN9_MEDIA_POOL_ENABLE); - wa_ctx_emit(batch, index, eu_pool_config); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); + *batch++ = GEN9_MEDIA_POOL_STATE; + *batch++ = GEN9_MEDIA_POOL_ENABLE; + *batch++ = 0x00777000; + *batch++ = 0; + *batch++ = 0; + *batch++ = 0; } /* Pad to end of cacheline */ - while (index % CACHELINE_DWORDS) - wa_ctx_emit(batch, index, MI_NOOP); + while ((unsigned long)batch % CACHELINE_BYTES) + *batch++ = MI_NOOP; - return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS); + return batch; } -static int gen9_init_perctx_bb(struct intel_engine_cs *engine, - struct i915_wa_ctx_bb *wa_ctx, - uint32_t *batch, - uint32_t *offset) +static u32 *gen9_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch) { - uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - - /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */ - if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1)) { - wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); - wa_ctx_emit_reg(batch, index, GEN9_SLICE_COMMON_ECO_CHICKEN0); - wa_ctx_emit(batch, index, - _MASKED_BIT_ENABLE(DISABLE_PIXEL_MASK_CAMMING)); - wa_ctx_emit(batch, index, MI_NOOP); - } - - /* WaClearTdlStateAckDirtyBits:bxt */ - if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_B0)) { - wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(4)); + *batch++ = MI_BATCH_BUFFER_END; - wa_ctx_emit_reg(batch, index, GEN8_STATE_ACK); - wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(GEN9_SUBSLICE_TDL_ACK_BITS)); - - wa_ctx_emit_reg(batch, index, GEN9_STATE_ACK_SLICE1); - wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(GEN9_SUBSLICE_TDL_ACK_BITS)); - - wa_ctx_emit_reg(batch, index, GEN9_STATE_ACK_SLICE2); - wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(GEN9_SUBSLICE_TDL_ACK_BITS)); - - wa_ctx_emit_reg(batch, index, GEN7_ROW_CHICKEN2); - /* dummy write to CS, mask bits are 0 to ensure the register is not modified */ - wa_ctx_emit(batch, index, 0x0); - wa_ctx_emit(batch, index, MI_NOOP); - } - - /* WaDisableCtxRestoreArbitration:bxt */ - if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1)) - wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE); - - wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END); - - return wa_ctx_end(wa_ctx, *offset = index, 1); + return batch; } -static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size) +#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE) + +static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) { struct drm_i915_gem_object *obj; struct i915_vma *vma; int err; - obj = i915_gem_object_create(engine->i915, PAGE_ALIGN(size)); + obj = i915_gem_object_create(engine->i915, CTX_WA_BB_OBJ_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -1244,82 +1063,79 @@ err: return err; } -static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *engine) +static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine) { i915_vma_unpin_and_release(&engine->wa_ctx.vma); } +typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch); + static int intel_init_workaround_bb(struct intel_engine_cs *engine) { struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; - uint32_t *batch; - uint32_t offset; + struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx, + &wa_ctx->per_ctx }; + wa_bb_func_t wa_bb_fn[2]; struct page *page; + void *batch, *batch_ptr; + unsigned int i; int ret; - WARN_ON(engine->id != RCS); + if (WARN_ON(engine->id != RCS || !engine->scratch)) + return -EINVAL; - /* update this when WA for higher Gen are added */ - if (INTEL_GEN(engine->i915) > 9) { - DRM_ERROR("WA batch buffer is not initialized for Gen%d\n", - INTEL_GEN(engine->i915)); + switch (INTEL_GEN(engine->i915)) { + case 9: + wa_bb_fn[0] = gen9_init_indirectctx_bb; + wa_bb_fn[1] = gen9_init_perctx_bb; + break; + case 8: + wa_bb_fn[0] = gen8_init_indirectctx_bb; + wa_bb_fn[1] = gen8_init_perctx_bb; + break; + default: + MISSING_CASE(INTEL_GEN(engine->i915)); return 0; } - /* some WA perform writes to scratch page, ensure it is valid */ - if (!engine->scratch) { - DRM_ERROR("scratch page not allocated for %s\n", engine->name); - return -EINVAL; - } - - ret = lrc_setup_wa_ctx_obj(engine, PAGE_SIZE); + ret = lrc_setup_wa_ctx(engine); if (ret) { DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret); return ret; } page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0); - batch = kmap_atomic(page); - offset = 0; - - if (IS_GEN8(engine->i915)) { - ret = gen8_init_indirectctx_bb(engine, - &wa_ctx->indirect_ctx, - batch, - &offset); - if (ret) - goto out; - - ret = gen8_init_perctx_bb(engine, - &wa_ctx->per_ctx, - batch, - &offset); - if (ret) - goto out; - } else if (IS_GEN9(engine->i915)) { - ret = gen9_init_indirectctx_bb(engine, - &wa_ctx->indirect_ctx, - batch, - &offset); - if (ret) - goto out; + batch = batch_ptr = kmap_atomic(page); - ret = gen9_init_perctx_bb(engine, - &wa_ctx->per_ctx, - batch, - &offset); - if (ret) - goto out; + /* + * Emit the two workaround batch buffers, recording the offset from the + * start of the workaround batch buffer object for each and their + * respective sizes. + */ + for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) { + wa_bb[i]->offset = batch_ptr - batch; + if (WARN_ON(!IS_ALIGNED(wa_bb[i]->offset, CACHELINE_BYTES))) { + ret = -EINVAL; + break; + } + batch_ptr = wa_bb_fn[i](engine, batch_ptr); + wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset); } -out: + BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE); + kunmap_atomic(batch); if (ret) - lrc_destroy_wa_ctx_obj(engine); + lrc_destroy_wa_ctx(engine); return ret; } +static u32 port_seqno(struct execlist_port *port) +{ + return port->request ? port->request->global_seqno : 0; +} + static int gen8_init_common_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1334,7 +1150,6 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff); I915_WRITE(RING_MODE_GEN7(engine), - _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) | _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); I915_WRITE(RING_HWS_PGA(engine->mmio_base), engine->status_page.ggtt_offset); @@ -1343,7 +1158,12 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name); /* After a GPU reset, we may have requests to replay */ + clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); if (!execlists_elsp_idle(engine)) { + DRM_DEBUG_DRIVER("Restarting %s from requests [0x%x, 0x%x]\n", + engine->name, + port_seqno(&engine->execlist_port[0]), + port_seqno(&engine->execlist_port[1])); engine->execlist_port[0].count = 0; engine->execlist_port[1].count = 0; execlists_submit_ports(engine); @@ -1388,7 +1208,6 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine) static void reset_common_ring(struct intel_engine_cs *engine, struct drm_i915_gem_request *request) { - struct drm_i915_private *dev_priv = engine->i915; struct execlist_port *port = engine->execlist_port; struct intel_context *ce; @@ -1429,7 +1248,6 @@ static void reset_common_ring(struct intel_engine_cs *engine, return; /* Catch up with any missed context-switch interrupts */ - I915_WRITE(RING_CONTEXT_STATUS_PTR(engine), _MASKED_FIELD(0xffff, 0)); if (request->ctx != port[0].request->ctx) { i915_gem_request_put(port[0].request); port[0] = port[1]; @@ -1440,42 +1258,42 @@ static void reset_common_ring(struct intel_engine_cs *engine, /* Reset WaIdleLiteRestore:bdw,skl as well */ request->tail = request->wa_tail - WA_TAIL_DWORDS * sizeof(u32); + GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); } static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) { struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; - const int num_lri_cmds = GEN8_LEGACY_PDPES * 2; - int i, ret; + const int num_lri_cmds = GEN8_3LVL_PDPES * 2; + u32 *cs; + int i; - ret = intel_ring_begin(req, num_lri_cmds * 2 + 2); - if (ret) - return ret; + cs = intel_ring_begin(req, num_lri_cmds * 2 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_lri_cmds)); - for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { + *cs++ = MI_LOAD_REGISTER_IMM(num_lri_cmds); + for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) { const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); - intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, i)); - intel_ring_emit(ring, upper_32_bits(pd_daddr)); - intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, i)); - intel_ring_emit(ring, lower_32_bits(pd_daddr)); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i)); + *cs++ = upper_32_bits(pd_daddr); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i)); + *cs++ = lower_32_bits(pd_daddr); } - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } static int gen8_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, - unsigned int dispatch_flags) + const unsigned int flags) { - struct intel_ring *ring = req->ring; - bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE); + u32 *cs; int ret; /* Don't rely in hw updating PDPs, specially in lite-restore. @@ -1485,30 +1303,28 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, * not idle). PML4 is allocated during ppgtt init so this is * not needed in 48-bit.*/ if (req->ctx->ppgtt && - (intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings)) { - if (!USES_FULL_48BIT_PPGTT(req->i915) && - !intel_vgpu_active(req->i915)) { - ret = intel_logical_ring_emit_pdps(req); - if (ret) - return ret; - } + (intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings) && + !i915_vm_is_48bit(&req->ctx->ppgtt->base) && + !intel_vgpu_active(req->i915)) { + ret = intel_logical_ring_emit_pdps(req); + if (ret) + return ret; req->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(req->engine); } - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* FIXME(BDW): Address space and security selectors. */ - intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | - (ppgtt<<8) | - (dispatch_flags & I915_DISPATCH_RS ? - MI_BATCH_RESOURCE_STREAMER : 0)); - intel_ring_emit(ring, lower_32_bits(offset)); - intel_ring_emit(ring, upper_32_bits(offset)); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START_GEN8 | + (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)) | + (flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1529,13 +1345,11 @@ static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) { - struct intel_ring *ring = request->ring; - u32 cmd; - int ret; + u32 cmd, *cs; - ret = intel_ring_begin(request, 4); - if (ret) - return ret; + cs = intel_ring_begin(request, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); cmd = MI_FLUSH_DW + 1; @@ -1552,13 +1366,11 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) cmd |= MI_INVALIDATE_BSD; } - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, - I915_GEM_HWS_SCRATCH_ADDR | - MI_FLUSH_DW_USE_GTT); - intel_ring_emit(ring, 0); /* upper addr */ - intel_ring_emit(ring, 0); /* value */ - intel_ring_advance(ring); + *cs++ = cmd; + *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = 0; /* upper addr */ + *cs++ = 0; /* value */ + intel_ring_advance(request, cs); return 0; } @@ -1566,13 +1378,11 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) static int gen8_emit_flush_render(struct drm_i915_gem_request *request, u32 mode) { - struct intel_ring *ring = request->ring; struct intel_engine_cs *engine = request->engine; u32 scratch_addr = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; bool vf_flush_wa = false, dc_flush_wa = false; - u32 flags = 0; - int ret; + u32 *cs, flags = 0; int len; flags |= PIPE_CONTROL_CS_STALL; @@ -1614,62 +1424,25 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, if (dc_flush_wa) len += 12; - ret = intel_ring_begin(request, len); - if (ret) - return ret; + cs = intel_ring_begin(request, len); + if (IS_ERR(cs)) + return PTR_ERR(cs); - if (vf_flush_wa) { - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - } + if (vf_flush_wa) + cs = gen8_emit_pipe_control(cs, 0, 0); - if (dc_flush_wa) { - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, PIPE_CONTROL_DC_FLUSH_ENABLE); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - } + if (dc_flush_wa) + cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE, + 0); - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, flags); - intel_ring_emit(ring, scratch_addr); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - - if (dc_flush_wa) { - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, PIPE_CONTROL_CS_STALL); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - } + cs = gen8_emit_pipe_control(cs, flags, scratch_addr); - intel_ring_advance(ring); + if (dc_flush_wa) + cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0); - return 0; -} + intel_ring_advance(request, cs); -static void bxt_a_seqno_barrier(struct intel_engine_cs *engine) -{ - /* - * On BXT A steppings there is a HW coherency issue whereby the - * MI_STORE_DATA_IMM storing the completed request's seqno - * occasionally doesn't invalidate the CPU cache. Work around this by - * clflushing the corresponding cacheline whenever the caller wants - * the coherency to be guaranteed. Note that this cacheline is known - * to be clean at this point, since we only write it in - * bxt_a_set_seqno(), where we also do a clflush after the write. So - * this clflush in practice becomes an invalidate operation. - */ - intel_flush_status_page(engine, I915_GEM_HWS_INDEX); + return 0; } /* @@ -1677,34 +1450,34 @@ static void bxt_a_seqno_barrier(struct intel_engine_cs *engine) * used as a workaround for not being allowed to do lite * restore with HEAD==TAIL (WaIdleLiteRestore). */ -static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *out) +static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs) { - *out++ = MI_NOOP; - *out++ = MI_NOOP; - request->wa_tail = intel_ring_offset(request->ring, out); + *cs++ = MI_NOOP; + *cs++ = MI_NOOP; + request->wa_tail = intel_ring_offset(request, cs); } -static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, - u32 *out) +static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) { /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); - *out++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; - *out++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT; - *out++ = 0; - *out++ = request->global_seqno; - *out++ = MI_USER_INTERRUPT; - *out++ = MI_NOOP; - request->tail = intel_ring_offset(request->ring, out); + *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; + *cs++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT; + *cs++ = 0; + *cs++ = request->global_seqno; + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + request->tail = intel_ring_offset(request, cs); + GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); - gen8_emit_wa_tail(request, out); + gen8_emit_wa_tail(request, cs); } static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, - u32 *out) + u32 *cs) { /* We're using qword write, seqno should be aligned to 8 bytes. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); @@ -1713,20 +1486,20 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, * need a prior CS_STALL, which is emitted by the flush * following the batch. */ - *out++ = GFX_OP_PIPE_CONTROL(6); - *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE); - *out++ = intel_hws_seqno_address(request->engine); - *out++ = 0; - *out++ = request->global_seqno; + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE; + *cs++ = intel_hws_seqno_address(request->engine); + *cs++ = 0; + *cs++ = request->global_seqno; /* We're thrashing one dword of HWS. */ - *out++ = 0; - *out++ = MI_USER_INTERRUPT; - *out++ = MI_NOOP; - request->tail = intel_ring_offset(request->ring, out); + *cs++ = 0; + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + request->tail = intel_ring_offset(request, cs); + GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); - gen8_emit_wa_tail(request, out); + gen8_emit_wa_tail(request, cs); } static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS; @@ -1735,7 +1508,7 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req) { int ret; - ret = intel_logical_ring_workarounds_emit(req); + ret = intel_ring_workarounds_emit(req); if (ret) return ret; @@ -1781,7 +1554,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) intel_engine_cleanup_common(engine); - lrc_destroy_wa_ctx_obj(engine); + lrc_destroy_wa_ctx(engine); engine->i915 = NULL; dev_priv->engine[engine->id] = NULL; kfree(engine); @@ -1819,8 +1592,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->irq_enable = gen8_logical_ring_enable_irq; engine->irq_disable = gen8_logical_ring_disable_irq; engine->emit_bb_start = gen8_emit_bb_start; - if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1)) - engine->irq_seqno_barrier = bxt_a_seqno_barrier; } static inline void @@ -1877,7 +1648,6 @@ logical_ring_setup(struct intel_engine_cs *engine) tasklet_init(&engine->irq_tasklet, intel_lrc_irq_handler, (unsigned long)engine); - logical_ring_init_platform_invariants(engine); logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); } @@ -2015,105 +1785,89 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) return indirect_ctx_offset; } -static void execlists_init_reg_state(u32 *reg_state, +static void execlists_init_reg_state(u32 *regs, struct i915_gem_context *ctx, struct intel_engine_cs *engine, struct intel_ring *ring) { struct drm_i915_private *dev_priv = engine->i915; struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: dev_priv->mm.aliasing_ppgtt; + u32 base = engine->mmio_base; + bool rcs = engine->id == RCS; + + /* A context is actually a big batch buffer with several + * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The + * values we are setting here are only for the first context restore: + * on a subsequent save, the GPU will recreate this batchbuffer with new + * values (including all the missing MI_LOAD_REGISTER_IMM commands that + * we are not initializing here). + */ + regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) | + MI_LRI_FORCE_POSTED; + + CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(engine), + _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | + CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | + (HAS_RESOURCE_STREAMER(dev_priv) ? + CTX_CTRL_RS_CTX_ENABLE : 0))); + CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0); + CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0); + CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0); + CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base), + RING_CTL_SIZE(ring->size) | RING_VALID); + CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0); + CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0); + CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT); + CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0); + CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0); + CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0); + if (rcs) { + CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0); + CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0); + CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET, + RING_INDIRECT_CTX_OFFSET(base), 0); - /* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM - * commands followed by (reg, value) pairs. The values we are setting here are - * only for the first context restore: on a subsequent save, the GPU will - * recreate this batchbuffer with new values (including all the missing - * MI_LOAD_REGISTER_IMM commands that we are not initializing here). */ - reg_state[CTX_LRI_HEADER_0] = - MI_LOAD_REGISTER_IMM(engine->id == RCS ? 14 : 11) | MI_LRI_FORCE_POSTED; - ASSIGN_CTX_REG(reg_state, CTX_CONTEXT_CONTROL, - RING_CONTEXT_CONTROL(engine), - _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | - CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | - (HAS_RESOURCE_STREAMER(dev_priv) ? - CTX_CTRL_RS_CTX_ENABLE : 0))); - ASSIGN_CTX_REG(reg_state, CTX_RING_HEAD, RING_HEAD(engine->mmio_base), - 0); - ASSIGN_CTX_REG(reg_state, CTX_RING_TAIL, RING_TAIL(engine->mmio_base), - 0); - ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_START, - RING_START(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_CONTROL, - RING_CTL(engine->mmio_base), - RING_CTL_SIZE(ring->size) | RING_VALID); - ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_U, - RING_BBADDR_UDW(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_L, - RING_BBADDR(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_BB_STATE, - RING_BBSTATE(engine->mmio_base), - RING_BB_PPGTT); - ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_HEAD_U, - RING_SBBADDR_UDW(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_HEAD_L, - RING_SBBADDR(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_STATE, - RING_SBBSTATE(engine->mmio_base), 0); - if (engine->id == RCS) { - ASSIGN_CTX_REG(reg_state, CTX_BB_PER_CTX_PTR, - RING_BB_PER_CTX_PTR(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX, - RING_INDIRECT_CTX(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX_OFFSET, - RING_INDIRECT_CTX_OFFSET(engine->mmio_base), 0); if (engine->wa_ctx.vma) { struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - reg_state[CTX_RCS_INDIRECT_CTX+1] = - (ggtt_offset + wa_ctx->indirect_ctx.offset * sizeof(uint32_t)) | - (wa_ctx->indirect_ctx.size / CACHELINE_DWORDS); + regs[CTX_RCS_INDIRECT_CTX + 1] = + (ggtt_offset + wa_ctx->indirect_ctx.offset) | + (wa_ctx->indirect_ctx.size / CACHELINE_BYTES); - reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = + regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] = intel_lr_indirect_ctx_offset(engine) << 6; - reg_state[CTX_BB_PER_CTX_PTR+1] = - (ggtt_offset + wa_ctx->per_ctx.offset * sizeof(uint32_t)) | - 0x01; + regs[CTX_BB_PER_CTX_PTR + 1] = + (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; } } - reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED; - ASSIGN_CTX_REG(reg_state, CTX_CTX_TIMESTAMP, - RING_CTX_TIMESTAMP(engine->mmio_base), 0); + + regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED; + + CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0); /* PDP values well be assigned later if needed */ - ASSIGN_CTX_REG(reg_state, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(engine, 3), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(engine, 3), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(engine, 2), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(engine, 2), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(engine, 1), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(engine, 1), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), - 0); - - if (ppgtt && USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { + CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(engine, 3), 0); + CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(engine, 3), 0); + CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(engine, 2), 0); + CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(engine, 2), 0); + CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(engine, 1), 0); + CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(engine, 1), 0); + CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0), 0); + CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), 0); + + if (ppgtt && i915_vm_is_48bit(&ppgtt->base)) { /* 64b PPGTT (48bit canonical) * PDP0_DESCRIPTOR contains the base address to PML4 and * other PDP Descriptors are ignored. */ - ASSIGN_CTX_PML4(ppgtt, reg_state); + ASSIGN_CTX_PML4(ppgtt, regs); } - if (engine->id == RCS) { - reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); - ASSIGN_CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, - make_rpcs(dev_priv)); + if (rcs) { + regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); + CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, + make_rpcs(dev_priv)); } } diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 0c852c024227..5fc07761caff 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -68,8 +68,6 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine); int logical_render_ring_init(struct intel_engine_cs *engine); int logical_xcs_ring_init(struct intel_engine_cs *engine); -int intel_engines_init(struct drm_i915_private *dev_priv); - /* Logical Ring Contexts */ /* One extra page is added before LRC for GuC as shared data */ @@ -90,6 +88,5 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enable_execlists); void intel_execlists_enable_submission(struct drm_i915_private *dev_priv); -bool intel_execlists_idle(struct drm_i915_private *dev_priv); #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c index c300647ef604..71cbe9c08932 100644 --- a/drivers/gpu/drm/i915/intel_lspcon.c +++ b/drivers/gpu/drm/i915/intel_lspcon.c @@ -162,21 +162,8 @@ static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon) struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); unsigned long start = jiffies; - if (!lspcon->desc_valid) - return; - while (1) { - struct intel_dp_desc desc; - - /* - * The w/a only applies in PCON mode and we don't expect any - * AUX errors. - */ - if (!__intel_dp_read_desc(intel_dp, &desc)) - return; - - if (intel_digital_port_connected(dev_priv, dig_port) && - !memcmp(&intel_dp->desc, &desc, sizeof(desc))) { + if (intel_digital_port_connected(dev_priv, dig_port)) { DRM_DEBUG_KMS("LSPCON recovering in PCON mode after %u ms\n", jiffies_to_msecs(jiffies - start)); return; @@ -253,7 +240,7 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port) return false; } - lspcon->desc_valid = intel_dp_read_desc(dp); + intel_dp_read_desc(dp); DRM_DEBUG_KMS("Success: LSPCON init\n"); return true; diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 9ca4dc4d2378..8b942ef2b3ec 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -91,12 +91,11 @@ static bool intel_lvds_get_hw_state(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); - enum intel_display_power_domain power_domain; u32 tmp; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -114,7 +113,7 @@ static bool intel_lvds_get_hw_state(struct intel_encoder *encoder, ret = true; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } @@ -1066,6 +1065,7 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) intel_connector_attach_encoder(intel_connector, intel_encoder); intel_encoder->type = INTEL_OUTPUT_LVDS; + intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = PORT_NONE; intel_encoder->cloneable = 0; if (HAS_PCH_SPLIT(dev_priv)) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index c787fc4e6eb9..92e461c68385 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -178,7 +178,7 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, { bool result = false; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { table->size = ARRAY_SIZE(skylake_mocs_table); table->table = skylake_mocs_table; result = true; @@ -191,7 +191,7 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, "Platform that should have a MOCS table does not.\n"); } - /* WaDisableSkipCaching:skl,bxt,kbl */ + /* WaDisableSkipCaching:skl,bxt,kbl,glk */ if (IS_GEN9(dev_priv)) { int i; @@ -276,23 +276,22 @@ int intel_mocs_init_engine(struct intel_engine_cs *engine) static int emit_mocs_control_table(struct drm_i915_gem_request *req, const struct drm_i915_mocs_table *table) { - struct intel_ring *ring = req->ring; enum intel_engine_id engine = req->engine->id; unsigned int index; - int ret; + u32 *cs; if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) return -ENODEV; - ret = intel_ring_begin(req, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); - if (ret) - return ret; + cs = intel_ring_begin(req, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES)); + *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES); for (index = 0; index < table->size; index++) { - intel_ring_emit_reg(ring, mocs_register(engine, index)); - intel_ring_emit(ring, table->table[index].control_value); + *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); + *cs++ = table->table[index].control_value; } /* @@ -304,12 +303,12 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req, * that value to all the used entries. */ for (; index < GEN9_NUM_MOCS_ENTRIES; index++) { - intel_ring_emit_reg(ring, mocs_register(engine, index)); - intel_ring_emit(ring, table->table[0].control_value); + *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); + *cs++ = table->table[0].control_value; } - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -336,29 +335,27 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, const struct drm_i915_mocs_table *table) { - struct intel_ring *ring = req->ring; unsigned int i; - int ret; + u32 *cs; if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) return -ENODEV; - ret = intel_ring_begin(req, 2 + GEN9_NUM_MOCS_ENTRIES); - if (ret) - return ret; + cs = intel_ring_begin(req, 2 + GEN9_NUM_MOCS_ENTRIES); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2)); + *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2); for (i = 0; i < table->size/2; i++) { - intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i)); - intel_ring_emit(ring, l3cc_combine(table, 2*i, 2*i+1)); + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); + *cs++ = l3cc_combine(table, 2 * i, 2 * i + 1); } if (table->size & 0x01) { /* Odd table size - 1 left over */ - intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i)); - intel_ring_emit(ring, l3cc_combine(table, 2*i, 0)); + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); + *cs++ = l3cc_combine(table, 2 * i, 0); i++; } @@ -368,12 +365,12 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, * they are reserved by the hardware. */ for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) { - intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i)); - intel_ring_emit(ring, l3cc_combine(table, 0, 0)); + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); + *cs++ = l3cc_combine(table, 0, 0); } - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 0608fad7f593..5ef9f5bfb92c 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -267,8 +267,7 @@ static int intel_overlay_on(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; struct drm_i915_gem_request *req; - struct intel_ring *ring; - int ret; + u32 *cs; WARN_ON(overlay->active); WARN_ON(IS_I830(dev_priv) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE)); @@ -277,10 +276,10 @@ static int intel_overlay_on(struct intel_overlay *overlay) if (IS_ERR(req)) return PTR_ERR(req); - ret = intel_ring_begin(req, 4); - if (ret) { + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) { i915_add_request_no_flush(req); - return ret; + return PTR_ERR(cs); } overlay->active = true; @@ -288,12 +287,11 @@ static int intel_overlay_on(struct intel_overlay *overlay) if (IS_I830(dev_priv)) i830_overlay_clock_gating(dev_priv, false); - ring = req->ring; - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON); - intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE); - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_ON; + *cs++ = overlay->flip_addr | OFC_UPDATE; + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return intel_overlay_do_wait_request(overlay, req, NULL); } @@ -326,10 +324,8 @@ static int intel_overlay_continue(struct intel_overlay *overlay, { struct drm_i915_private *dev_priv = overlay->i915; struct drm_i915_gem_request *req; - struct intel_ring *ring; u32 flip_addr = overlay->flip_addr; - u32 tmp; - int ret; + u32 tmp, *cs; WARN_ON(!overlay->active); @@ -345,16 +341,15 @@ static int intel_overlay_continue(struct intel_overlay *overlay, if (IS_ERR(req)) return PTR_ERR(req); - ret = intel_ring_begin(req, 2); - if (ret) { + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) { i915_add_request_no_flush(req); - return ret; + return PTR_ERR(cs); } - ring = req->ring; - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); - intel_ring_emit(ring, flip_addr); - intel_ring_advance(ring); + *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE; + *cs++ = flip_addr; + intel_ring_advance(req, cs); intel_overlay_flip_prepare(overlay, vma); @@ -408,9 +403,7 @@ static void intel_overlay_off_tail(struct i915_gem_active *active, static int intel_overlay_off(struct intel_overlay *overlay) { struct drm_i915_gem_request *req; - struct intel_ring *ring; - u32 flip_addr = overlay->flip_addr; - int ret; + u32 *cs, flip_addr = overlay->flip_addr; WARN_ON(!overlay->active); @@ -424,25 +417,23 @@ static int intel_overlay_off(struct intel_overlay *overlay) if (IS_ERR(req)) return PTR_ERR(req); - ret = intel_ring_begin(req, 6); - if (ret) { + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) { i915_add_request_no_flush(req); - return ret; + return PTR_ERR(cs); } - ring = req->ring; - /* wait for overlay to go idle */ - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); - intel_ring_emit(ring, flip_addr); - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); + *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE; + *cs++ = flip_addr; + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; /* turn overlay off */ - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_OFF); - intel_ring_emit(ring, flip_addr); - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); + *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_OFF; + *cs++ = flip_addr; + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; - intel_ring_advance(ring); + intel_ring_advance(req, cs); intel_overlay_flip_prepare(overlay, NULL); @@ -465,6 +456,7 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay) static int intel_overlay_release_old_vid(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; + u32 *cs; int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); @@ -478,23 +470,20 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) { /* synchronous slowpath */ struct drm_i915_gem_request *req; - struct intel_ring *ring; req = alloc_request(overlay); if (IS_ERR(req)) return PTR_ERR(req); - ret = intel_ring_begin(req, 2); - if (ret) { + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) { i915_add_request_no_flush(req); - return ret; + return PTR_ERR(cs); } - ring = req->ring; - intel_ring_emit(ring, - MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); ret = intel_overlay_do_wait_request(overlay, req, intel_overlay_release_old_vid_tail); diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 1a6ff26dea20..cb50c527401f 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -1315,7 +1315,7 @@ static u32 i9xx_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) if (IS_PINEVIEW(dev_priv)) clock = KHz(dev_priv->rawclk_freq); else - clock = KHz(dev_priv->cdclk_freq); + clock = KHz(dev_priv->cdclk.hw.cdclk); return DIV_ROUND_CLOSEST(clock, pwm_freq_hz * 32); } @@ -1333,7 +1333,7 @@ static u32 i965_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) if (IS_G4X(dev_priv)) clock = KHz(dev_priv->rawclk_freq); else - clock = KHz(dev_priv->cdclk_freq); + clock = KHz(dev_priv->cdclk.hw.cdclk); return DIV_ROUND_CLOSEST(clock, pwm_freq_hz * 128); } diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index 5aa524e32df7..9fd9c70baeed 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -80,7 +80,7 @@ static int i915_pipe_crc_release(struct inode *inode, struct file *filep) static int pipe_crc_data_count(struct intel_pipe_crc *pipe_crc) { - assert_spin_locked(&pipe_crc->lock); + lockdep_assert_held(&pipe_crc->lock); return CIRC_CNT(pipe_crc->head, pipe_crc->tail, INTEL_PIPE_CRC_ENTRIES_NR); } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 249623d45be0..99e09f63d4b3 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -65,12 +65,12 @@ static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(GEN8_CONFIG0, I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES); - /* WaEnableChickenDCPR:skl,bxt,kbl */ + /* WaEnableChickenDCPR:skl,bxt,kbl,glk */ I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM); /* WaFbcTurnOffFbcWatermark:skl,bxt,kbl */ - /* WaFbcWakeMemOn:skl,bxt,kbl */ + /* WaFbcWakeMemOn:skl,bxt,kbl,glk */ I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | DISP_FBC_WM_DIS | DISP_FBC_MEMORY_WAKE); @@ -99,9 +99,31 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv) * Wa: Backlight PWM may stop in the asserted state, causing backlight * to stay fully on. */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) - I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | - PWM1_GATING_DIS | PWM2_GATING_DIS); + I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | + PWM1_GATING_DIS | PWM2_GATING_DIS); +} + +static void glk_init_clock_gating(struct drm_i915_private *dev_priv) +{ + gen9_init_clock_gating(dev_priv); + + /* + * WaDisablePWMClockGating:glk + * Backlight PWM may stop in the asserted state, causing backlight + * to stay fully on. + */ + I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | + PWM1_GATING_DIS | PWM2_GATING_DIS); + + /* WaDDIIOTimeout:glk */ + if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1)) { + u32 val = I915_READ(CHICKEN_MISC_2); + val &= ~(GLK_CL0_PWR_DOWN | + GLK_CL1_PWR_DOWN | + GLK_CL2_PWR_DOWN); + I915_WRITE(CHICKEN_MISC_2, val); + } + } static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv) @@ -355,6 +377,8 @@ static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enabl return false; } + trace_intel_memory_cxsr(dev_priv, was_enabled, enable); + DRM_DEBUG_KMS("memory self-refresh is %s (was %s)\n", enableddisabled(enable), enableddisabled(was_enabled)); @@ -393,15 +417,15 @@ static const int pessimal_latency_ns = 5000; #define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \ ((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8)) -static int vlv_get_fifo_size(struct intel_plane *plane) +static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - int sprite0_start, sprite1_start, size; - - if (plane->id == PLANE_CURSOR) - return 63; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; + enum pipe pipe = crtc->pipe; + int sprite0_start, sprite1_start; - switch (plane->pipe) { + switch (pipe) { uint32_t dsparb, dsparb2, dsparb3; case PIPE_A: dsparb = I915_READ(DSPARB); @@ -422,26 +446,21 @@ static int vlv_get_fifo_size(struct intel_plane *plane) sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20); break; default: - return 0; - } - - switch (plane->id) { - case PLANE_PRIMARY: - size = sprite0_start; - break; - case PLANE_SPRITE0: - size = sprite1_start - sprite0_start; - break; - case PLANE_SPRITE1: - size = 512 - 1 - sprite1_start; - break; - default: - return 0; + MISSING_CASE(pipe); + return; } - DRM_DEBUG_KMS("%s FIFO size: %d\n", plane->base.name, size); + fifo_state->plane[PLANE_PRIMARY] = sprite0_start; + fifo_state->plane[PLANE_SPRITE0] = sprite1_start - sprite0_start; + fifo_state->plane[PLANE_SPRITE1] = 511 - sprite1_start; + fifo_state->plane[PLANE_CURSOR] = 63; - return size; + DRM_DEBUG_KMS("Pipe %c FIFO size: %d/%d/%d/%d\n", + pipe_name(pipe), + fifo_state->plane[PLANE_PRIMARY], + fifo_state->plane[PLANE_SPRITE0], + fifo_state->plane[PLANE_SPRITE1], + fifo_state->plane[PLANE_CURSOR]); } static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv, int plane) @@ -871,6 +890,8 @@ static void vlv_write_wm_values(struct drm_i915_private *dev_priv, enum pipe pipe; for_each_pipe(dev_priv, pipe) { + trace_vlv_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm); + I915_WRITE(VLV_DDL(pipe), (wm->ddl[pipe].plane[PLANE_CURSOR] << DDL_CURSOR_SHIFT) | (wm->ddl[pipe].plane[PLANE_SPRITE1] << DDL_SPRITE_SHIFT(1)) | @@ -941,12 +962,6 @@ static void vlv_write_wm_values(struct drm_i915_private *dev_priv, #undef FW_WM_VLV -enum vlv_wm_level { - VLV_WM_LEVEL_PM2, - VLV_WM_LEVEL_PM5, - VLV_WM_LEVEL_DDR_DVFS, -}; - /* latency must be in 0.1us units. */ static unsigned int vlv_wm_method2(unsigned int pixel_rate, unsigned int pipe_htotal, @@ -1017,71 +1032,114 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, return min_t(int, wm, USHRT_MAX); } -static void vlv_compute_fifo(struct intel_crtc *crtc) +static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes) { - struct drm_device *dev = crtc->base.dev; - struct vlv_wm_state *wm_state = &crtc->wm_state; - struct intel_plane *plane; - unsigned int total_rate = 0; - const int fifo_size = 512 - 1; + return (active_planes & (BIT(PLANE_SPRITE0) | + BIT(PLANE_SPRITE1))) == BIT(PLANE_SPRITE1); +} + +static int vlv_compute_fifo(struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + const struct vlv_pipe_wm *raw = + &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2]; + struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; + unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR); + int num_active_planes = hweight32(active_planes); + const int fifo_size = 511; int fifo_extra, fifo_left = fifo_size; + int sprite0_fifo_extra = 0; + unsigned int total_rate; + enum plane_id plane_id; - for_each_intel_plane_on_crtc(dev, crtc, plane) { - struct intel_plane_state *state = - to_intel_plane_state(plane->base.state); + /* + * When enabling sprite0 after sprite1 has already been enabled + * we tend to get an underrun unless sprite0 already has some + * FIFO space allcoated. Hence we always allocate at least one + * cacheline for sprite0 whenever sprite1 is enabled. + * + * All other plane enable sequences appear immune to this problem. + */ + if (vlv_need_sprite0_fifo_workaround(active_planes)) + sprite0_fifo_extra = 1; - if (plane->base.type == DRM_PLANE_TYPE_CURSOR) - continue; + total_rate = raw->plane[PLANE_PRIMARY] + + raw->plane[PLANE_SPRITE0] + + raw->plane[PLANE_SPRITE1] + + sprite0_fifo_extra; - if (state->base.visible) { - wm_state->num_active_planes++; - total_rate += state->base.fb->format->cpp[0]; - } - } + if (total_rate > fifo_size) + return -EINVAL; - for_each_intel_plane_on_crtc(dev, crtc, plane) { - struct intel_plane_state *state = - to_intel_plane_state(plane->base.state); - unsigned int rate; + if (total_rate == 0) + total_rate = 1; - if (plane->base.type == DRM_PLANE_TYPE_CURSOR) { - plane->wm.fifo_size = 63; - continue; - } + for_each_plane_id_on_crtc(crtc, plane_id) { + unsigned int rate; - if (!state->base.visible) { - plane->wm.fifo_size = 0; + if ((active_planes & BIT(plane_id)) == 0) { + fifo_state->plane[plane_id] = 0; continue; } - rate = state->base.fb->format->cpp[0]; - plane->wm.fifo_size = fifo_size * rate / total_rate; - fifo_left -= plane->wm.fifo_size; + rate = raw->plane[plane_id]; + fifo_state->plane[plane_id] = fifo_size * rate / total_rate; + fifo_left -= fifo_state->plane[plane_id]; } - fifo_extra = DIV_ROUND_UP(fifo_left, wm_state->num_active_planes ?: 1); + fifo_state->plane[PLANE_SPRITE0] += sprite0_fifo_extra; + fifo_left -= sprite0_fifo_extra; + + fifo_state->plane[PLANE_CURSOR] = 63; + + fifo_extra = DIV_ROUND_UP(fifo_left, num_active_planes ?: 1); /* spread the remainder evenly */ - for_each_intel_plane_on_crtc(dev, crtc, plane) { + for_each_plane_id_on_crtc(crtc, plane_id) { int plane_extra; if (fifo_left == 0) break; - if (plane->base.type == DRM_PLANE_TYPE_CURSOR) - continue; - - /* give it all to the first plane if none are active */ - if (plane->wm.fifo_size == 0 && - wm_state->num_active_planes) + if ((active_planes & BIT(plane_id)) == 0) continue; plane_extra = min(fifo_extra, fifo_left); - plane->wm.fifo_size += plane_extra; + fifo_state->plane[plane_id] += plane_extra; fifo_left -= plane_extra; } - WARN_ON(fifo_left != 0); + WARN_ON(active_planes != 0 && fifo_left != 0); + + /* give it all to the first plane if none are active */ + if (active_planes == 0) { + WARN_ON(fifo_left != fifo_size); + fifo_state->plane[PLANE_PRIMARY] = fifo_left; + } + + return 0; +} + +static int vlv_num_wm_levels(struct drm_i915_private *dev_priv) +{ + return dev_priv->wm.max_level + 1; +} + +/* mark all levels starting from 'level' as invalid */ +static void vlv_invalidate_wms(struct intel_crtc *crtc, + struct vlv_wm_state *wm_state, int level) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + + for (; level < vlv_num_wm_levels(dev_priv); level++) { + enum plane_id plane_id; + + for_each_plane_id_on_crtc(crtc, plane_id) + wm_state->wm[level].plane[plane_id] = USHRT_MAX; + + wm_state->sr[level].cursor = USHRT_MAX; + wm_state->sr[level].plane = USHRT_MAX; + } } static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size) @@ -1092,136 +1150,213 @@ static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size) return fifo_size - wm; } -static void vlv_invert_wms(struct intel_crtc *crtc) +/* + * Starting from 'level' set all higher + * levels to 'value' in the "raw" watermarks. + */ +static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state, + int level, enum plane_id plane_id, u16 value) { - struct vlv_wm_state *wm_state = &crtc->wm_state; - int level; - - for (level = 0; level < wm_state->num_levels; level++) { - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - const int sr_fifo_size = - INTEL_INFO(dev_priv)->num_pipes * 512 - 1; - struct intel_plane *plane; + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + int num_levels = vlv_num_wm_levels(dev_priv); + bool dirty = false; - wm_state->sr[level].plane = - vlv_invert_wm_value(wm_state->sr[level].plane, - sr_fifo_size); - wm_state->sr[level].cursor = - vlv_invert_wm_value(wm_state->sr[level].cursor, - 63); + for (; level < num_levels; level++) { + struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; - for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { - wm_state->wm[level].plane[plane->id] = - vlv_invert_wm_value(wm_state->wm[level].plane[plane->id], - plane->wm.fifo_size); - } + dirty |= raw->plane[plane_id] != value; + raw->plane[plane_id] = value; } + + return dirty; } -static void vlv_compute_wm(struct intel_crtc *crtc) +static bool vlv_plane_wm_compute(struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct vlv_wm_state *wm_state = &crtc->wm_state; - struct intel_plane *plane; + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + enum plane_id plane_id = plane->id; + int num_levels = vlv_num_wm_levels(to_i915(plane->base.dev)); int level; + bool dirty = false; + + if (!plane_state->base.visible) { + dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0); + goto out; + } + + for (level = 0; level < num_levels; level++) { + struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; + int wm = vlv_compute_wm_level(crtc_state, plane_state, level); + int max_wm = plane_id == PLANE_CURSOR ? 63 : 511; + + if (wm > max_wm) + break; - memset(wm_state, 0, sizeof(*wm_state)); + dirty |= raw->plane[plane_id] != wm; + raw->plane[plane_id] = wm; + } - wm_state->cxsr = crtc->pipe != PIPE_C && crtc->wm.cxsr_allowed; - wm_state->num_levels = dev_priv->wm.max_level + 1; + /* mark all higher levels as invalid */ + dirty |= vlv_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX); - wm_state->num_active_planes = 0; +out: + if (dirty) + DRM_DEBUG_KMS("%s wms: [0]=%d,[1]=%d,[2]=%d\n", + plane->base.name, + crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id], + crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id], + crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]); - vlv_compute_fifo(crtc); + return dirty; +} - if (wm_state->num_active_planes != 1) - wm_state->cxsr = false; +static bool vlv_plane_wm_is_valid(const struct intel_crtc_state *crtc_state, + enum plane_id plane_id, int level) +{ + const struct vlv_pipe_wm *raw = + &crtc_state->wm.vlv.raw[level]; + const struct vlv_fifo_state *fifo_state = + &crtc_state->wm.vlv.fifo_state; - for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { - struct intel_plane_state *state = + return raw->plane[plane_id] <= fifo_state->plane[plane_id]; +} + +static bool vlv_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level) +{ + return vlv_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) && + vlv_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) && + vlv_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) && + vlv_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level); +} + +static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_atomic_state *state = + to_intel_atomic_state(crtc_state->base.state); + struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; + const struct vlv_fifo_state *fifo_state = + &crtc_state->wm.vlv.fifo_state; + int num_active_planes = hweight32(crtc_state->active_planes & + ~BIT(PLANE_CURSOR)); + bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->base); + struct intel_plane_state *plane_state; + struct intel_plane *plane; + enum plane_id plane_id; + int level, ret, i; + unsigned int dirty = 0; + + for_each_intel_plane_in_state(state, plane, plane_state, i) { + const struct intel_plane_state *old_plane_state = to_intel_plane_state(plane->base.state); - int level; - if (!state->base.visible) + if (plane_state->base.crtc != &crtc->base && + old_plane_state->base.crtc != &crtc->base) continue; - /* normal watermarks */ - for (level = 0; level < wm_state->num_levels; level++) { - int wm = vlv_compute_wm_level(crtc->config, state, level); - int max_wm = plane->wm.fifo_size; + if (vlv_plane_wm_compute(crtc_state, plane_state)) + dirty |= BIT(plane->id); + } - /* hack */ - if (WARN_ON(level == 0 && wm > max_wm)) - wm = max_wm; + /* + * DSPARB registers may have been reset due to the + * power well being turned off. Make sure we restore + * them to a consistent state even if no primary/sprite + * planes are initially active. + */ + if (needs_modeset) + crtc_state->fifo_changed = true; - if (wm > max_wm) - break; + if (!dirty) + return 0; - wm_state->wm[level].plane[plane->id] = wm; - } + /* cursor changes don't warrant a FIFO recompute */ + if (dirty & ~BIT(PLANE_CURSOR)) { + const struct intel_crtc_state *old_crtc_state = + to_intel_crtc_state(crtc->base.state); + const struct vlv_fifo_state *old_fifo_state = + &old_crtc_state->wm.vlv.fifo_state; - wm_state->num_levels = level; + ret = vlv_compute_fifo(crtc_state); + if (ret) + return ret; - if (!wm_state->cxsr) - continue; + if (needs_modeset || + memcmp(old_fifo_state, fifo_state, + sizeof(*fifo_state)) != 0) + crtc_state->fifo_changed = true; + } - /* maxfifo watermarks */ - if (plane->id == PLANE_CURSOR) { - for (level = 0; level < wm_state->num_levels; level++) - wm_state->sr[level].cursor = - wm_state->wm[level].plane[PLANE_CURSOR]; - } else { - for (level = 0; level < wm_state->num_levels; level++) - wm_state->sr[level].plane = - max(wm_state->sr[level].plane, - wm_state->wm[level].plane[plane->id]); + /* initially allow all levels */ + wm_state->num_levels = vlv_num_wm_levels(dev_priv); + /* + * Note that enabling cxsr with no primary/sprite planes + * enabled can wedge the pipe. Hence we only allow cxsr + * with exactly one enabled primary/sprite plane. + */ + wm_state->cxsr = crtc->pipe != PIPE_C && num_active_planes == 1; + + for (level = 0; level < wm_state->num_levels; level++) { + const struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; + const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1; + + if (!vlv_crtc_wm_is_valid(crtc_state, level)) + break; + + for_each_plane_id_on_crtc(crtc, plane_id) { + wm_state->wm[level].plane[plane_id] = + vlv_invert_wm_value(raw->plane[plane_id], + fifo_state->plane[plane_id]); } - } - /* clear any (partially) filled invalid levels */ - for (level = wm_state->num_levels; level < dev_priv->wm.max_level + 1; level++) { - memset(&wm_state->wm[level], 0, sizeof(wm_state->wm[level])); - memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level])); + wm_state->sr[level].plane = + vlv_invert_wm_value(max3(raw->plane[PLANE_PRIMARY], + raw->plane[PLANE_SPRITE0], + raw->plane[PLANE_SPRITE1]), + sr_fifo_size); + + wm_state->sr[level].cursor = + vlv_invert_wm_value(raw->plane[PLANE_CURSOR], + 63); } - vlv_invert_wms(crtc); + if (level == 0) + return -EINVAL; + + /* limit to only levels we can actually handle */ + wm_state->num_levels = level; + + /* invalidate the higher levels */ + vlv_invalidate_wms(crtc, wm_state, level); + + return 0; } #define VLV_FIFO(plane, value) \ (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV) -static void vlv_pipe_set_fifo_size(struct intel_crtc *crtc) +static void vlv_atomic_update_fifo(struct intel_atomic_state *state, + struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *plane; - int sprite0_start = 0, sprite1_start = 0, fifo_size = 0; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct vlv_fifo_state *fifo_state = + &crtc_state->wm.vlv.fifo_state; + int sprite0_start, sprite1_start, fifo_size; - for_each_intel_plane_on_crtc(dev, crtc, plane) { - switch (plane->id) { - case PLANE_PRIMARY: - sprite0_start = plane->wm.fifo_size; - break; - case PLANE_SPRITE0: - sprite1_start = sprite0_start + plane->wm.fifo_size; - break; - case PLANE_SPRITE1: - fifo_size = sprite1_start + plane->wm.fifo_size; - break; - case PLANE_CURSOR: - WARN_ON(plane->wm.fifo_size != 63); - break; - default: - MISSING_CASE(plane->id); - break; - } - } + if (!crtc_state->fifo_changed) + return; + + sprite0_start = fifo_state->plane[PLANE_PRIMARY]; + sprite1_start = fifo_state->plane[PLANE_SPRITE0] + sprite0_start; + fifo_size = fifo_state->plane[PLANE_SPRITE1] + sprite1_start; - WARN_ON(fifo_size != 512 - 1); + WARN_ON(fifo_state->plane[PLANE_CURSOR] != 63); + WARN_ON(fifo_size != 511); - DRM_DEBUG_KMS("Pipe %c FIFO split %d / %d / %d\n", - pipe_name(crtc->pipe), sprite0_start, - sprite1_start, fifo_size); + trace_vlv_fifo_size(crtc, sprite0_start, sprite1_start, fifo_size); spin_lock(&dev_priv->wm.dsparb_lock); @@ -1289,6 +1424,46 @@ static void vlv_pipe_set_fifo_size(struct intel_crtc *crtc) #undef VLV_FIFO +static int vlv_compute_intermediate_wm(struct drm_device *dev, + struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state) +{ + struct vlv_wm_state *intermediate = &crtc_state->wm.vlv.intermediate; + const struct vlv_wm_state *optimal = &crtc_state->wm.vlv.optimal; + const struct vlv_wm_state *active = &crtc->wm.active.vlv; + int level; + + intermediate->num_levels = min(optimal->num_levels, active->num_levels); + intermediate->cxsr = optimal->cxsr && active->cxsr && + !crtc_state->disable_cxsr; + + for (level = 0; level < intermediate->num_levels; level++) { + enum plane_id plane_id; + + for_each_plane_id_on_crtc(crtc, plane_id) { + intermediate->wm[level].plane[plane_id] = + min(optimal->wm[level].plane[plane_id], + active->wm[level].plane[plane_id]); + } + + intermediate->sr[level].plane = min(optimal->sr[level].plane, + active->sr[level].plane); + intermediate->sr[level].cursor = min(optimal->sr[level].cursor, + active->sr[level].cursor); + } + + vlv_invalidate_wms(crtc, intermediate, level); + + /* + * If our intermediate WM are identical to the final WM, then we can + * omit the post-vblank programming; only update if it's different. + */ + if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0) + crtc_state->wm.need_postvbl_update = true; + + return 0; +} + static void vlv_merge_wm(struct drm_i915_private *dev_priv, struct vlv_wm_values *wm) { @@ -1299,7 +1474,7 @@ static void vlv_merge_wm(struct drm_i915_private *dev_priv, wm->cxsr = true; for_each_intel_crtc(&dev_priv->drm, crtc) { - const struct vlv_wm_state *wm_state = &crtc->wm_state; + const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv; if (!crtc->active) continue; @@ -1318,14 +1493,11 @@ static void vlv_merge_wm(struct drm_i915_private *dev_priv, wm->level = VLV_WM_LEVEL_PM2; for_each_intel_crtc(&dev_priv->drm, crtc) { - struct vlv_wm_state *wm_state = &crtc->wm_state; + const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv; enum pipe pipe = crtc->pipe; - if (!crtc->active) - continue; - wm->pipe[pipe] = wm_state->wm[wm->level]; - if (wm->cxsr) + if (crtc->active && wm->cxsr) wm->sr = wm_state->sr[wm->level]; wm->ddl[pipe].plane[PLANE_PRIMARY] = DDL_PRECISION_HIGH | 2; @@ -1345,22 +1517,15 @@ static bool is_enabling(int old, int new, int threshold) return old < threshold && new >= threshold; } -static void vlv_update_wm(struct intel_crtc *crtc) +static void vlv_program_watermarks(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - enum pipe pipe = crtc->pipe; struct vlv_wm_values *old_wm = &dev_priv->wm.vlv; struct vlv_wm_values new_wm = {}; - vlv_compute_wm(crtc); vlv_merge_wm(dev_priv, &new_wm); - if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0) { - /* FIXME should be part of crtc atomic commit */ - vlv_pipe_set_fifo_size(crtc); - + if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0) return; - } if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS)) chv_set_memory_dvfs(dev_priv, false); @@ -1371,17 +1536,8 @@ static void vlv_update_wm(struct intel_crtc *crtc) if (is_disabling(old_wm->cxsr, new_wm.cxsr, true)) _intel_set_memory_cxsr(dev_priv, false); - /* FIXME should be part of crtc atomic commit */ - vlv_pipe_set_fifo_size(crtc); - vlv_write_wm_values(dev_priv, &new_wm); - DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, " - "sprite0=%d, sprite1=%d, SR: plane=%d, cursor=%d level=%d cxsr=%d\n", - pipe_name(pipe), new_wm.pipe[pipe].plane[PLANE_PRIMARY], new_wm.pipe[pipe].plane[PLANE_CURSOR], - new_wm.pipe[pipe].plane[PLANE_SPRITE0], new_wm.pipe[pipe].plane[PLANE_SPRITE1], - new_wm.sr.plane, new_wm.sr.cursor, new_wm.level, new_wm.cxsr); - if (is_enabling(old_wm->cxsr, new_wm.cxsr, true)) _intel_set_memory_cxsr(dev_priv, true); @@ -1394,6 +1550,33 @@ static void vlv_update_wm(struct intel_crtc *crtc) *old_wm = new_wm; } +static void vlv_initial_watermarks(struct intel_atomic_state *state, + struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + + mutex_lock(&dev_priv->wm.wm_mutex); + crtc->wm.active.vlv = crtc_state->wm.vlv.intermediate; + vlv_program_watermarks(dev_priv); + mutex_unlock(&dev_priv->wm.wm_mutex); +} + +static void vlv_optimize_watermarks(struct intel_atomic_state *state, + struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); + + if (!crtc_state->wm.need_postvbl_update) + return; + + mutex_lock(&dev_priv->wm.wm_mutex); + intel_crtc->wm.active.vlv = crtc_state->wm.vlv.optimal; + vlv_program_watermarks(dev_priv); + mutex_unlock(&dev_priv->wm.wm_mutex); +} + #define single_plane_enabled(mask) is_power_of_2(mask) static void g4x_update_wm(struct intel_crtc *crtc) @@ -1701,39 +1884,6 @@ static void i845_update_wm(struct intel_crtc *unused_crtc) I915_WRITE(FW_BLC, fwater_lo); } -uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config) -{ - uint32_t pixel_rate; - - pixel_rate = pipe_config->base.adjusted_mode.crtc_clock; - - /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to - * adjust the pixel_rate here. */ - - if (pipe_config->pch_pfit.enabled) { - uint64_t pipe_w, pipe_h, pfit_w, pfit_h; - uint32_t pfit_size = pipe_config->pch_pfit.size; - - pipe_w = pipe_config->pipe_src_w; - pipe_h = pipe_config->pipe_src_h; - - pfit_w = (pfit_size >> 16) & 0xFFFF; - pfit_h = pfit_size & 0xFFFF; - if (pipe_w < pfit_w) - pipe_w = pfit_w; - if (pipe_h < pfit_h) - pipe_h = pfit_h; - - if (WARN_ON(!pfit_w || !pfit_h)) - return pixel_rate; - - pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h, - pfit_w * pfit_h); - } - - return pixel_rate; -} - /* latency must be in 0.1us units. */ static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t cpp, uint32_t latency) { @@ -1807,12 +1957,12 @@ static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate, cpp = pstate->base.fb->format->cpp[0]; - method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value); + method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value); if (!is_lp) return method1; - method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate), + method2 = ilk_wm_method2(cstate->pixel_rate, cstate->base.adjusted_mode.crtc_htotal, drm_rect_width(&pstate->base.dst), cpp, mem_value); @@ -1836,8 +1986,8 @@ static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate, cpp = pstate->base.fb->format->cpp[0]; - method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value); - method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate), + method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value); + method2 = ilk_wm_method2(cstate->pixel_rate, cstate->base.adjusted_mode.crtc_htotal, drm_rect_width(&pstate->base.dst), cpp, mem_value); @@ -1852,20 +2002,24 @@ static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate, const struct intel_plane_state *pstate, uint32_t mem_value) { + int cpp; + /* - * We treat the cursor plane as always-on for the purposes of watermark - * calculation. Until we have two-stage watermark programming merged, - * this is necessary to avoid flickering. + * Treat cursor with fb as always visible since cursor updates + * can happen faster than the vrefresh rate, and the current + * watermark code doesn't handle that correctly. Cursor updates + * which set/clear the fb or change the cursor size are going + * to get throttled by intel_legacy_cursor_update() to work + * around this problem with the watermark code. */ - int cpp = 4; - int width = pstate->base.visible ? pstate->base.crtc_w : 64; - - if (!cstate->base.active) + if (!cstate->base.active || !pstate->base.fb) return 0; - return ilk_wm_method2(ilk_pipe_pixel_rate(cstate), + cpp = pstate->base.fb->format->cpp[0]; + + return ilk_wm_method2(cstate->pixel_rate, cstate->base.adjusted_mode.crtc_htotal, - width, cpp, mem_value); + pstate->base.crtc_w, cpp, mem_value); } /* Only for WM_LP. */ @@ -2095,7 +2249,7 @@ hsw_compute_linetime_wm(const struct intel_crtc_state *cstate) return 0; if (WARN_ON(adjusted_mode->crtc_clock == 0)) return 0; - if (WARN_ON(intel_state->cdclk == 0)) + if (WARN_ON(intel_state->cdclk.logical.cdclk == 0)) return 0; /* The WM are computed with base on how long it takes to fill a single @@ -2104,7 +2258,7 @@ hsw_compute_linetime_wm(const struct intel_crtc_state *cstate) linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, adjusted_mode->crtc_clock); ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, - intel_state->cdclk); + intel_state->cdclk.logical.cdclk); return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) | PIPE_WM_LINETIME_TIME(linetime); @@ -2173,7 +2327,7 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, } /* - * WaWmMemoryReadLatency:skl + * WaWmMemoryReadLatency:skl,glk * * punit doesn't take into account the read latency so we need * to add 2us to the various latency levels we retrieve from the @@ -2498,8 +2652,8 @@ static int ilk_compute_intermediate_wm(struct drm_device *dev, * If our intermediate WM are identical to the final WM, then we can * omit the post-vblank programming; only update if it's different. */ - if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) == 0) - newstate->wm.need_postvbl_update = false; + if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) != 0) + newstate->wm.need_postvbl_update = true; return 0; } @@ -2895,8 +3049,7 @@ static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); - if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv) || - IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) return true; return false; @@ -3547,7 +3700,7 @@ static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cst * Adjusted plane pixel rate is just the pipe's adjusted pixel rate * with additional adjustments for plane-specific scaling. */ - adjusted_pixel_rate = ilk_pipe_pixel_rate(cstate); + adjusted_pixel_rate = cstate->pixel_rate; downscale_amount = skl_plane_downscale_amount(pstate); pixel_rate = adjusted_pixel_rate * downscale_amount >> 16; @@ -3775,7 +3928,7 @@ skl_compute_linetime_wm(struct intel_crtc_state *cstate) if (!cstate->base.active) return 0; - pixel_rate = ilk_pipe_pixel_rate(cstate); + pixel_rate = cstate->pixel_rate; if (WARN_ON(pixel_rate == 0)) return 0; @@ -4539,15 +4692,11 @@ void vlv_wm_get_hw_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); struct vlv_wm_values *wm = &dev_priv->wm.vlv; - struct intel_plane *plane; - enum pipe pipe; + struct intel_crtc *crtc; u32 val; vlv_read_wm_values(dev_priv, wm); - for_each_intel_plane(dev, plane) - plane->wm.fifo_size = vlv_get_fifo_size(plane); - wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN; wm->level = VLV_WM_LEVEL_PM2; @@ -4585,18 +4734,107 @@ void vlv_wm_get_hw_state(struct drm_device *dev) mutex_unlock(&dev_priv->rps.hw_lock); } - for_each_pipe(dev_priv, pipe) + for_each_intel_crtc(dev, crtc) { + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct vlv_wm_state *active = &crtc->wm.active.vlv; + const struct vlv_fifo_state *fifo_state = + &crtc_state->wm.vlv.fifo_state; + enum pipe pipe = crtc->pipe; + enum plane_id plane_id; + int level; + + vlv_get_fifo_size(crtc_state); + + active->num_levels = wm->level + 1; + active->cxsr = wm->cxsr; + + for (level = 0; level < active->num_levels; level++) { + struct vlv_pipe_wm *raw = + &crtc_state->wm.vlv.raw[level]; + + active->sr[level].plane = wm->sr.plane; + active->sr[level].cursor = wm->sr.cursor; + + for_each_plane_id_on_crtc(crtc, plane_id) { + active->wm[level].plane[plane_id] = + wm->pipe[pipe].plane[plane_id]; + + raw->plane[plane_id] = + vlv_invert_wm_value(active->wm[level].plane[plane_id], + fifo_state->plane[plane_id]); + } + } + + for_each_plane_id_on_crtc(crtc, plane_id) + vlv_raw_plane_wm_set(crtc_state, level, + plane_id, USHRT_MAX); + vlv_invalidate_wms(crtc, active, level); + + crtc_state->wm.vlv.optimal = *active; + crtc_state->wm.vlv.intermediate = *active; + DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n", pipe_name(pipe), wm->pipe[pipe].plane[PLANE_PRIMARY], wm->pipe[pipe].plane[PLANE_CURSOR], wm->pipe[pipe].plane[PLANE_SPRITE0], wm->pipe[pipe].plane[PLANE_SPRITE1]); + } DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n", wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr); } +void vlv_wm_sanitize(struct drm_i915_private *dev_priv) +{ + struct intel_plane *plane; + struct intel_crtc *crtc; + + mutex_lock(&dev_priv->wm.wm_mutex); + + for_each_intel_plane(&dev_priv->drm, plane) { + struct intel_crtc *crtc = + intel_get_crtc_for_pipe(dev_priv, plane->pipe); + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; + const struct vlv_fifo_state *fifo_state = + &crtc_state->wm.vlv.fifo_state; + enum plane_id plane_id = plane->id; + int level; + + if (plane_state->base.visible) + continue; + + for (level = 0; level < wm_state->num_levels; level++) { + struct vlv_pipe_wm *raw = + &crtc_state->wm.vlv.raw[level]; + + raw->plane[plane_id] = 0; + + wm_state->wm[level].plane[plane_id] = + vlv_invert_wm_value(raw->plane[plane_id], + fifo_state->plane[plane_id]); + } + } + + for_each_intel_crtc(&dev_priv->drm, crtc) { + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + + crtc_state->wm.vlv.intermediate = + crtc_state->wm.vlv.optimal; + crtc->wm.active.vlv = crtc_state->wm.vlv.optimal; + } + + vlv_program_watermarks(dev_priv); + + mutex_unlock(&dev_priv->wm.wm_mutex); +} + void ilk_wm_get_hw_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -4680,7 +4918,7 @@ bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val) { u16 rgvswctl; - assert_spin_locked(&mchdev_lock); + lockdep_assert_held(&mchdev_lock); rgvswctl = I915_READ16(MEMSWCTL); if (rgvswctl & MEMCTL_CMD_STS) { @@ -4891,6 +5129,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) break; } + /* When byt can survive without system hang with dynamic + * sw freq adjustments, this restriction can be lifted. + */ + if (IS_VALLEYVIEW(dev_priv)) + goto skip_hw_write; + I915_WRITE(GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(dev_priv, ei_up)); I915_WRITE(GEN6_RP_UP_THRESHOLD, @@ -4911,6 +5155,7 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_AVG); +skip_hw_write: dev_priv->rps.power = new_power; dev_priv->rps.up_threshold = threshold_up; dev_priv->rps.down_threshold = threshold_down; @@ -4934,16 +5179,8 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) /* gen6_set_rps is called to update the frequency request, but should also be * called when the range (min_delay and max_delay) is modified so that we can * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ -static void gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) +static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) { - /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - return; - - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - WARN_ON(val > dev_priv->rps.max_freq); - WARN_ON(val < dev_priv->rps.min_freq); - /* min/max delay may still have been modified so be sure to * write the limits value. */ @@ -4969,17 +5206,15 @@ static void gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val)); I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - POSTING_READ(GEN6_RPNSWREQ); - dev_priv->rps.cur_freq = val; trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); + + return 0; } -static void valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) +static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) { - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - WARN_ON(val > dev_priv->rps.max_freq); - WARN_ON(val < dev_priv->rps.min_freq); + int err; if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1), "Odd GPU freq value\n")) @@ -4988,13 +5223,17 @@ static void valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); if (val != dev_priv->rps.cur_freq) { - vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); - if (!IS_CHERRYVIEW(dev_priv)) - gen6_set_rps_thresholds(dev_priv, val); + err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); + if (err) + return err; + + gen6_set_rps_thresholds(dev_priv, val); } dev_priv->rps.cur_freq = val; trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); + + return 0; } /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down @@ -5007,6 +5246,7 @@ static void valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) { u32 val = dev_priv->rps.idle_freq; + int err; if (dev_priv->rps.cur_freq <= val) return; @@ -5024,14 +5264,19 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) * power than the render powerwell. */ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA); - valleyview_set_rps(dev_priv, val); + err = valleyview_set_rps(dev_priv, val); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA); + + if (err) + DRM_ERROR("Failed to set RPS for idle\n"); } void gen6_rps_busy(struct drm_i915_private *dev_priv) { mutex_lock(&dev_priv->rps.hw_lock); if (dev_priv->rps.enabled) { + u8 freq; + if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) gen6_rps_reset_ei(dev_priv); I915_WRITE(GEN6_PMINTRMSK, @@ -5039,11 +5284,17 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv) gen6_enable_rps_interrupts(dev_priv); - /* Ensure we start at the user's desired frequency */ - intel_set_rps(dev_priv, - clamp(dev_priv->rps.cur_freq, - dev_priv->rps.min_freq_softlimit, - dev_priv->rps.max_freq_softlimit)); + /* Use the user's desired frequency as a guide, but for better + * performance, jump directly to RPe as our starting frequency. + */ + freq = max(dev_priv->rps.cur_freq, + dev_priv->rps.efficient_freq); + + if (intel_set_rps(dev_priv, + clamp(freq, + dev_priv->rps.min_freq_softlimit, + dev_priv->rps.max_freq_softlimit))) + DRM_DEBUG_DRIVER("Failed to set idle frequency\n"); } mutex_unlock(&dev_priv->rps.hw_lock); } @@ -5111,12 +5362,25 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv, spin_unlock(&dev_priv->rps.client_lock); } -void intel_set_rps(struct drm_i915_private *dev_priv, u8 val) +int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) { + int err; + + lockdep_assert_held(&dev_priv->rps.hw_lock); + GEM_BUG_ON(val > dev_priv->rps.max_freq); + GEM_BUG_ON(val < dev_priv->rps.min_freq); + + if (!dev_priv->rps.enabled) { + dev_priv->rps.cur_freq = val; + return 0; + } + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - valleyview_set_rps(dev_priv, val); + err = valleyview_set_rps(dev_priv, val); else - gen6_set_rps(dev_priv, val); + err = gen6_set_rps(dev_priv, val); + + return err; } static void gen9_disable_rc6(struct drm_i915_private *dev_priv) @@ -5294,7 +5558,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || - IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + IS_GEN9_BC(dev_priv)) { u32 ddcc_status = 0; if (sandybridge_pcode_read(dev_priv, @@ -5307,7 +5571,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) dev_priv->rps.max_freq); } - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { /* Store the frequency values in 16.66 MHZ units, which is * the natural hardware unit for SKL */ @@ -5320,7 +5584,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) } static void reset_rps(struct drm_i915_private *dev_priv, - void (*set)(struct drm_i915_private *, u8)) + int (*set)(struct drm_i915_private *, u8)) { u8 freq = dev_priv->rps.cur_freq; @@ -5328,7 +5592,8 @@ static void reset_rps(struct drm_i915_private *dev_priv, dev_priv->rps.power = -1; dev_priv->rps.cur_freq = -1; - set(dev_priv, freq); + if (set(dev_priv, freq)) + DRM_ERROR("Failed to reset RPS to initial values\n"); } /* See the Gen9_GT_PM_Programming_Guide doc for the below */ @@ -5336,22 +5601,6 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv) { intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - /* - * BIOS could leave the Hw Turbo enabled, so need to explicitly - * clear out the Control register just to avoid inconsitency - * with debugfs interface, which will show Turbo as enabled - * only and that is not expected by the User after adding the - * WaGsvDisableTurbo. Apart from this there is no problem even - * if the Turbo is left enabled in the Control register, as the - * Up/Down interrupts would remain masked. - */ - gen9_disable_rps(dev_priv); - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - return; - } - /* Program defaults and thresholds for RPS*/ I915_WRITE(GEN6_RC_VIDEO_FREQ, GEN9_FREQUENCY(dev_priv->rps.rp1_freq)); @@ -5411,18 +5660,9 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) if (intel_enable_rc6() & INTEL_RC6_ENABLE) rc6_mask = GEN6_RC_CTL_RC6_ENABLE; DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE)); - /* WaRsUseTimeoutMode:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us */ - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | - GEN7_RC_CTL_TO_MODE | - rc6_mask); - } else { - I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_EI_MODE(1) | - rc6_mask); - } + I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ + I915_WRITE(GEN6_RC_CONTROL, + GEN6_RC_CTL_HW_ENABLE | GEN6_RC_CTL_EI_MODE(1) | rc6_mask); /* * 3b: Enable Coarse Power Gating only when RC6 is enabled. @@ -5637,7 +5877,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) /* convert DDR frequency from units of 266.6MHz to bandwidth */ min_ring_freq = mult_frac(min_ring_freq, 8, 3); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { /* Convert GT frequency to 50 HZ units */ min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER; max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER; @@ -5655,7 +5895,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) int diff = max_gpu_freq - gpu_freq; unsigned int ia_freq = 0, ring_freq = 0; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { /* * ring_freq = 2 * GT. ring_freq is in 100MHz units * No floor required for ring frequency on SKL. @@ -5739,6 +5979,17 @@ static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv) return rp1; } +static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv) +{ + u32 val, rpn; + + val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE); + rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) & + FB_GFX_FREQ_FUSE_MASK); + + return rpn; +} + static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv) { u32 val, rp1; @@ -5975,8 +6226,7 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), dev_priv->rps.rp1_freq); - /* PUnit validated range is only [RPe, RP0] */ - dev_priv->rps.min_freq = dev_priv->rps.efficient_freq; + dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv); DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), dev_priv->rps.min_freq); @@ -6199,7 +6449,7 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv) unsigned long now = jiffies_to_msecs(jiffies), diff1; int i; - assert_spin_locked(&mchdev_lock); + lockdep_assert_held(&mchdev_lock); diff1 = now - dev_priv->ips.last_time1; @@ -6304,7 +6554,7 @@ static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) u64 now, diff, diffms; u32 count; - assert_spin_locked(&mchdev_lock); + lockdep_assert_held(&mchdev_lock); now = ktime_get_raw_ns(); diffms = now - dev_priv->ips.last_time2; @@ -6349,7 +6599,7 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) unsigned long t, corr, state1, corr2, state2; u32 pxvid, ext_v; - assert_spin_locked(&mchdev_lock); + lockdep_assert_held(&mchdev_lock); pxvid = I915_READ(PXVFREQ(dev_priv->rps.cur_freq)); pxvid = (pxvid >> 24) & 0x7f; @@ -6775,7 +7025,7 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) } else if (INTEL_GEN(dev_priv) >= 9) { gen9_enable_rc6(dev_priv); gen9_enable_rps(dev_priv); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) gen6_update_ring_freq(dev_priv); } else if (IS_BROADWELL(dev_priv)) { gen8_enable_rps(dev_priv); @@ -7260,6 +7510,14 @@ static void broadwell_init_clock_gating(struct drm_i915_private *dev_priv) | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT); lpt_init_clock_gating(dev_priv); + + /* WaDisableDopClockGating:bdw + * + * Also see the CHICKEN2 write in bdw_init_workarounds() to disable DOP + * clock gating. + */ + I915_WRITE(GEN6_UCGCTL1, + I915_READ(GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE); } static void haswell_init_clock_gating(struct drm_i915_private *dev_priv) @@ -7656,8 +7914,10 @@ void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) dev_priv->display.init_clock_gating = skylake_init_clock_gating; else if (IS_KABYLAKE(dev_priv)) dev_priv->display.init_clock_gating = kabylake_init_clock_gating; - else if (IS_GEN9_LP(dev_priv)) + else if (IS_BROXTON(dev_priv)) dev_priv->display.init_clock_gating = bxt_init_clock_gating; + else if (IS_GEMINILAKE(dev_priv)) + dev_priv->display.init_clock_gating = glk_init_clock_gating; else if (IS_BROADWELL(dev_priv)) dev_priv->display.init_clock_gating = broadwell_init_clock_gating; else if (IS_CHERRYVIEW(dev_priv)) @@ -7727,7 +7987,11 @@ void intel_init_pm(struct drm_i915_private *dev_priv) } } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { vlv_setup_wm_latency(dev_priv); - dev_priv->display.update_wm = vlv_update_wm; + dev_priv->display.compute_pipe_wm = vlv_compute_pipe_wm; + dev_priv->display.compute_intermediate_wm = vlv_compute_intermediate_wm; + dev_priv->display.initial_watermarks = vlv_initial_watermarks; + dev_priv->display.optimize_watermarks = vlv_optimize_watermarks; + dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo; } else if (IS_PINEVIEW(dev_priv)) { if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv), dev_priv->is_ddr3, @@ -7916,10 +8180,10 @@ static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox, * @timeout_base_ms: timeout for polling with preemption enabled * * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE - * reports an error or an overall timeout of @timeout_base_ms+10 ms expires. + * reports an error or an overall timeout of @timeout_base_ms+50 ms expires. * The request is acknowledged once the PCODE reply dword equals @reply after * applying @reply_mask. Polling is first attempted with preemption enabled - * for @timeout_base_ms and if this times out for another 10 ms with + * for @timeout_base_ms and if this times out for another 50 ms with * preemption disabled. * * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some @@ -7955,14 +8219,15 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, * worst case) _and_ PCODE was busy for some reason even after a * (queued) request and @timeout_base_ms delay. As a workaround retry * the poll with preemption disabled to maximize the number of - * requests. Increase the timeout from @timeout_base_ms to 10ms to + * requests. Increase the timeout from @timeout_base_ms to 50ms to * account for interrupts that could reduce the number of these - * requests. + * requests, and for any quirks of the PCODE firmware that delays + * the request completion. */ DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n"); WARN_ON_ONCE(timeout_base_ms > 3); preempt_disable(); - ret = wait_for_atomic(COND, 10); + ret = wait_for_atomic(COND, 50); preempt_enable(); out: diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 91bc4abf5d3e..4a864f8c9387 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -39,7 +39,7 @@ */ #define LEGACY_REQUEST_SIZE 200 -int __intel_ring_space(int head, int tail, int size) +static int __intel_ring_space(int head, int tail, int size) { int space = head - tail; if (space <= 0) @@ -61,22 +61,20 @@ void intel_ring_update_space(struct intel_ring *ring) static int gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - u32 cmd; - int ret; + u32 cmd, *cs; cmd = MI_FLUSH; if (mode & EMIT_INVALIDATE) cmd |= MI_READ_FLUSH; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = cmd; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -84,9 +82,7 @@ gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) static int gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - u32 cmd; - int ret; + u32 cmd, *cs; /* * read/write caches: @@ -123,13 +119,13 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) cmd |= MI_INVALIDATE_ISP; } - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = cmd; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -174,35 +170,33 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) static int intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; u32 scratch_addr = i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; - int ret; - - ret = intel_ring_begin(req, 6); - if (ret) - return ret; - - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5)); - intel_ring_emit(ring, PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD); - intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, 0); /* low dword */ - intel_ring_emit(ring, 0); /* high dword */ - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); - - ret = intel_ring_begin(req, 6); - if (ret) - return ret; - - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5)); - intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE); - intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + u32 *cs; + + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = GFX_OP_PIPE_CONTROL(5); + *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; + *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; /* low dword */ + *cs++ = 0; /* high dword */ + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); + + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = GFX_OP_PIPE_CONTROL(5); + *cs++ = PIPE_CONTROL_QW_WRITE; + *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + *cs++ = 0; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -210,10 +204,9 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) static int gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; u32 scratch_addr = i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; - u32 flags = 0; + u32 *cs, flags = 0; int ret; /* Force SNB workarounds for PIPE_CONTROL flushes */ @@ -247,15 +240,15 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; } - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); - intel_ring_emit(ring, flags); - intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = flags; + *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + intel_ring_advance(req, cs); return 0; } @@ -263,20 +256,17 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) static int gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); - intel_ring_emit(ring, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; + *cs++ = 0; + *cs++ = 0; + intel_ring_advance(req, cs); return 0; } @@ -284,11 +274,9 @@ gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) static int gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; u32 scratch_addr = i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; - u32 flags = 0; - int ret; + u32 *cs, flags = 0; /* * Ensure that any following seqno writes only happen when the render @@ -332,37 +320,15 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) gen7_render_ring_cs_stall_wa(req); } - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); - intel_ring_emit(ring, flags); - intel_ring_emit(ring, scratch_addr); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); - - return 0; -} - -static int -gen8_emit_pipe_control(struct drm_i915_gem_request *req, - u32 flags, u32 scratch_addr) -{ - struct intel_ring *ring = req->ring; - int ret; - - ret = intel_ring_begin(req, 6); - if (ret) - return ret; - - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, flags); - intel_ring_emit(ring, scratch_addr); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = flags; + *cs++ = scratch_addr; + *cs++ = 0; + intel_ring_advance(req, cs); return 0; } @@ -370,12 +336,14 @@ gen8_emit_pipe_control(struct drm_i915_gem_request *req, static int gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - u32 scratch_addr = - i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; - u32 flags = 0; - int ret; + u32 flags; + u32 *cs; - flags |= PIPE_CONTROL_CS_STALL; + cs = intel_ring_begin(req, mode & EMIT_INVALIDATE ? 12 : 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + flags = PIPE_CONTROL_CS_STALL; if (mode & EMIT_FLUSH) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; @@ -394,15 +362,19 @@ gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; /* WaCsStallBeforeStateCacheInvalidate:bdw,chv */ - ret = gen8_emit_pipe_control(req, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD, - 0); - if (ret) - return ret; + cs = gen8_emit_pipe_control(cs, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD, + 0); } - return gen8_emit_pipe_control(req, flags, scratch_addr); + cs = gen8_emit_pipe_control(cs, flags, + i915_ggtt_offset(req->engine->scratch) + + 2 * CACHELINE_BYTES); + + intel_ring_advance(req, cs); + + return 0; } static void ring_setup_phys_status_page(struct intel_engine_cs *engine) @@ -657,41 +629,6 @@ static void reset_ring_common(struct intel_engine_cs *engine, } } -static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) -{ - struct intel_ring *ring = req->ring; - struct i915_workarounds *w = &req->i915->workarounds; - int ret, i; - - if (w->count == 0) - return 0; - - ret = req->engine->emit_flush(req, EMIT_BARRIER); - if (ret) - return ret; - - ret = intel_ring_begin(req, (w->count * 2 + 2)); - if (ret) - return ret; - - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count)); - for (i = 0; i < w->count; i++) { - intel_ring_emit_reg(ring, w->reg[i].addr); - intel_ring_emit(ring, w->reg[i].value); - } - intel_ring_emit(ring, MI_NOOP); - - intel_ring_advance(ring); - - ret = req->engine->emit_flush(req, EMIT_BARRIER); - if (ret) - return ret; - - DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count); - - return 0; -} - static int intel_rcs_ctx_init(struct drm_i915_gem_request *req) { int ret; @@ -707,498 +644,6 @@ static int intel_rcs_ctx_init(struct drm_i915_gem_request *req) return 0; } -static int wa_add(struct drm_i915_private *dev_priv, - i915_reg_t addr, - const u32 mask, const u32 val) -{ - const u32 idx = dev_priv->workarounds.count; - - if (WARN_ON(idx >= I915_MAX_WA_REGS)) - return -ENOSPC; - - dev_priv->workarounds.reg[idx].addr = addr; - dev_priv->workarounds.reg[idx].value = val; - dev_priv->workarounds.reg[idx].mask = mask; - - dev_priv->workarounds.count++; - - return 0; -} - -#define WA_REG(addr, mask, val) do { \ - const int r = wa_add(dev_priv, (addr), (mask), (val)); \ - if (r) \ - return r; \ - } while (0) - -#define WA_SET_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) - -#define WA_CLR_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) - -#define WA_SET_FIELD_MASKED(addr, mask, value) \ - WA_REG(addr, mask, _MASKED_FIELD(mask, value)) - -#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask)) -#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask)) - -#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val) - -static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, - i915_reg_t reg) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct i915_workarounds *wa = &dev_priv->workarounds; - const uint32_t index = wa->hw_whitelist_count[engine->id]; - - if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS)) - return -EINVAL; - - WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), - i915_mmio_reg_offset(reg)); - wa->hw_whitelist_count[engine->id]++; - - return 0; -} - -static int gen8_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); - - /* WaDisableAsyncFlipPerfMode:bdw,chv */ - WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); - - /* WaDisablePartialInstShootdown:bdw,chv */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); - - /* Use Force Non-Coherent whenever executing a 3D context. This is a - * workaround for for a possible hang in the unlikely event a TLB - * invalidation occurs during a PSD flush. - */ - /* WaForceEnableNonCoherent:bdw,chv */ - /* WaHdcDisableFetchWhenMasked:bdw,chv */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_DONOT_FETCH_MEM_WHEN_MASKED | - HDC_FORCE_NON_COHERENT); - - /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: - * "The Hierarchical Z RAW Stall Optimization allows non-overlapping - * polygons in the same 8x4 pixel/sample area to be processed without - * stalling waiting for the earlier ones to write to Hierarchical Z - * buffer." - * - * This optimization is off by default for BDW and CHV; turn it on. - */ - WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); - - /* Wa4x4STCOptimizationDisable:bdw,chv */ - WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - WA_SET_FIELD_MASKED(GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK, - GEN6_WIZ_HASHING_16x4); - - return 0; -} - -static int bdw_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen8_init_workarounds(engine); - if (ret) - return ret; - - /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); - - /* WaDisableDopClockGating:bdw */ - WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, - DOP_CLOCK_GATING_DISABLE); - - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); - - WA_SET_BIT_MASKED(HDC_CHICKEN0, - /* WaForceContextSaveRestoreNonCoherent:bdw */ - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | - /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ - (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); - - return 0; -} - -static int chv_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen8_init_workarounds(engine); - if (ret) - return ret; - - /* WaDisableThreadStallDopClockGating:chv */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); - - /* Improve HiZ throughput on CHV. */ - WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); - - return 0; -} - -static int gen9_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl */ - I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE)); - - /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl */ - I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | - GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); - - /* WaDisableKillLogic:bxt,skl,kbl */ - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | - ECOCHK_DIS_TLB); - - /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl */ - /* WaDisablePartialInstShootdown:skl,bxt,kbl */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - FLOW_CONTROL_ENABLE | - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); - - /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); - - /* WaDisableDgMirrorFixInHalfSliceChicken5:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, - GEN9_DG_MIRROR_FIX_ENABLE); - - /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1, - GEN9_RHWO_OPTIMIZATION_DISABLE); - /* - * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set - * but we do that in per ctx batchbuffer as there is an issue - * with this register not getting restored on ctx restore - */ - } - - /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */ - WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, - GEN9_ENABLE_GPGPU_PREEMPTION); - - /* Wa4x4STCOptimizationDisable:skl,bxt,kbl */ - /* WaDisablePartialResolveInVc:skl,bxt,kbl */ - WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE | - GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE)); - - /* WaCcsTlbPrefetchDisable:skl,bxt,kbl */ - WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, - GEN9_CCS_TLB_PREFETCH_ENABLE); - - /* WaDisableMaskBasedCammingInRCC:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0, - PIXEL_MASK_CAMMING_DISABLE); - - /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | - HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); - - /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are - * both tied to WaForceContextSaveRestoreNonCoherent - * in some hsds for skl. We keep the tie for all gen9. The - * documentation is a bit hazy and so we want to get common behaviour, - * even though there is no clear evidence we would need both on kbl/bxt. - * This area has been source of system hangs so we play it safe - * and mimic the skl regardless of what bspec says. - * - * Use Force Non-Coherent whenever executing a 3D context. This - * is a workaround for a possible hang in the unlikely event - * a TLB invalidation occurs during a PSD flush. - */ - - /* WaForceEnableNonCoherent:skl,bxt,kbl */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FORCE_NON_COHERENT); - - /* WaDisableHDCInvalidation:skl,bxt,kbl */ - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | - BDW_DISABLE_HDC_INVALIDATION); - - /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl */ - if (IS_SKYLAKE(dev_priv) || - IS_KABYLAKE(dev_priv) || - IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); - - /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl */ - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); - - /* WaOCLCoherentLineFlush:skl,bxt,kbl */ - I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | - GEN8_LQSC_FLUSH_COHERENT_LINES)); - - /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt */ - ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); - if (ret) - return ret; - - /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl */ - ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); - if (ret) - return ret; - - /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl */ - ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1); - if (ret) - return ret; - - return 0; -} - -static int skl_tune_iz_hashing(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - u8 vals[3] = { 0, 0, 0 }; - unsigned int i; - - for (i = 0; i < 3; i++) { - u8 ss; - - /* - * Only consider slices where one, and only one, subslice has 7 - * EUs - */ - if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i])) - continue; - - /* - * subslice_7eu[i] != 0 (because of the check above) and - * ss_max == 4 (maximum number of subslices possible per slice) - * - * -> 0 <= ss <= 3; - */ - ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1; - vals[i] = 3 - ss; - } - - if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) - return 0; - - /* Tune IZ hashing. See intel_device_info_runtime_init() */ - WA_SET_FIELD_MASKED(GEN7_GT_MODE, - GEN9_IZ_HASHING_MASK(2) | - GEN9_IZ_HASHING_MASK(1) | - GEN9_IZ_HASHING_MASK(0), - GEN9_IZ_HASHING(2, vals[2]) | - GEN9_IZ_HASHING(1, vals[1]) | - GEN9_IZ_HASHING(0, vals[0])); - - return 0; -} - -static int skl_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* - * Actual WA is to disable percontext preemption granularity control - * until D0 which is the default case so this is equivalent to - * !WaDisablePerCtxtPreemptionGranularityControl:skl - */ - I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, - _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); - - /* WaEnableGapsTsvCreditFix:skl */ - I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | - GEN9_GAPS_TSV_CREDIT_DISABLE)); - - /* WaDisableGafsUnitClkGating:skl */ - WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); - - /* WaInPlaceDecompressionHang:skl */ - if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER)) - WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); - - /* WaDisableLSQCROPERFforOCL:skl */ - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - - return skl_tune_iz_hashing(engine); -} - -static int bxt_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WaStoreMultiplePTEenable:bxt */ - /* This is a requirement according to Hardware specification */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF); - - /* WaSetClckGatingDisableMedia:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & - ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE)); - } - - /* WaDisableThreadStallDopClockGating:bxt */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - STALL_DOP_GATING_DISABLE); - - /* WaDisablePooledEuLoadBalancingFix:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) { - WA_SET_BIT_MASKED(FF_SLICE_CS_CHICKEN2, - GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE); - } - - /* WaDisableSbeCacheDispatchPortSharing:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) { - WA_SET_BIT_MASKED( - GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); - } - - /* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */ - /* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */ - /* WaDisableObjectLevelPreemtionForInstanceId:bxt */ - /* WaDisableLSQCROPERFforOCL:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - ret = wa_ring_whitelist_reg(engine, GEN9_CS_DEBUG_MODE1); - if (ret) - return ret; - - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - } - - /* WaProgramL3SqcReg1DefaultForPerf:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) - I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) | - L3_HIGH_PRIO_CREDITS(2)); - - /* WaToEnableHwFixForPushConstHWBug:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaInPlaceDecompressionHang:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) - WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); - - return 0; -} - -static int kbl_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WaEnableGapsTsvCreditFix:kbl */ - I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | - GEN9_GAPS_TSV_CREDIT_DISABLE)); - - /* WaDisableDynamicCreditSharing:kbl */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) - WA_SET_BIT(GAMT_CHKN_BIT_REG, - GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); - - /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */ - if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0)) - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FENCE_DEST_SLM_DISABLE); - - /* WaToEnableHwFixForPushConstHWBug:kbl */ - if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaDisableGafsUnitClkGating:kbl */ - WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); - - /* WaDisableSbeCacheDispatchPortSharing:kbl */ - WA_SET_BIT_MASKED( - GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); - - /* WaInPlaceDecompressionHang:kbl */ - WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); - - /* WaDisableLSQCROPERFforOCL:kbl */ - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - - return 0; -} - -int init_workarounds_ring(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - WARN_ON(engine->id != RCS); - - dev_priv->workarounds.count = 0; - dev_priv->workarounds.hw_whitelist_count[RCS] = 0; - - if (IS_BROADWELL(dev_priv)) - return bdw_init_workarounds(engine); - - if (IS_CHERRYVIEW(dev_priv)) - return chv_init_workarounds(engine); - - if (IS_SKYLAKE(dev_priv)) - return skl_init_workarounds(engine); - - if (IS_BROXTON(dev_priv)) - return bxt_init_workarounds(engine); - - if (IS_KABYLAKE(dev_priv)) - return kbl_init_workarounds(engine); - - return 0; -} - static int init_render_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1257,7 +702,7 @@ static void render_ring_cleanup(struct intel_engine_cs *engine) i915_vma_unpin_and_release(&dev_priv->semaphore); } -static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *out) +static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *cs) { struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *waiter; @@ -1268,23 +713,22 @@ static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *out) if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - *out++ = GFX_OP_PIPE_CONTROL(6); - *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_CS_STALL); - *out++ = lower_32_bits(gtt_offset); - *out++ = upper_32_bits(gtt_offset); - *out++ = req->global_seqno; - *out++ = 0; - *out++ = (MI_SEMAPHORE_SIGNAL | - MI_SEMAPHORE_TARGET(waiter->hw_id)); - *out++ = 0; + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_CS_STALL; + *cs++ = lower_32_bits(gtt_offset); + *cs++ = upper_32_bits(gtt_offset); + *cs++ = req->global_seqno; + *cs++ = 0; + *cs++ = MI_SEMAPHORE_SIGNAL | + MI_SEMAPHORE_TARGET(waiter->hw_id); + *cs++ = 0; } - return out; + return cs; } -static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *out) +static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *cs) { struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *waiter; @@ -1295,19 +739,19 @@ static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *out) if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - *out++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; - *out++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT; - *out++ = upper_32_bits(gtt_offset); - *out++ = req->global_seqno; - *out++ = (MI_SEMAPHORE_SIGNAL | - MI_SEMAPHORE_TARGET(waiter->hw_id)); - *out++ = 0; + *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; + *cs++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT; + *cs++ = upper_32_bits(gtt_offset); + *cs++ = req->global_seqno; + *cs++ = MI_SEMAPHORE_SIGNAL | + MI_SEMAPHORE_TARGET(waiter->hw_id); + *cs++ = 0; } - return out; + return cs; } -static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *out) +static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs) { struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *engine; @@ -1322,16 +766,16 @@ static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *out) mbox_reg = req->engine->semaphore.mbox.signal[engine->hw_id]; if (i915_mmio_reg_valid(mbox_reg)) { - *out++ = MI_LOAD_REGISTER_IMM(1); - *out++ = i915_mmio_reg_offset(mbox_reg); - *out++ = req->global_seqno; + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(mbox_reg); + *cs++ = req->global_seqno; num_rings++; } } if (num_rings & 1) - *out++ = MI_NOOP; + *cs++ = MI_NOOP; - return out; + return cs; } static void i9xx_submit_request(struct drm_i915_gem_request *request) @@ -1340,18 +784,19 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request) i915_gem_request_submit(request); + GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); I915_WRITE_TAIL(request->engine, request->tail); } -static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, - u32 *out) +static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) { - *out++ = MI_STORE_DWORD_INDEX; - *out++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; - *out++ = req->global_seqno; - *out++ = MI_USER_INTERRUPT; + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; + *cs++ = req->global_seqno; + *cs++ = MI_USER_INTERRUPT; - req->tail = intel_ring_offset(req->ring, out); + req->tail = intel_ring_offset(req, cs); + GEM_BUG_ON(!IS_ALIGNED(req->tail, 8)); } static const int i9xx_emit_breadcrumb_sz = 4; @@ -1364,34 +809,33 @@ static const int i9xx_emit_breadcrumb_sz = 4; * Update the mailbox registers in the *other* rings with the current seqno. * This acts like a signal in the canonical semaphore. */ -static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, - u32 *out) +static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) { return i9xx_emit_breadcrumb(req, - req->engine->semaphore.signal(req, out)); + req->engine->semaphore.signal(req, cs)); } static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req, - u32 *out) + u32 *cs) { struct intel_engine_cs *engine = req->engine; if (engine->semaphore.signal) - out = engine->semaphore.signal(req, out); - - *out++ = GFX_OP_PIPE_CONTROL(6); - *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE); - *out++ = intel_hws_seqno_address(engine); - *out++ = 0; - *out++ = req->global_seqno; + cs = engine->semaphore.signal(req, cs); + + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE; + *cs++ = intel_hws_seqno_address(engine); + *cs++ = 0; + *cs++ = req->global_seqno; /* We're thrashing one dword of HWS. */ - *out++ = 0; - *out++ = MI_USER_INTERRUPT; - *out++ = MI_NOOP; + *cs++ = 0; + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; - req->tail = intel_ring_offset(req->ring, out); + req->tail = intel_ring_offset(req, cs); + GEM_BUG_ON(!IS_ALIGNED(req->tail, 8)); } static const int gen8_render_emit_breadcrumb_sz = 8; @@ -1408,24 +852,21 @@ static int gen8_ring_sync_to(struct drm_i915_gem_request *req, struct drm_i915_gem_request *signal) { - struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = req->i915; u64 offset = GEN8_WAIT_OFFSET(req->engine, signal->engine->id); struct i915_hw_ppgtt *ppgtt; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_SEMAPHORE_WAIT | - MI_SEMAPHORE_GLOBAL_GTT | - MI_SEMAPHORE_SAD_GTE_SDD); - intel_ring_emit(ring, signal->global_seqno); - intel_ring_emit(ring, lower_32_bits(offset)); - intel_ring_emit(ring, upper_32_bits(offset)); - intel_ring_advance(ring); + *cs++ = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_SAD_GTE_SDD; + *cs++ = signal->global_seqno; + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + intel_ring_advance(req, cs); /* When the !RCS engines idle waiting upon a semaphore, they lose their * pagetables and we must reload them before executing the batch. @@ -1442,28 +883,27 @@ static int gen6_ring_sync_to(struct drm_i915_gem_request *req, struct drm_i915_gem_request *signal) { - struct intel_ring *ring = req->ring; u32 dw1 = MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | MI_SEMAPHORE_REGISTER; u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->hw_id]; - int ret; + u32 *cs; WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, dw1 | wait_mbox); + *cs++ = dw1 | wait_mbox; /* Throughout all of the GEM code, seqno passed implies our current * seqno is >= the last seqno executed. However for hardware the * comparison is strictly greater than. */ - intel_ring_emit(ring, signal->global_seqno - 1); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = signal->global_seqno - 1; + *cs++ = 0; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1564,16 +1004,15 @@ i8xx_irq_disable(struct intel_engine_cs *engine) static int bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_FLUSH); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_FLUSH; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1639,20 +1078,16 @@ i965_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 length, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_BATCH_BUFFER_START | - MI_BATCH_GTT | - (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE_I965)); - intel_ring_emit(ring, offset); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags & + I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965); + *cs++ = offset; + intel_ring_advance(req, cs); return 0; } @@ -1666,59 +1101,56 @@ i830_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - u32 cs_offset = i915_ggtt_offset(req->engine->scratch); - int ret; + u32 *cs, cs_offset = i915_ggtt_offset(req->engine->scratch); - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* Evict the invalid PTE TLBs */ - intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA); - intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096); - intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */ - intel_ring_emit(ring, cs_offset); - intel_ring_emit(ring, 0xdeadbeef); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA; + *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096; + *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */ + *cs++ = cs_offset; + *cs++ = 0xdeadbeef; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) { if (len > I830_BATCH_LIMIT) return -ENOSPC; - ret = intel_ring_begin(req, 6 + 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 6 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* Blit the batch (which has now all relocs applied) to the * stable batch scratch bo area (so that the CS never * stumbles over its tlb invalidation bug) ... */ - intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA); - intel_ring_emit(ring, - BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096); - intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 4096); - intel_ring_emit(ring, cs_offset); - intel_ring_emit(ring, 4096); - intel_ring_emit(ring, offset); - - intel_ring_emit(ring, MI_FLUSH); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA; + *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096; + *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096; + *cs++ = cs_offset; + *cs++ = 4096; + *cs++ = offset; + + *cs++ = MI_FLUSH; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); /* ... and execute it. */ offset = cs_offset; } - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT); - intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE)); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : + MI_BATCH_NON_SECURE); + intel_ring_advance(req, cs); return 0; } @@ -1728,17 +1160,16 @@ i915_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT); - intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE)); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : + MI_BATCH_NON_SECURE); + intel_ring_advance(req, cs); return 0; } @@ -1985,7 +1416,7 @@ intel_ring_free(struct intel_ring *ring) kfree(ring); } -static int context_pin(struct i915_gem_context *ctx, unsigned int flags) +static int context_pin(struct i915_gem_context *ctx) { struct i915_vma *vma = ctx->engine[RCS].state; int ret; @@ -2000,7 +1431,8 @@ static int context_pin(struct i915_gem_context *ctx, unsigned int flags) return ret; } - return i915_vma_pin(vma, 0, ctx->ggtt_alignment, PIN_GLOBAL | flags); + return i915_vma_pin(vma, 0, I915_GTT_MIN_ALIGNMENT, + PIN_GLOBAL | PIN_HIGH); } static int intel_ring_context_pin(struct intel_engine_cs *engine, @@ -2015,13 +1447,7 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine, return 0; if (ce->state) { - unsigned int flags; - - flags = 0; - if (i915_gem_context_is_kernel(ctx)) - flags = PIN_HIGH; - - ret = context_pin(ctx, flags); + ret = context_pin(ctx); if (ret) goto error; } @@ -2152,7 +1578,7 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) static int ring_request_alloc(struct drm_i915_gem_request *request) { - int ret; + u32 *cs; GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count); @@ -2165,9 +1591,9 @@ static int ring_request_alloc(struct drm_i915_gem_request *request) GEM_BUG_ON(!request->engine->buffer); request->ring = request->engine->buffer; - ret = intel_ring_begin(request, 0); - if (ret) - return ret; + cs = intel_ring_begin(request, 0); + if (IS_ERR(cs)) + return PTR_ERR(cs); request->reserved_space -= LEGACY_REQUEST_SIZE; return 0; @@ -2222,7 +1648,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) return 0; } -int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) +u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) { struct intel_ring *ring = req->ring; int remain_actual = ring->size - ring->tail; @@ -2230,6 +1656,7 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) int bytes = num_dwords * sizeof(u32); int total_bytes, wait_bytes; bool need_wrap = false; + u32 *cs; total_bytes = bytes + req->reserved_space; @@ -2256,7 +1683,7 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) if (wait_bytes > ring->space) { int ret = wait_for_space(req, wait_bytes); if (unlikely(ret)) - return ret; + return ERR_PTR(ret); } if (unlikely(need_wrap)) { @@ -2269,31 +1696,34 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) ring->space -= remain_actual; } + GEM_BUG_ON(ring->tail > ring->size - bytes); + cs = ring->vaddr + ring->tail; + ring->tail += bytes; ring->space -= bytes; GEM_BUG_ON(ring->space < 0); - return 0; + + return cs; } /* Align the ring tail to a cacheline boundary */ int intel_ring_cacheline_align(struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; int num_dwords = - (ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); - int ret; + (req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); + u32 *cs; if (num_dwords == 0) return 0; num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords; - ret = intel_ring_begin(req, num_dwords); - if (ret) - return ret; + cs = intel_ring_begin(req, num_dwords); + if (IS_ERR(cs)) + return PTR_ERR(cs); while (num_dwords--) - intel_ring_emit(ring, MI_NOOP); + *cs++ = MI_NOOP; - intel_ring_advance(ring); + intel_ring_advance(req, cs); return 0; } @@ -2337,13 +1767,11 @@ static void gen6_bsd_submit_request(struct drm_i915_gem_request *request) static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - uint32_t cmd; - int ret; + u32 cmd, *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); cmd = MI_FLUSH_DW; if (INTEL_GEN(req->i915) >= 8) @@ -2365,16 +1793,16 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) if (mode & EMIT_INVALIDATE) cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD; - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); + *cs++ = cmd; + *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; if (INTEL_GEN(req->i915) >= 8) { - intel_ring_emit(ring, 0); /* upper addr */ - intel_ring_emit(ring, 0); /* value */ + *cs++ = 0; /* upper addr */ + *cs++ = 0; /* value */ } else { - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); + *cs++ = 0; + *cs++ = MI_NOOP; } - intel_ring_advance(ring); + intel_ring_advance(req, cs); return 0; } @@ -2383,23 +1811,21 @@ gen8_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; bool ppgtt = USES_PPGTT(req->i915) && !(dispatch_flags & I915_DISPATCH_SECURE); - int ret; + u32 *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* FIXME(BDW): Address space and security selectors. */ - intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | - (dispatch_flags & I915_DISPATCH_RS ? - MI_BATCH_RESOURCE_STREAMER : 0)); - intel_ring_emit(ring, lower_32_bits(offset)); - intel_ring_emit(ring, upper_32_bits(offset)); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START_GEN8 | (ppgtt << 8) | (dispatch_flags & + I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -2409,22 +1835,19 @@ hsw_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_BATCH_BUFFER_START | - (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | - (dispatch_flags & I915_DISPATCH_RS ? - MI_BATCH_RESOURCE_STREAMER : 0)); + *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? + 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | + (dispatch_flags & I915_DISPATCH_RS ? + MI_BATCH_RESOURCE_STREAMER : 0); /* bit0-7 is the length on GEN6+ */ - intel_ring_emit(ring, offset); - intel_ring_advance(ring); + *cs++ = offset; + intel_ring_advance(req, cs); return 0; } @@ -2434,20 +1857,17 @@ gen6_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_BATCH_BUFFER_START | - (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE_I965)); + *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? + 0 : MI_BATCH_NON_SECURE_I965); /* bit0-7 is the length on GEN6+ */ - intel_ring_emit(ring, offset); - intel_ring_advance(ring); + *cs++ = offset; + intel_ring_advance(req, cs); return 0; } @@ -2456,13 +1876,11 @@ gen6_emit_bb_start(struct drm_i915_gem_request *req, static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - uint32_t cmd; - int ret; + u32 cmd, *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); cmd = MI_FLUSH_DW; if (INTEL_GEN(req->i915) >= 8) @@ -2483,17 +1901,16 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) */ if (mode & EMIT_INVALIDATE) cmd |= MI_INVALIDATE_TLB; - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, - I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); + *cs++ = cmd; + *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; if (INTEL_GEN(req->i915) >= 8) { - intel_ring_emit(ring, 0); /* upper addr */ - intel_ring_emit(ring, 0); /* value */ + *cs++ = 0; /* upper addr */ + *cs++ = 0; /* value */ } else { - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); + *cs++ = 0; + *cs++ = MI_NOOP; } - intel_ring_advance(ring); + intel_ring_advance(req, cs); return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 79c2b8d72322..0ef491df5b4e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -5,6 +5,7 @@ #include "i915_gem_batch_pool.h" #include "i915_gem_request.h" #include "i915_gem_timeline.h" +#include "i915_selftest.h" #define I915_CMD_HASH_ORDER 9 @@ -144,6 +145,7 @@ struct intel_ring { u32 head; u32 tail; + int space; int size; int effective_size; @@ -184,26 +186,26 @@ struct i915_ctx_workarounds { struct drm_i915_gem_request; struct intel_render_state; +/* + * Engine IDs definitions. + * Keep instances of the same type engine together. + */ +enum intel_engine_id { + RCS = 0, + BCS, + VCS, + VCS2, +#define _VCS(n) (VCS + (n)) + VECS +}; + struct intel_engine_cs { struct drm_i915_private *i915; const char *name; - enum intel_engine_id { - RCS = 0, - BCS, - VCS, - VCS2, /* Keep instances of the same type engine together. */ - VECS - } id; -#define _VCS(n) (VCS + (n)) + enum intel_engine_id id; unsigned int exec_id; - enum intel_engine_hw_id { - RCS_HW = 0, - VCS_HW, - BCS_HW, - VECS_HW, - VCS2_HW - } hw_id; - enum intel_engine_hw_id guc_id; /* XXX same as hw_id? */ + unsigned int hw_id; + unsigned int guc_id; u32 mmio_base; unsigned int irq_shift; struct intel_ring *buffer; @@ -211,6 +213,11 @@ struct intel_engine_cs { struct intel_render_state *render_state; + atomic_t irq_count; + unsigned long irq_posted; +#define ENGINE_IRQ_BREADCRUMB 0 +#define ENGINE_IRQ_EXECLIST 1 + /* Rather than have every client wait upon all user interrupts, * with the herd waking after every interrupt and each doing the * heavyweight seqno dance, we delegate the task (of being the @@ -228,22 +235,22 @@ struct intel_engine_cs { * the overhead of waking that client is much preferred. */ struct intel_breadcrumbs { - struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */ - bool irq_posted; + spinlock_t irq_lock; /* protects irq_*; irqsafe */ + struct intel_wait *irq_wait; /* oldest waiter by retirement */ - spinlock_t lock; /* protects the lists of requests; irqsafe */ + spinlock_t rb_lock; /* protects the rb and wraps irq_lock */ struct rb_root waiters; /* sorted by retirement, priority */ struct rb_root signals; /* sorted by retirement */ - struct intel_wait *first_wait; /* oldest waiter by retirement */ struct task_struct *signaler; /* used for fence signalling */ - struct drm_i915_gem_request *first_signal; + struct drm_i915_gem_request __rcu *first_signal; struct timer_list fake_irq; /* used after a missed interrupt */ struct timer_list hangcheck; /* detect missed interrupts */ - unsigned long timeout; + unsigned int hangcheck_interrupts; + bool irq_armed : 1; bool irq_enabled : 1; - bool rpm_wakelock : 1; + I915_SELFTEST_DECLARE(bool mock : 1); } breadcrumbs; /* @@ -285,7 +292,7 @@ struct intel_engine_cs { #define I915_DISPATCH_PINNED BIT(1) #define I915_DISPATCH_RS BIT(2) void (*emit_breadcrumb)(struct drm_i915_gem_request *req, - u32 *out); + u32 *cs); int emit_breadcrumb_sz; /* Pass the request to the hardware queue (e.g. directly into @@ -368,7 +375,7 @@ struct intel_engine_cs { /* AKA wait() */ int (*sync_to)(struct drm_i915_gem_request *req, struct drm_i915_gem_request *signal); - u32 *(*signal)(struct drm_i915_gem_request *req, u32 *out); + u32 *(*signal)(struct drm_i915_gem_request *req, u32 *cs); } semaphore; /* Execlists */ @@ -376,13 +383,11 @@ struct intel_engine_cs { struct execlist_port { struct drm_i915_gem_request *request; unsigned int count; + GEM_DEBUG_DECL(u32 context_id); } execlist_port[2]; struct rb_root execlist_queue; struct rb_node *execlist_first; unsigned int fw_domains; - bool disable_lite_restore_wa; - bool preempt_wa; - u32 ctx_desc_template; /* Contexts are pinned whilst they are active on the GPU. The last * context executed remains active whilst the GPU is idle - the @@ -492,21 +497,12 @@ void intel_engine_cleanup(struct intel_engine_cs *engine); void intel_legacy_submission_resume(struct drm_i915_private *dev_priv); -int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n); int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); -static inline void intel_ring_emit(struct intel_ring *ring, u32 data) -{ - *(uint32_t *)(ring->vaddr + ring->tail) = data; - ring->tail += 4; -} +u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req, int n); -static inline void intel_ring_emit_reg(struct intel_ring *ring, i915_reg_t reg) -{ - intel_ring_emit(ring, i915_mmio_reg_offset(reg)); -} - -static inline void intel_ring_advance(struct intel_ring *ring) +static inline void +intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs) { /* Dummy function. * @@ -516,16 +512,18 @@ static inline void intel_ring_advance(struct intel_ring *ring) * reserved for the command packet (i.e. the value passed to * intel_ring_begin()). */ + GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs); } -static inline u32 intel_ring_offset(struct intel_ring *ring, void *addr) +static inline u32 +intel_ring_offset(struct drm_i915_gem_request *req, void *addr) { /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ - u32 offset = addr - ring->vaddr; - return offset & (ring->size - 1); + u32 offset = addr - req->ring->vaddr; + GEM_BUG_ON(offset > req->ring->size); + return offset & (req->ring->size - 1); } -int __intel_ring_space(int head, int tail, int size); void intel_ring_update_space(struct intel_ring *ring); void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno); @@ -558,10 +556,11 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine) * wtih serialising this hint with anything, so document it as * a hint and nothing more. */ - return READ_ONCE(engine->timeline->last_submitted_seqno); + return READ_ONCE(engine->timeline->seqno); } int init_workarounds_ring(struct intel_engine_cs *engine); +int intel_ring_workarounds_emit(struct drm_i915_gem_request *req); void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone); @@ -583,12 +582,51 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); -static inline void intel_wait_init(struct intel_wait *wait, u32 seqno) +static inline void intel_wait_init(struct intel_wait *wait, + struct drm_i915_gem_request *rq) +{ + wait->tsk = current; + wait->request = rq; +} + +static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno) { wait->tsk = current; wait->seqno = seqno; } +static inline bool intel_wait_has_seqno(const struct intel_wait *wait) +{ + return wait->seqno; +} + +static inline bool +intel_wait_update_seqno(struct intel_wait *wait, u32 seqno) +{ + wait->seqno = seqno; + return intel_wait_has_seqno(wait); +} + +static inline bool +intel_wait_update_request(struct intel_wait *wait, + const struct drm_i915_gem_request *rq) +{ + return intel_wait_update_seqno(wait, i915_gem_request_global_seqno(rq)); +} + +static inline bool +intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno) +{ + return wait->seqno == seqno; +} + +static inline bool +intel_wait_check_request(const struct intel_wait *wait, + const struct drm_i915_gem_request *rq) +{ + return intel_wait_check_seqno(wait, i915_gem_request_global_seqno(rq)); +} + static inline bool intel_wait_complete(const struct intel_wait *wait) { return RB_EMPTY_NODE(&wait->node); @@ -599,38 +637,36 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine, void intel_engine_remove_wait(struct intel_engine_cs *engine, struct intel_wait *wait); void intel_engine_enable_signaling(struct drm_i915_gem_request *request); +void intel_engine_cancel_signaling(struct drm_i915_gem_request *request); static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) { - return rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh); + return READ_ONCE(engine->breadcrumbs.irq_wait); } -static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine) +unsigned int intel_engine_wakeup(struct intel_engine_cs *engine); +#define ENGINE_WAKEUP_WAITER BIT(0) +#define ENGINE_WAKEUP_ASLEEP BIT(1) + +void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); + +void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); +bool intel_breadcrumbs_busy(struct intel_engine_cs *engine); + +static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) { - bool wakeup = false; + memset(batch, 0, 6 * sizeof(u32)); - /* Note that for this not to dangerously chase a dangling pointer, - * we must hold the rcu_read_lock here. - * - * Also note that tsk is likely to be in !TASK_RUNNING state so an - * early test for tsk->state != TASK_RUNNING before wake_up_process() - * is unlikely to be beneficial. - */ - if (intel_engine_has_waiter(engine)) { - struct task_struct *tsk; - - rcu_read_lock(); - tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh); - if (tsk) - wakeup = wake_up_process(tsk); - rcu_read_unlock(); - } + batch[0] = GFX_OP_PIPE_CONTROL(6); + batch[1] = flags; + batch[2] = offset; - return wakeup; + return batch + 6; } -void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); -void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); -unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915); +bool intel_engine_is_idle(struct intel_engine_cs *engine); +bool intel_engines_are_idle(struct drm_i915_private *dev_priv); #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index c0b7e95b5b8e..012bc358a33a 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -49,19 +49,6 @@ * present for a given platform. */ -#define for_each_power_well(i, power_well, domain_mask, power_domains) \ - for (i = 0; \ - i < (power_domains)->power_well_count && \ - ((power_well) = &(power_domains)->power_wells[i]); \ - i++) \ - for_each_if ((power_well)->domains & (domain_mask)) - -#define for_each_power_well_rev(i, power_well, domain_mask, power_domains) \ - for (i = (power_domains)->power_well_count - 1; \ - i >= 0 && ((power_well) = &(power_domains)->power_wells[i]);\ - i--) \ - for_each_if ((power_well)->domains & (domain_mask)) - bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv, int power_well_id); @@ -106,6 +93,16 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "PORT_DDI_D_LANES"; case POWER_DOMAIN_PORT_DDI_E_LANES: return "PORT_DDI_E_LANES"; + case POWER_DOMAIN_PORT_DDI_A_IO: + return "PORT_DDI_A_IO"; + case POWER_DOMAIN_PORT_DDI_B_IO: + return "PORT_DDI_B_IO"; + case POWER_DOMAIN_PORT_DDI_C_IO: + return "PORT_DDI_C_IO"; + case POWER_DOMAIN_PORT_DDI_D_IO: + return "PORT_DDI_D_IO"; + case POWER_DOMAIN_PORT_DDI_E_IO: + return "PORT_DDI_E_IO"; case POWER_DOMAIN_PORT_DSI: return "PORT_DSI"; case POWER_DOMAIN_PORT_CRT: @@ -198,19 +195,15 @@ static bool hsw_power_well_enabled(struct drm_i915_private *dev_priv, bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain) { - struct i915_power_domains *power_domains; struct i915_power_well *power_well; bool is_enabled; - int i; if (dev_priv->pm.suspended) return false; - power_domains = &dev_priv->power_domains; - is_enabled = true; - for_each_power_well_rev(i, power_well, BIT(domain), power_domains) { + for_each_power_domain_well_rev(dev_priv, power_well, BIT_ULL(domain)) { if (power_well->always_on) continue; @@ -385,124 +378,121 @@ static void hsw_set_power_well(struct drm_i915_private *dev_priv, } #define SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_E_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_AUX_D) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_INIT)) -#define SKL_DISPLAY_DDI_A_E_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_A_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_E_LANES) | \ - BIT(POWER_DOMAIN_INIT)) -#define SKL_DISPLAY_DDI_B_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_INIT)) -#define SKL_DISPLAY_DDI_C_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_INIT)) -#define SKL_DISPLAY_DDI_D_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_E_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_INIT)) +#define SKL_DISPLAY_DDI_IO_A_E_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_E_IO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) +#define SKL_DISPLAY_DDI_IO_B_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) +#define SKL_DISPLAY_DDI_IO_C_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) +#define SKL_DISPLAY_DDI_IO_D_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define SKL_DISPLAY_DC_OFF_POWER_DOMAINS ( \ SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ - BIT(POWER_DOMAIN_MODESET) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_MODESET) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_GMBUS) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_GMBUS) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DISPLAY_DC_OFF_POWER_DOMAINS ( \ BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ - BIT(POWER_DOMAIN_MODESET) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_MODESET) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DPIO_CMN_A_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_A_LANES) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DPIO_CMN_BC_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_INIT)) -#define GLK_DISPLAY_DDI_A_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_A_LANES) | \ - BIT(POWER_DOMAIN_INIT)) -#define GLK_DISPLAY_DDI_B_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_INIT)) -#define GLK_DISPLAY_DDI_C_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_INIT)) +#define GLK_DISPLAY_DDI_IO_A_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO)) +#define GLK_DISPLAY_DDI_IO_B_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO)) +#define GLK_DISPLAY_DDI_IO_C_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO)) #define GLK_DPIO_CMN_A_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_A_LANES) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DPIO_CMN_B_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DPIO_CMN_C_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_AUX_A_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_AUX_B_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_AUX_C_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_DC_OFF_POWER_DOMAINS ( \ GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ - BIT(POWER_DOMAIN_MODESET) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_MODESET) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) static void assert_can_enable_dc9(struct drm_i915_private *dev_priv) { @@ -732,7 +722,7 @@ gen9_sanitize_power_well_requests(struct drm_i915_private *dev_priv, * other request bits to be set, so WARN for those. */ if (power_well_id == SKL_DISP_PW_1 || - ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) && + (IS_GEN9_BC(dev_priv) && power_well_id == SKL_DISP_PW_MISC_IO)) DRM_DEBUG_DRIVER("Clearing auxiliary requests for %s forced on " "by DMC\n", power_well->name); @@ -847,14 +837,14 @@ static void skl_set_power_well(struct drm_i915_private *dev_priv, static void hsw_power_well_sync_hw(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - hsw_set_power_well(dev_priv, power_well, power_well->count > 0); - - /* - * We're taking over the BIOS, so clear any requests made by it since - * the driver is in charge now. - */ - if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE_REQUEST) + /* Take over the request bit if set by BIOS. */ + if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE_REQUEST) { + if (!(I915_READ(HSW_PWR_WELL_DRIVER) & + HSW_PWR_WELL_ENABLE_REQUEST)) + I915_WRITE(HSW_PWR_WELL_DRIVER, + HSW_PWR_WELL_ENABLE_REQUEST); I915_WRITE(HSW_PWR_WELL_BIOS, 0); + } } static void hsw_power_well_enable(struct drm_i915_private *dev_priv, @@ -881,10 +871,17 @@ static bool skl_power_well_enabled(struct drm_i915_private *dev_priv, static void skl_power_well_sync_hw(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - skl_set_power_well(dev_priv, power_well, power_well->count > 0); + uint32_t mask = SKL_POWER_WELL_REQ(power_well->id); + uint32_t bios_req = I915_READ(HSW_PWR_WELL_BIOS); + + /* Take over the request bit if set by BIOS. */ + if (bios_req & mask) { + uint32_t drv_req = I915_READ(HSW_PWR_WELL_DRIVER); - /* Clear any request made by BIOS as driver is taking over */ - I915_WRITE(HSW_PWR_WELL_BIOS, 0); + if (!(drv_req & mask)) + I915_WRITE(HSW_PWR_WELL_DRIVER, drv_req | mask); + I915_WRITE(HSW_PWR_WELL_BIOS, bios_req & ~mask); + } } static void skl_power_well_enable(struct drm_i915_private *dev_priv, @@ -917,16 +914,6 @@ static bool bxt_dpio_cmn_power_well_enabled(struct drm_i915_private *dev_priv, return bxt_ddi_phy_is_enabled(dev_priv, power_well->data); } -static void bxt_dpio_cmn_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - if (power_well->count > 0) - bxt_dpio_cmn_power_well_enable(dev_priv, power_well); - else - bxt_dpio_cmn_power_well_disable(dev_priv, power_well); -} - - static void bxt_verify_ddi_phy_power_wells(struct drm_i915_private *dev_priv) { struct i915_power_well *power_well; @@ -964,10 +951,12 @@ static void gen9_assert_dbuf_enabled(struct drm_i915_private *dev_priv) static void gen9_dc_off_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { + struct intel_cdclk_state cdclk_state = {}; + gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); - WARN_ON(dev_priv->cdclk_freq != - dev_priv->display.get_display_clock_speed(dev_priv)); + dev_priv->display.get_cdclk(dev_priv, &cdclk_state); + WARN_ON(!intel_cdclk_state_compare(&dev_priv->cdclk.hw, &cdclk_state)); gen9_assert_dbuf_enabled(dev_priv); @@ -987,13 +976,9 @@ static void gen9_dc_off_power_well_disable(struct drm_i915_private *dev_priv, gen9_enable_dc5(dev_priv); } -static void gen9_dc_off_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) +static void i9xx_power_well_sync_hw_noop(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) { - if (power_well->count > 0) - gen9_dc_off_power_well_enable(dev_priv, power_well); - else - gen9_dc_off_power_well_disable(dev_priv, power_well); } static void i9xx_always_on_power_well_noop(struct drm_i915_private *dev_priv, @@ -1043,12 +1028,6 @@ out: mutex_unlock(&dev_priv->rps.hw_lock); } -static void vlv_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - vlv_set_power_well(dev_priv, power_well, power_well->count > 0); -} - static void vlv_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { @@ -1249,7 +1228,7 @@ static void vlv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, vlv_set_power_well(dev_priv, power_well, false); } -#define POWER_DOMAIN_MASK (BIT(POWER_DOMAIN_NUM) - 1) +#define POWER_DOMAIN_MASK (GENMASK_ULL(POWER_DOMAIN_NUM - 1, 0)) static struct i915_power_well *lookup_power_well(struct drm_i915_private *dev_priv, int power_well_id) @@ -1659,14 +1638,6 @@ out: mutex_unlock(&dev_priv->rps.hw_lock); } -static void chv_pipe_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - WARN_ON_ONCE(power_well->id != PIPE_A); - - chv_set_pipe_power_well(dev_priv, power_well, power_well->count > 0); -} - static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { @@ -1693,9 +1664,8 @@ __intel_display_power_get_domain(struct drm_i915_private *dev_priv, { struct i915_power_domains *power_domains = &dev_priv->power_domains; struct i915_power_well *power_well; - int i; - for_each_power_well(i, power_well, BIT(domain), power_domains) + for_each_power_domain_well(dev_priv, power_well, BIT_ULL(domain)) intel_power_well_get(dev_priv, power_well); power_domains->domain_use_count[domain]++; @@ -1779,7 +1749,6 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, { struct i915_power_domains *power_domains; struct i915_power_well *power_well; - int i; power_domains = &dev_priv->power_domains; @@ -1790,7 +1759,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, intel_display_power_domain_str(domain)); power_domains->domain_use_count[domain]--; - for_each_power_well_rev(i, power_well, BIT(domain), power_domains) + for_each_power_domain_well_rev(dev_priv, power_well, BIT_ULL(domain)) intel_power_well_put(dev_priv, power_well); mutex_unlock(&power_domains->lock); @@ -1799,134 +1768,134 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, } #define HSW_DISPLAY_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_PORT_CRT) | /* DDI E */ \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_CRT) | /* DDI E */ \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BDW_DISPLAY_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_PORT_CRT) | /* DDI E */ \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_CRT) | /* DDI E */ \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DISPLAY_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_DSI) | \ - BIT(POWER_DOMAIN_PORT_CRT) | \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_GMBUS) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PIPE_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DSI) | \ + BIT_ULL(POWER_DOMAIN_PORT_CRT) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_GMBUS) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_CMN_BC_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_CRT) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_CRT) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define CHV_DISPLAY_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_PORT_DSI) | \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_AUX_D) | \ - BIT(POWER_DOMAIN_GMBUS) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PIPE_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DSI) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_GMBUS) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define CHV_DPIO_CMN_BC_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define CHV_DPIO_CMN_D_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_AUX_D) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_INIT)) static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { - .sync_hw = i9xx_always_on_power_well_noop, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = i9xx_always_on_power_well_noop, .disable = i9xx_always_on_power_well_noop, .is_enabled = i9xx_always_on_power_well_enabled, }; static const struct i915_power_well_ops chv_pipe_power_well_ops = { - .sync_hw = chv_pipe_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = chv_pipe_power_well_enable, .disable = chv_pipe_power_well_disable, .is_enabled = chv_pipe_power_well_enabled, }; static const struct i915_power_well_ops chv_dpio_cmn_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = chv_dpio_cmn_power_well_enable, .disable = chv_dpio_cmn_power_well_disable, .is_enabled = vlv_power_well_enabled, @@ -1956,14 +1925,14 @@ static const struct i915_power_well_ops skl_power_well_ops = { }; static const struct i915_power_well_ops gen9_dc_off_power_well_ops = { - .sync_hw = gen9_dc_off_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = gen9_dc_off_power_well_enable, .disable = gen9_dc_off_power_well_disable, .is_enabled = gen9_dc_off_power_well_enabled, }; static const struct i915_power_well_ops bxt_dpio_cmn_power_well_ops = { - .sync_hw = bxt_dpio_cmn_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = bxt_dpio_cmn_power_well_enable, .disable = bxt_dpio_cmn_power_well_disable, .is_enabled = bxt_dpio_cmn_power_well_enabled, @@ -1998,21 +1967,21 @@ static struct i915_power_well bdw_power_wells[] = { }; static const struct i915_power_well_ops vlv_display_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = vlv_display_power_well_enable, .disable = vlv_display_power_well_disable, .is_enabled = vlv_power_well_enabled, }; static const struct i915_power_well_ops vlv_dpio_cmn_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = vlv_dpio_cmn_power_well_enable, .disable = vlv_dpio_cmn_power_well_disable, .is_enabled = vlv_power_well_enabled, }; static const struct i915_power_well_ops vlv_dpio_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = vlv_power_well_enable, .disable = vlv_power_well_disable, .is_enabled = vlv_power_well_enabled, @@ -2155,26 +2124,26 @@ static struct i915_power_well skl_power_wells[] = { .id = SKL_DISP_PW_2, }, { - .name = "DDI A/E power well", - .domains = SKL_DISPLAY_DDI_A_E_POWER_DOMAINS, + .name = "DDI A/E IO power well", + .domains = SKL_DISPLAY_DDI_IO_A_E_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_A_E, }, { - .name = "DDI B power well", - .domains = SKL_DISPLAY_DDI_B_POWER_DOMAINS, + .name = "DDI B IO power well", + .domains = SKL_DISPLAY_DDI_IO_B_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_B, }, { - .name = "DDI C power well", - .domains = SKL_DISPLAY_DDI_C_POWER_DOMAINS, + .name = "DDI C IO power well", + .domains = SKL_DISPLAY_DDI_IO_C_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_C, }, { - .name = "DDI D power well", - .domains = SKL_DISPLAY_DDI_D_POWER_DOMAINS, + .name = "DDI D IO power well", + .domains = SKL_DISPLAY_DDI_IO_D_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_D, }, @@ -2287,20 +2256,20 @@ static struct i915_power_well glk_power_wells[] = { .id = GLK_DISP_PW_AUX_C, }, { - .name = "DDI A power well", - .domains = GLK_DISPLAY_DDI_A_POWER_DOMAINS, + .name = "DDI A IO power well", + .domains = GLK_DISPLAY_DDI_IO_A_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = GLK_DISP_PW_DDI_A, }, { - .name = "DDI B power well", - .domains = GLK_DISPLAY_DDI_B_POWER_DOMAINS, + .name = "DDI B IO power well", + .domains = GLK_DISPLAY_DDI_IO_B_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_B, }, { - .name = "DDI C power well", - .domains = GLK_DISPLAY_DDI_C_POWER_DOMAINS, + .name = "DDI C IO power well", + .domains = GLK_DISPLAY_DDI_IO_C_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_C, }, @@ -2323,7 +2292,7 @@ static uint32_t get_allowed_dc_mask(const struct drm_i915_private *dev_priv, int requested_dc; int max_dc; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { max_dc = 2; mask = 0; } else if (IS_GEN9_LP(dev_priv)) { @@ -2386,7 +2355,7 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) dev_priv->csr.allowed_dc_mask = get_allowed_dc_mask(dev_priv, i915.enable_dc); - BUILD_BUG_ON(POWER_DOMAIN_NUM > 31); + BUILD_BUG_ON(POWER_DOMAIN_NUM > 64); mutex_init(&power_domains->lock); @@ -2398,7 +2367,7 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) set_power_wells(power_domains, hsw_power_wells); } else if (IS_BROADWELL(dev_priv)) { set_power_wells(power_domains, bdw_power_wells); - } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + } else if (IS_GEN9_BC(dev_priv)) { set_power_wells(power_domains, skl_power_wells); } else if (IS_BROXTON(dev_priv)) { set_power_wells(power_domains, bxt_power_wells); @@ -2454,10 +2423,9 @@ static void intel_power_domains_sync_hw(struct drm_i915_private *dev_priv) { struct i915_power_domains *power_domains = &dev_priv->power_domains; struct i915_power_well *power_well; - int i; mutex_lock(&power_domains->lock); - for_each_power_well(i, power_well, POWER_DOMAIN_MASK, power_domains) { + for_each_power_well(dev_priv, power_well) { power_well->ops->sync_hw(dev_priv, power_well); power_well->hw_enabled = power_well->ops->is_enabled(dev_priv, power_well); @@ -2722,7 +2690,10 @@ static void vlv_cmnlane_wa(struct drm_i915_private *dev_priv) * @resume: Called from resume code paths or not * * This function initializes the hardware power domain state and enables all - * power domains using intel_display_set_init_power(). + * power wells belonging to the INIT power domain. Power wells in other + * domains (and not in the INIT domain) are referenced or disabled during the + * modeset state HW readout. After that the reference count of each power well + * must match its HW enabled state, see intel_power_domains_verify_state(). */ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume) { @@ -2730,7 +2701,7 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume) power_domains->initializing = true; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { skl_display_core_init(dev_priv, resume); } else if (IS_GEN9_LP(dev_priv)) { bxt_display_core_init(dev_priv, resume); @@ -2769,12 +2740,92 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv) if (!i915.disable_power_well) intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) skl_display_core_uninit(dev_priv); else if (IS_GEN9_LP(dev_priv)) bxt_display_core_uninit(dev_priv); } +static void intel_power_domains_dump_info(struct drm_i915_private *dev_priv) +{ + struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_well *power_well; + + for_each_power_well(dev_priv, power_well) { + enum intel_display_power_domain domain; + + DRM_DEBUG_DRIVER("%-25s %d\n", + power_well->name, power_well->count); + + for_each_power_domain(domain, power_well->domains) + DRM_DEBUG_DRIVER(" %-23s %d\n", + intel_display_power_domain_str(domain), + power_domains->domain_use_count[domain]); + } +} + +/** + * intel_power_domains_verify_state - verify the HW/SW state for all power wells + * @dev_priv: i915 device instance + * + * Verify if the reference count of each power well matches its HW enabled + * state and the total refcount of the domains it belongs to. This must be + * called after modeset HW state sanitization, which is responsible for + * acquiring reference counts for any power wells in use and disabling the + * ones left on by BIOS but not required by any active output. + */ +void intel_power_domains_verify_state(struct drm_i915_private *dev_priv) +{ + struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_well *power_well; + bool dump_domain_info; + + mutex_lock(&power_domains->lock); + + dump_domain_info = false; + for_each_power_well(dev_priv, power_well) { + enum intel_display_power_domain domain; + int domains_count; + bool enabled; + + /* + * Power wells not belonging to any domain (like the MISC_IO + * and PW1 power wells) are under FW control, so ignore them, + * since their state can change asynchronously. + */ + if (!power_well->domains) + continue; + + enabled = power_well->ops->is_enabled(dev_priv, power_well); + if ((power_well->count || power_well->always_on) != enabled) + DRM_ERROR("power well %s state mismatch (refcount %d/enabled %d)", + power_well->name, power_well->count, enabled); + + domains_count = 0; + for_each_power_domain(domain, power_well->domains) + domains_count += power_domains->domain_use_count[domain]; + + if (power_well->count != domains_count) { + DRM_ERROR("power well %s refcount/domain refcount mismatch " + "(refcount %d/domains refcount %d)\n", + power_well->name, power_well->count, + domains_count); + dump_domain_info = true; + } + } + + if (dump_domain_info) { + static bool dumped; + + if (!dumped) { + intel_power_domains_dump_info(dev_priv); + dumped = true; + } + } + + mutex_unlock(&power_domains->lock); +} + /** * intel_runtime_pm_get - grab a runtime pm reference * @dev_priv: i915 device instance diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 2ad13903a054..816a6f5a3fd9 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -2981,6 +2981,7 @@ bool intel_sdvo_init(struct drm_i915_private *dev_priv, /* encoder type will be decided later */ intel_encoder = &intel_sdvo->base; intel_encoder->type = INTEL_OUTPUT_SDVO; + intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = port; drm_encoder_init(&dev_priv->drm, &intel_encoder->base, &intel_sdvo_enc_funcs, 0, diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index 1a840bf92eea..7d971cb56116 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -60,8 +60,7 @@ static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn, } I915_WRITE(VLV_IOSF_ADDR, addr); - if (!is_read) - I915_WRITE(VLV_IOSF_DATA, *val); + I915_WRITE(VLV_IOSF_DATA, is_read ? 0 : *val); I915_WRITE(VLV_IOSF_DOORBELL_REQ, cmd); if (intel_wait_for_register(dev_priv, @@ -74,7 +73,6 @@ static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn, if (is_read) *val = I915_READ(VLV_IOSF_DATA); - I915_WRITE(VLV_IOSF_DATA, 0); return 0; } @@ -93,14 +91,18 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr) return val; } -void vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val) +int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val) { + int err; + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); mutex_lock(&dev_priv->sb_lock); - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, - SB_CRWRDA_NP, addr, &val); + err = vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, + SB_CRWRDA_NP, addr, &val); mutex_unlock(&dev_priv->sb_lock); + + return err; } u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg) @@ -214,6 +216,7 @@ u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg, } I915_WRITE(SBI_ADDR, (reg << 16)); + I915_WRITE(SBI_DATA, 0); if (destination == SBI_ICLK) value = SBI_CTL_DEST_ICLK | SBI_CTL_OP_CRRD; @@ -223,10 +226,15 @@ u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg, if (intel_wait_for_register(dev_priv, SBI_CTL_STAT, - SBI_BUSY | SBI_RESPONSE_FAIL, + SBI_BUSY, 0, 100)) { - DRM_ERROR("timeout waiting for SBI to complete read transaction\n"); + DRM_ERROR("timeout waiting for SBI to complete read\n"); + return 0; + } + + if (I915_READ(SBI_CTL_STAT) & SBI_RESPONSE_FAIL) { + DRM_ERROR("error during SBI read of reg %x\n", reg); return 0; } @@ -258,10 +266,16 @@ void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value, if (intel_wait_for_register(dev_priv, SBI_CTL_STAT, - SBI_BUSY | SBI_RESPONSE_FAIL, + SBI_BUSY, 0, 100)) { - DRM_ERROR("timeout waiting for SBI to complete write transaction\n"); + DRM_ERROR("timeout waiting for SBI to complete write\n"); + return; + } + + if (I915_READ(SBI_CTL_STAT) & SBI_RESPONSE_FAIL) { + DRM_ERROR("error during SBI write of %x to reg %x\n", + value, reg); return; } } diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 9ef54688872a..27e0752d1578 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -219,13 +219,22 @@ skl_update_plane(struct drm_plane *drm_plane, uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; - plane_ctl = PLANE_CTL_ENABLE | - PLANE_CTL_PIPE_GAMMA_ENABLE | - PLANE_CTL_PIPE_CSC_ENABLE; + plane_ctl = PLANE_CTL_ENABLE; + + if (IS_GEMINILAKE(dev_priv)) { + I915_WRITE(PLANE_COLOR_CTL(pipe, plane_id), + PLANE_COLOR_PIPE_GAMMA_ENABLE | + PLANE_COLOR_PIPE_CSC_ENABLE | + PLANE_COLOR_PLANE_GAMMA_DISABLE); + } else { + plane_ctl |= + PLANE_CTL_PIPE_GAMMA_ENABLE | + PLANE_CTL_PIPE_CSC_ENABLE | + PLANE_CTL_PLANE_GAMMA_DISABLE; + } plane_ctl |= skl_plane_ctl_format(fb->format->format); plane_ctl |= skl_plane_ctl_tiling(fb->modifier); - plane_ctl |= skl_plane_ctl_rotation(rotation); if (key->flags) { diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index eb692e4ffe01..6ed1a3ce47b7 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1621,6 +1621,7 @@ intel_tv_init(struct drm_i915_private *dev_priv) intel_connector_attach_encoder(intel_connector, intel_encoder); intel_encoder->type = INTEL_OUTPUT_TVOUT; + intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = PORT_NONE; intel_encoder->crtc_mask = (1 << 0) | (1 << 1); intel_encoder->cloneable = 0; diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index abe08885a5ba..b35b7a03eeaf 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -133,6 +133,13 @@ fw_domains_put(struct drm_i915_private *dev_priv, enum forcewake_domains fw_doma } static void +vgpu_fw_domains_nop(struct drm_i915_private *dev_priv, + enum forcewake_domains fw_domains) +{ + /* Guest driver doesn't need to takes care forcewake. */ +} + +static void fw_domains_posting_read(struct drm_i915_private *dev_priv) { struct intel_uncore_forcewake_domain *d; @@ -499,7 +506,7 @@ void intel_uncore_forcewake_get(struct drm_i915_private *dev_priv, void intel_uncore_forcewake_get__locked(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains) { - assert_spin_locked(&dev_priv->uncore.lock); + lockdep_assert_held(&dev_priv->uncore.lock); if (!dev_priv->uncore.funcs.force_wake_get) return; @@ -557,7 +564,7 @@ void intel_uncore_forcewake_put(struct drm_i915_private *dev_priv, void intel_uncore_forcewake_put__locked(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains) { - assert_spin_locked(&dev_priv->uncore.lock); + lockdep_assert_held(&dev_priv->uncore.lock); if (!dev_priv->uncore.funcs.force_wake_put) return; @@ -635,33 +642,6 @@ find_fw_domain(struct drm_i915_private *dev_priv, u32 offset) return entry->domains; } -static void -intel_fw_table_check(struct drm_i915_private *dev_priv) -{ - const struct intel_forcewake_range *ranges; - unsigned int num_ranges; - s32 prev; - unsigned int i; - - if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG)) - return; - - ranges = dev_priv->uncore.fw_domains_table; - if (!ranges) - return; - - num_ranges = dev_priv->uncore.fw_domains_table_entries; - - for (i = 0, prev = -1; i < num_ranges; i++, ranges++) { - WARN_ON_ONCE(IS_GEN9(dev_priv) && - (prev + 1) != (s32)ranges->start); - WARN_ON_ONCE(prev >= (s32)ranges->start); - prev = ranges->start; - WARN_ON_ONCE(prev >= (s32)ranges->end); - prev = ranges->end; - } -} - #define GEN_FW_RANGE(s, e, d) \ { .start = (s), .end = (e), .domains = (d) } @@ -700,23 +680,6 @@ static const i915_reg_t gen8_shadowed_regs[] = { /* TODO: Other registers are not yet used */ }; -static void intel_shadow_table_check(void) -{ - const i915_reg_t *reg = gen8_shadowed_regs; - s32 prev; - u32 offset; - unsigned int i; - - if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG)) - return; - - for (i = 0, prev = -1; i < ARRAY_SIZE(gen8_shadowed_regs); i++, reg++) { - offset = i915_mmio_reg_offset(*reg); - WARN_ON_ONCE(prev >= (s32)offset); - prev = offset; - } -} - static int mmio_reg_cmp(u32 key, const i915_reg_t *reg) { u32 offset = i915_mmio_reg_offset(*reg); @@ -985,29 +948,19 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv, ___force_wake_auto(dev_priv, fw_domains); } -#define __gen6_read(x) \ -static u##x \ -gen6_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \ - enum forcewake_domains fw_engine; \ - GEN6_READ_HEADER(x); \ - fw_engine = __gen6_reg_read_fw_domains(offset); \ - if (fw_engine) \ - __force_wake_auto(dev_priv, fw_engine); \ - val = __raw_i915_read##x(dev_priv, reg); \ - GEN6_READ_FOOTER; \ -} - -#define __fwtable_read(x) \ +#define __gen_read(func, x) \ static u##x \ -fwtable_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \ +func##_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \ enum forcewake_domains fw_engine; \ GEN6_READ_HEADER(x); \ - fw_engine = __fwtable_reg_read_fw_domains(offset); \ + fw_engine = __##func##_reg_read_fw_domains(offset); \ if (fw_engine) \ __force_wake_auto(dev_priv, fw_engine); \ val = __raw_i915_read##x(dev_priv, reg); \ GEN6_READ_FOOTER; \ } +#define __gen6_read(x) __gen_read(gen6, x) +#define __fwtable_read(x) __gen_read(fwtable, x) #define __gen9_decoupled_read(x) \ static u##x \ @@ -1045,34 +998,6 @@ __gen6_read(64) #undef GEN6_READ_FOOTER #undef GEN6_READ_HEADER -#define VGPU_READ_HEADER(x) \ - unsigned long irqflags; \ - u##x val = 0; \ - assert_rpm_device_not_suspended(dev_priv); \ - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags) - -#define VGPU_READ_FOOTER \ - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \ - trace_i915_reg_rw(false, reg, val, sizeof(val), trace); \ - return val - -#define __vgpu_read(x) \ -static u##x \ -vgpu_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \ - VGPU_READ_HEADER(x); \ - val = __raw_i915_read##x(dev_priv, reg); \ - VGPU_READ_FOOTER; \ -} - -__vgpu_read(8) -__vgpu_read(16) -__vgpu_read(32) -__vgpu_read(64) - -#undef __vgpu_read -#undef VGPU_READ_FOOTER -#undef VGPU_READ_HEADER - #define GEN2_WRITE_HEADER \ trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \ assert_rpm_wakelock_held(dev_priv); \ @@ -1136,29 +1061,19 @@ gen6_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool GEN6_WRITE_FOOTER; \ } -#define __gen8_write(x) \ -static void \ -gen8_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \ - enum forcewake_domains fw_engine; \ - GEN6_WRITE_HEADER; \ - fw_engine = __gen8_reg_write_fw_domains(offset); \ - if (fw_engine) \ - __force_wake_auto(dev_priv, fw_engine); \ - __raw_i915_write##x(dev_priv, reg, val); \ - GEN6_WRITE_FOOTER; \ -} - -#define __fwtable_write(x) \ +#define __gen_write(func, x) \ static void \ -fwtable_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \ +func##_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \ enum forcewake_domains fw_engine; \ GEN6_WRITE_HEADER; \ - fw_engine = __fwtable_reg_write_fw_domains(offset); \ + fw_engine = __##func##_reg_write_fw_domains(offset); \ if (fw_engine) \ __force_wake_auto(dev_priv, fw_engine); \ __raw_i915_write##x(dev_priv, reg, val); \ GEN6_WRITE_FOOTER; \ } +#define __gen8_write(x) __gen_write(gen8, x) +#define __fwtable_write(x) __gen_write(fwtable, x) #define __gen9_decoupled_write(x) \ static void \ @@ -1195,31 +1110,6 @@ __gen6_write(32) #undef GEN6_WRITE_FOOTER #undef GEN6_WRITE_HEADER -#define VGPU_WRITE_HEADER \ - unsigned long irqflags; \ - trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \ - assert_rpm_device_not_suspended(dev_priv); \ - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags) - -#define VGPU_WRITE_FOOTER \ - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags) - -#define __vgpu_write(x) \ -static void vgpu_write##x(struct drm_i915_private *dev_priv, \ - i915_reg_t reg, u##x val, bool trace) { \ - VGPU_WRITE_HEADER; \ - __raw_i915_write##x(dev_priv, reg, val); \ - VGPU_WRITE_FOOTER; \ -} - -__vgpu_write(8) -__vgpu_write(16) -__vgpu_write(32) - -#undef __vgpu_write -#undef VGPU_WRITE_FOOTER -#undef VGPU_WRITE_HEADER - #define ASSIGN_WRITE_MMIO_VFUNCS(x) \ do { \ dev_priv->uncore.funcs.mmio_writeb = x##_write8; \ @@ -1375,6 +1265,11 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) FORCEWAKE, FORCEWAKE_ACK); } + if (intel_vgpu_active(dev_priv)) { + dev_priv->uncore.funcs.force_wake_get = vgpu_fw_domains_nop; + dev_priv->uncore.funcs.force_wake_put = vgpu_fw_domains_nop; + } + /* All future platforms are expected to require complex power gating */ WARN_ON(dev_priv->uncore.fw_domains == 0); } @@ -1445,15 +1340,6 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) break; } - intel_fw_table_check(dev_priv); - if (INTEL_GEN(dev_priv) >= 8) - intel_shadow_table_check(); - - if (intel_vgpu_active(dev_priv)) { - ASSIGN_WRITE_MMIO_VFUNCS(vgpu); - ASSIGN_READ_MMIO_VFUNCS(vgpu); - } - i915_check_and_clear_faults(dev_priv); } #undef ASSIGN_WRITE_MMIO_VFUNCS @@ -1971,3 +1857,7 @@ intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, return fw_domains; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_uncore.c" +#endif diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c new file mode 100644 index 000000000000..4e681fc13be4 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c @@ -0,0 +1,135 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "huge_gem_object.h" + +static void huge_free_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + unsigned long nreal = obj->scratch / PAGE_SIZE; + struct scatterlist *sg; + + for (sg = pages->sgl; sg && nreal--; sg = __sg_next(sg)) + __free_page(sg_page(sg)); + + sg_free_table(pages); + kfree(pages); +} + +static struct sg_table * +huge_get_pages(struct drm_i915_gem_object *obj) +{ +#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) + const unsigned long nreal = obj->scratch / PAGE_SIZE; + const unsigned long npages = obj->base.size / PAGE_SIZE; + struct scatterlist *sg, *src, *end; + struct sg_table *pages; + unsigned long n; + + pages = kmalloc(sizeof(*pages), GFP); + if (!pages) + return ERR_PTR(-ENOMEM); + + if (sg_alloc_table(pages, npages, GFP)) { + kfree(pages); + return ERR_PTR(-ENOMEM); + } + + sg = pages->sgl; + for (n = 0; n < nreal; n++) { + struct page *page; + + page = alloc_page(GFP | __GFP_HIGHMEM); + if (!page) { + sg_mark_end(sg); + goto err; + } + + sg_set_page(sg, page, PAGE_SIZE, 0); + sg = __sg_next(sg); + } + if (nreal < npages) { + for (end = sg, src = pages->sgl; sg; sg = __sg_next(sg)) { + sg_set_page(sg, sg_page(src), PAGE_SIZE, 0); + src = __sg_next(src); + if (src == end) + src = pages->sgl; + } + } + + if (i915_gem_gtt_prepare_pages(obj, pages)) + goto err; + + return pages; + +err: + huge_free_pages(obj, pages); + return ERR_PTR(-ENOMEM); +#undef GFP +} + +static void huge_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + i915_gem_gtt_finish_pages(obj, pages); + huge_free_pages(obj, pages); + + obj->mm.dirty = false; +} + +static const struct drm_i915_gem_object_ops huge_ops = { + .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | + I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = huge_get_pages, + .put_pages = huge_put_pages, +}; + +struct drm_i915_gem_object * +huge_gem_object(struct drm_i915_private *i915, + phys_addr_t phys_size, + dma_addr_t dma_size) +{ + struct drm_i915_gem_object *obj; + + GEM_BUG_ON(!phys_size || phys_size > dma_size); + GEM_BUG_ON(!IS_ALIGNED(phys_size, PAGE_SIZE)); + GEM_BUG_ON(!IS_ALIGNED(dma_size, I915_GTT_PAGE_SIZE)); + + if (overflows_type(dma_size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(i915); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, dma_size); + i915_gem_object_init(obj, &huge_ops); + + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; + obj->scratch = phys_size; + + return obj; +} diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/selftests/huge_gem_object.h new file mode 100644 index 000000000000..a6133a9e8029 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.h @@ -0,0 +1,45 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __HUGE_GEM_OBJECT_H +#define __HUGE_GEM_OBJECT_H + +struct drm_i915_gem_object * +huge_gem_object(struct drm_i915_private *i915, + phys_addr_t phys_size, + dma_addr_t dma_size); + +static inline phys_addr_t +huge_gem_object_phys_size(struct drm_i915_gem_object *obj) +{ + return obj->scratch; +} + +static inline dma_addr_t +huge_gem_object_dma_size(struct drm_i915_gem_object *obj) +{ + return obj->base.size; +} + +#endif /* !__HUGE_GEM_OBJECT_H */ diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c new file mode 100644 index 000000000000..f08d0179b3df --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -0,0 +1,385 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/prime_numbers.h> + +#include "../i915_selftest.h" +#include "i915_random.h" + +static int cpu_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + unsigned int needs_clflush; + struct page *page; + typeof(v) *map; + int err; + + err = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); + if (err) + return err; + + page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + map = kmap_atomic(page); + if (needs_clflush & CLFLUSH_BEFORE) + clflush(map+offset_in_page(offset) / sizeof(*map)); + map[offset_in_page(offset) / sizeof(*map)] = v; + if (needs_clflush & CLFLUSH_AFTER) + clflush(map+offset_in_page(offset) / sizeof(*map)); + kunmap_atomic(map); + + i915_gem_obj_finish_shmem_access(obj); + return 0; +} + +static int cpu_get(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 *v) +{ + unsigned int needs_clflush; + struct page *page; + typeof(v) map; + int err; + + err = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); + if (err) + return err; + + page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + map = kmap_atomic(page); + if (needs_clflush & CLFLUSH_BEFORE) + clflush(map+offset_in_page(offset) / sizeof(*map)); + *v = map[offset_in_page(offset) / sizeof(*map)]; + kunmap_atomic(map); + + i915_gem_obj_finish_shmem_access(obj); + return 0; +} + +static int gtt_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + struct i915_vma *vma; + typeof(v) *map; + int err; + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + map = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(map)) + return PTR_ERR(map); + + map[offset / sizeof(*map)] = v; + i915_vma_unpin_iomap(vma); + + return 0; +} + +static int gtt_get(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 *v) +{ + struct i915_vma *vma; + typeof(v) map; + int err; + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + map = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(map)) + return PTR_ERR(map); + + *v = map[offset / sizeof(*map)]; + i915_vma_unpin_iomap(vma); + + return 0; +} + +static int wc_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + typeof(v) *map; + int err; + + /* XXX GTT write followed by WC write go missing */ + i915_gem_object_flush_gtt_write_domain(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return err; + + map = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(map)) + return PTR_ERR(map); + + map[offset / sizeof(*map)] = v; + i915_gem_object_unpin_map(obj); + + return 0; +} + +static int wc_get(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 *v) +{ + typeof(v) map; + int err; + + /* XXX WC write followed by GTT write go missing */ + i915_gem_object_flush_gtt_write_domain(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + return err; + + map = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(map)) + return PTR_ERR(map); + + *v = map[offset / sizeof(*map)]; + i915_gem_object_unpin_map(obj); + + return 0; +} + +static int gpu_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct drm_i915_gem_request *rq; + struct i915_vma *vma; + u32 *cs; + int err; + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + rq = i915_gem_request_alloc(i915->engine[RCS], i915->kernel_context); + if (IS_ERR(rq)) { + i915_vma_unpin(vma); + return PTR_ERR(rq); + } + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + __i915_add_request(rq, false); + i915_vma_unpin(vma); + return PTR_ERR(cs); + } + + if (INTEL_GEN(i915) >= 8) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; + *cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset); + *cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset); + *cs++ = v; + } else if (INTEL_GEN(i915) >= 4) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; + *cs++ = 0; + *cs++ = i915_ggtt_offset(vma) + offset; + *cs++ = v; + } else { + *cs++ = MI_STORE_DWORD_IMM | 1 << 22; + *cs++ = i915_ggtt_offset(vma) + offset; + *cs++ = v; + *cs++ = MI_NOOP; + } + intel_ring_advance(rq, cs); + + i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unpin(vma); + + reservation_object_lock(obj->resv, NULL); + reservation_object_add_excl_fence(obj->resv, &rq->fence); + reservation_object_unlock(obj->resv); + + __i915_add_request(rq, true); + + return 0; +} + +static bool always_valid(struct drm_i915_private *i915) +{ + return true; +} + +static bool needs_mi_store_dword(struct drm_i915_private *i915) +{ + return igt_can_mi_store_dword_imm(i915); +} + +static const struct igt_coherency_mode { + const char *name; + int (*set)(struct drm_i915_gem_object *, unsigned long offset, u32 v); + int (*get)(struct drm_i915_gem_object *, unsigned long offset, u32 *v); + bool (*valid)(struct drm_i915_private *i915); +} igt_coherency_mode[] = { + { "cpu", cpu_set, cpu_get, always_valid }, + { "gtt", gtt_set, gtt_get, always_valid }, + { "wc", wc_set, wc_get, always_valid }, + { "gpu", gpu_set, NULL, needs_mi_store_dword }, + { }, +}; + +static int igt_gem_coherency(void *arg) +{ + const unsigned int ncachelines = PAGE_SIZE/64; + I915_RND_STATE(prng); + struct drm_i915_private *i915 = arg; + const struct igt_coherency_mode *read, *write, *over; + struct drm_i915_gem_object *obj; + unsigned long count, n; + u32 *offsets, *values; + int err = 0; + + /* We repeatedly write, overwrite and read from a sequence of + * cachelines in order to try and detect incoherency (unflushed writes + * from either the CPU or GPU). Each setter/getter uses our cache + * domain API which should prevent incoherency. + */ + + offsets = kmalloc_array(ncachelines, 2*sizeof(u32), GFP_KERNEL); + if (!offsets) + return -ENOMEM; + for (count = 0; count < ncachelines; count++) + offsets[count] = count * 64 + 4 * (count % 16); + + values = offsets + ncachelines; + + mutex_lock(&i915->drm.struct_mutex); + for (over = igt_coherency_mode; over->name; over++) { + if (!over->set) + continue; + + if (!over->valid(i915)) + continue; + + for (write = igt_coherency_mode; write->name; write++) { + if (!write->set) + continue; + + if (!write->valid(i915)) + continue; + + for (read = igt_coherency_mode; read->name; read++) { + if (!read->get) + continue; + + if (!read->valid(i915)) + continue; + + for_each_prime_number_from(count, 1, ncachelines) { + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto unlock; + } + + i915_random_reorder(offsets, ncachelines, &prng); + for (n = 0; n < count; n++) + values[n] = prandom_u32_state(&prng); + + for (n = 0; n < count; n++) { + err = over->set(obj, offsets[n], ~values[n]); + if (err) { + pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n", + n, count, over->name, err); + goto put_object; + } + } + + for (n = 0; n < count; n++) { + err = write->set(obj, offsets[n], values[n]); + if (err) { + pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n", + n, count, write->name, err); + goto put_object; + } + } + + for (n = 0; n < count; n++) { + u32 found; + + err = read->get(obj, offsets[n], &found); + if (err) { + pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n", + n, count, read->name, err); + goto put_object; + } + + if (found != values[n]) { + pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n", + n, count, over->name, + write->name, values[n], + read->name, found, + ~values[n], offsets[n]); + err = -EINVAL; + goto put_object; + } + } + + __i915_gem_object_release_unless_active(obj); + } + } + } + } +unlock: + mutex_unlock(&i915->drm.struct_mutex); + kfree(offsets); + return err; + +put_object: + __i915_gem_object_release_unless_active(obj); + goto unlock; +} + +int i915_gem_coherency_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gem_coherency), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c new file mode 100644 index 000000000000..3813a19a6179 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -0,0 +1,459 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +#include "mock_drm.h" +#include "huge_gem_object.h" + +#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) + +static struct i915_vma * +gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value) +{ + struct drm_i915_gem_object *obj; + const int gen = INTEL_GEN(vma->vm->i915); + unsigned long n, size; + u32 *cmd; + int err; + + GEM_BUG_ON(!igt_can_mi_store_dword_imm(vma->vm->i915)); + + size = (4 * count + 1) * sizeof(u32); + size = round_up(size, PAGE_SIZE); + obj = i915_gem_object_create_internal(vma->vm->i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size); + offset += vma->node.start; + + for (n = 0; n < count; n++) { + if (gen >= 8) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4; + *cmd++ = lower_32_bits(offset); + *cmd++ = upper_32_bits(offset); + *cmd++ = value; + } else if (gen >= 4) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4 | + (gen < 6 ? 1 << 22 : 0); + *cmd++ = 0; + *cmd++ = offset; + *cmd++ = value; + } else { + *cmd++ = MI_STORE_DWORD_IMM | 1 << 22; + *cmd++ = offset; + *cmd++ = value; + } + offset += PAGE_SIZE; + } + *cmd = MI_BATCH_BUFFER_END; + i915_gem_object_unpin_map(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + goto err; + + vma = i915_vma_instance(obj, vma->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static unsigned long real_page_count(struct drm_i915_gem_object *obj) +{ + return huge_gem_object_phys_size(obj) >> PAGE_SHIFT; +} + +static unsigned long fake_page_count(struct drm_i915_gem_object *obj) +{ + return huge_gem_object_dma_size(obj) >> PAGE_SHIFT; +} + +static int gpu_fill(struct drm_i915_gem_object *obj, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + unsigned int dw) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_address_space *vm = + ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct drm_i915_gem_request *rq; + struct i915_vma *vma; + struct i915_vma *batch; + unsigned int flags; + int err; + + GEM_BUG_ON(obj->base.size > vm->total); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + return err; + + err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER); + if (err) + return err; + + /* Within the GTT the huge objects maps every page onto + * its 1024 real pages (using phys_pfn = dma_pfn % 1024). + * We set the nth dword within the page using the nth + * mapping via the GTT - this should exercise the GTT mapping + * whilst checking that each context provides a unique view + * into the object. + */ + batch = gpu_fill_dw(vma, + (dw * real_page_count(obj)) << PAGE_SHIFT | + (dw * sizeof(u32)), + real_page_count(obj), + dw); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err_vma; + } + + rq = i915_gem_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_batch; + } + + err = engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto err_request; + + err = i915_switch_context(rq); + if (err) + goto err_request; + + flags = 0; + if (INTEL_GEN(vm->i915) <= 5) + flags |= I915_DISPATCH_SECURE; + + err = engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + flags); + if (err) + goto err_request; + + i915_vma_move_to_active(batch, rq, 0); + i915_gem_object_set_active_reference(batch->obj); + i915_vma_unpin(batch); + i915_vma_close(batch); + + i915_vma_move_to_active(vma, rq, 0); + i915_vma_unpin(vma); + + reservation_object_lock(obj->resv, NULL); + reservation_object_add_excl_fence(obj->resv, &rq->fence); + reservation_object_unlock(obj->resv); + + __i915_add_request(rq, true); + + return 0; + +err_request: + __i915_add_request(rq, false); +err_batch: + i915_vma_unpin(batch); +err_vma: + i915_vma_unpin(vma); + return err; +} + +static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) +{ + const bool has_llc = HAS_LLC(to_i915(obj->base.dev)); + unsigned int n, m, need_flush; + int err; + + err = i915_gem_obj_prepare_shmem_write(obj, &need_flush); + if (err) + return err; + + for (n = 0; n < real_page_count(obj); n++) { + u32 *map; + + map = kmap_atomic(i915_gem_object_get_page(obj, n)); + for (m = 0; m < DW_PER_PAGE; m++) + map[m] = value; + if (!has_llc) + drm_clflush_virt_range(map, PAGE_SIZE); + kunmap_atomic(map); + } + + i915_gem_obj_finish_shmem_access(obj); + obj->base.read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU; + obj->base.write_domain = 0; + return 0; +} + +static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max) +{ + unsigned int n, m, needs_flush; + int err; + + err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush); + if (err) + return err; + + for (n = 0; n < real_page_count(obj); n++) { + u32 *map; + + map = kmap_atomic(i915_gem_object_get_page(obj, n)); + if (needs_flush & CLFLUSH_BEFORE) + drm_clflush_virt_range(map, PAGE_SIZE); + + for (m = 0; m < max; m++) { + if (map[m] != m) { + pr_err("Invalid value at page %d, offset %d: found %x expected %x\n", + n, m, map[m], m); + err = -EINVAL; + goto out_unmap; + } + } + + for (; m < DW_PER_PAGE; m++) { + if (map[m] != 0xdeadbeef) { + pr_err("Invalid value at page %d, offset %d: found %x expected %x\n", + n, m, map[m], 0xdeadbeef); + err = -EINVAL; + goto out_unmap; + } + } + +out_unmap: + kunmap_atomic(map); + if (err) + break; + } + + i915_gem_obj_finish_shmem_access(obj); + return err; +} + +static struct drm_i915_gem_object * +create_test_object(struct i915_gem_context *ctx, + struct drm_file *file, + struct list_head *objects) +{ + struct drm_i915_gem_object *obj; + struct i915_address_space *vm = + ctx->ppgtt ? &ctx->ppgtt->base : &ctx->i915->ggtt.base; + u64 size; + u32 handle; + int err; + + size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); + size = round_down(size, DW_PER_PAGE * PAGE_SIZE); + + obj = huge_gem_object(ctx->i915, DW_PER_PAGE * PAGE_SIZE, size); + if (IS_ERR(obj)) + return obj; + + /* tie the handle to the drm_file for easy reaping */ + err = drm_gem_handle_create(file, &obj->base, &handle); + i915_gem_object_put(obj); + if (err) + return ERR_PTR(err); + + err = cpu_fill(obj, 0xdeadbeef); + if (err) { + pr_err("Failed to fill object with cpu, err=%d\n", + err); + return ERR_PTR(err); + } + + list_add_tail(&obj->st_link, objects); + return obj; +} + +static unsigned long max_dwords(struct drm_i915_gem_object *obj) +{ + unsigned long npages = fake_page_count(obj); + + GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE)); + return npages / DW_PER_PAGE; +} + +static int igt_ctx_exec(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_file *file = mock_file(i915); + struct drm_i915_gem_object *obj; + IGT_TIMEOUT(end_time); + LIST_HEAD(objects); + unsigned long ncontexts, ndwords, dw; + bool first_shared_gtt = true; + int err; + + /* Create a few different contexts (with different mm) and write + * through each ctx/mm using the GPU making sure those writes end + * up in the expected pages of our obj. + */ + + mutex_lock(&i915->drm.struct_mutex); + + ncontexts = 0; + ndwords = 0; + dw = 0; + while (!time_after(jiffies, end_time)) { + struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + unsigned int id; + + if (first_shared_gtt) { + ctx = __create_hw_context(i915, file->driver_priv); + first_shared_gtt = false; + } else { + ctx = i915_gem_create_context(i915, file->driver_priv); + } + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_unlock; + } + + for_each_engine(engine, i915, id) { + if (dw == 0) { + obj = create_test_object(ctx, file, &objects); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_unlock; + } + } + + err = gpu_fill(obj, ctx, engine, dw); + if (err) { + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + ndwords, dw, max_dwords(obj), + engine->name, ctx->hw_id, + yesno(!!ctx->ppgtt), err); + goto out_unlock; + } + + if (++dw == max_dwords(obj)) + dw = 0; + ndwords++; + } + ncontexts++; + } + pr_info("Submitted %lu contexts (across %u engines), filling %lu dwords\n", + ncontexts, INTEL_INFO(i915)->num_rings, ndwords); + + dw = 0; + list_for_each_entry(obj, &objects, st_link) { + unsigned int rem = + min_t(unsigned int, ndwords - dw, max_dwords(obj)); + + err = cpu_check(obj, rem); + if (err) + break; + + dw += rem; + } + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + + mock_file_free(i915, file); + return err; +} + +static int fake_aliasing_ppgtt_enable(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + int err; + + err = i915_gem_init_aliasing_ppgtt(i915); + if (err) + return err; + + list_for_each_entry(obj, &i915->mm.bound_list, global_link) { + struct i915_vma *vma; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) + continue; + + vma->flags &= ~I915_VMA_LOCAL_BIND; + } + + return 0; +} + +static void fake_aliasing_ppgtt_disable(struct drm_i915_private *i915) +{ + i915_gem_fini_aliasing_ppgtt(i915); +} + +int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_ctx_exec), + }; + bool fake_alias = false; + int err; + + /* Install a fake aliasing gtt for exercise */ + if (USES_PPGTT(dev_priv) && !dev_priv->mm.aliasing_ppgtt) { + mutex_lock(&dev_priv->drm.struct_mutex); + err = fake_aliasing_ppgtt_enable(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); + if (err) + return err; + + GEM_BUG_ON(!dev_priv->mm.aliasing_ppgtt); + fake_alias = true; + } + + err = i915_subtests(tests, dev_priv); + + if (fake_alias) { + mutex_lock(&dev_priv->drm.struct_mutex); + fake_aliasing_ppgtt_disable(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); + } + + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c new file mode 100644 index 000000000000..817bef74bbcb --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c @@ -0,0 +1,303 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +#include "mock_gem_device.h" +#include "mock_dmabuf.h" + +static int igt_dmabuf_export(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + i915_gem_object_put(obj); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%d\n", + (int)PTR_ERR(dmabuf)); + return PTR_ERR(dmabuf); + } + + dma_buf_put(dmabuf); + return 0; +} + +static int igt_dmabuf_import_self(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct drm_gem_object *import; + struct dma_buf *dmabuf; + int err; + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%d\n", + (int)PTR_ERR(dmabuf)); + err = PTR_ERR(dmabuf); + goto out; + } + + import = i915_gem_prime_import(&i915->drm, dmabuf); + if (IS_ERR(import)) { + pr_err("i915_gem_prime_import failed with err=%d\n", + (int)PTR_ERR(import)); + err = PTR_ERR(import); + goto out_dmabuf; + } + + if (import != &obj->base) { + pr_err("i915_gem_prime_import created a new object!\n"); + err = -EINVAL; + goto out_import; + } + + err = 0; +out_import: + i915_gem_object_put(to_intel_bo(import)); +out_dmabuf: + dma_buf_put(dmabuf); +out: + i915_gem_object_put(obj); + return err; +} + +static int igt_dmabuf_import(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + void *obj_map, *dma_map; + u32 pattern[] = { 0, 0xaa, 0xcc, 0x55, 0xff }; + int err, i; + + dmabuf = mock_dmabuf(1); + if (IS_ERR(dmabuf)) + return PTR_ERR(dmabuf); + + obj = to_intel_bo(i915_gem_prime_import(&i915->drm, dmabuf)); + if (IS_ERR(obj)) { + pr_err("i915_gem_prime_import failed with err=%d\n", + (int)PTR_ERR(obj)); + err = PTR_ERR(obj); + goto out_dmabuf; + } + + if (obj->base.dev != &i915->drm) { + pr_err("i915_gem_prime_import created a non-i915 object!\n"); + err = -EINVAL; + goto out_obj; + } + + if (obj->base.size != PAGE_SIZE) { + pr_err("i915_gem_prime_import is wrong size found %lld, expected %ld\n", + (long long)obj->base.size, PAGE_SIZE); + err = -EINVAL; + goto out_obj; + } + + dma_map = dma_buf_vmap(dmabuf); + if (!dma_map) { + pr_err("dma_buf_vmap failed\n"); + err = -ENOMEM; + goto out_obj; + } + + if (0) { /* Can not yet map dmabuf */ + obj_map = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(obj_map)) { + err = PTR_ERR(obj_map); + pr_err("i915_gem_object_pin_map failed with err=%d\n", err); + goto out_dma_map; + } + + for (i = 0; i < ARRAY_SIZE(pattern); i++) { + memset(dma_map, pattern[i], PAGE_SIZE); + if (memchr_inv(obj_map, pattern[i], PAGE_SIZE)) { + err = -EINVAL; + pr_err("imported vmap not all set to %x!\n", pattern[i]); + i915_gem_object_unpin_map(obj); + goto out_dma_map; + } + } + + for (i = 0; i < ARRAY_SIZE(pattern); i++) { + memset(obj_map, pattern[i], PAGE_SIZE); + if (memchr_inv(dma_map, pattern[i], PAGE_SIZE)) { + err = -EINVAL; + pr_err("exported vmap not all set to %x!\n", pattern[i]); + i915_gem_object_unpin_map(obj); + goto out_dma_map; + } + } + + i915_gem_object_unpin_map(obj); + } + + err = 0; +out_dma_map: + dma_buf_vunmap(dmabuf, dma_map); +out_obj: + i915_gem_object_put(obj); +out_dmabuf: + dma_buf_put(dmabuf); + return err; +} + +static int igt_dmabuf_import_ownership(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + void *ptr; + int err; + + dmabuf = mock_dmabuf(1); + if (IS_ERR(dmabuf)) + return PTR_ERR(dmabuf); + + ptr = dma_buf_vmap(dmabuf); + if (!ptr) { + pr_err("dma_buf_vmap failed\n"); + err = -ENOMEM; + goto err_dmabuf; + } + + memset(ptr, 0xc5, PAGE_SIZE); + dma_buf_vunmap(dmabuf, ptr); + + obj = to_intel_bo(i915_gem_prime_import(&i915->drm, dmabuf)); + if (IS_ERR(obj)) { + pr_err("i915_gem_prime_import failed with err=%d\n", + (int)PTR_ERR(obj)); + err = PTR_ERR(obj); + goto err_dmabuf; + } + + dma_buf_put(dmabuf); + + err = i915_gem_object_pin_pages(obj); + if (err) { + pr_err("i915_gem_object_pin_pages failed with err=%d\n", err); + goto out_obj; + } + + err = 0; + i915_gem_object_unpin_pages(obj); +out_obj: + i915_gem_object_put(obj); + return err; + +err_dmabuf: + dma_buf_put(dmabuf); + return err; +} + +static int igt_dmabuf_export_vmap(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + void *ptr; + int err; + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%d\n", + (int)PTR_ERR(dmabuf)); + err = PTR_ERR(dmabuf); + goto err_obj; + } + i915_gem_object_put(obj); + + ptr = dma_buf_vmap(dmabuf); + if (IS_ERR(ptr)) { + err = PTR_ERR(ptr); + pr_err("dma_buf_vmap failed with err=%d\n", err); + goto out; + } + + if (memchr_inv(ptr, 0, dmabuf->size)) { + pr_err("Exported object not initialiased to zero!\n"); + err = -EINVAL; + goto out; + } + + memset(ptr, 0xc5, dmabuf->size); + + err = 0; + dma_buf_vunmap(dmabuf, ptr); +out: + dma_buf_put(dmabuf); + return err; + +err_obj: + i915_gem_object_put(obj); + return err; +} + +int i915_gem_dmabuf_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_dmabuf_export), + SUBTEST(igt_dmabuf_import_self), + SUBTEST(igt_dmabuf_import), + SUBTEST(igt_dmabuf_import_ownership), + SUBTEST(igt_dmabuf_export_vmap), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915); + + drm_dev_unref(&i915->drm); + return err; +} + +int i915_gem_dmabuf_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_dmabuf_export), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c new file mode 100644 index 000000000000..97af353db218 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -0,0 +1,260 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +#include "mock_gem_device.h" + +static int populate_ggtt(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + u64 size; + + for (size = 0; + size + I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + size += I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) + return PTR_ERR(vma); + } + + if (!list_empty(&i915->mm.unbound_list)) { + size = 0; + list_for_each_entry(obj, &i915->mm.unbound_list, global_link) + size++; + + pr_err("Found %lld objects unbound!\n", size); + return -EINVAL; + } + + if (list_empty(&i915->ggtt.base.inactive_list)) { + pr_err("No objects on the GGTT inactive list!\n"); + return -EINVAL; + } + + return 0; +} + +static void unpin_ggtt(struct drm_i915_private *i915) +{ + struct i915_vma *vma; + + list_for_each_entry(vma, &i915->ggtt.base.inactive_list, vm_link) + i915_vma_unpin(vma); +} + +static void cleanup_objects(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj, *on; + + list_for_each_entry_safe(obj, on, &i915->mm.unbound_list, global_link) + i915_gem_object_put(obj); + + list_for_each_entry_safe(obj, on, &i915->mm.bound_list, global_link) + i915_gem_object_put(obj); + + mutex_unlock(&i915->drm.struct_mutex); + + i915_gem_drain_freed_objects(i915); + + mutex_lock(&i915->drm.struct_mutex); +} + +static int igt_evict_something(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; + int err; + + /* Fill the GGTT with pinned objects and try to evict one. */ + + err = populate_ggtt(i915); + if (err) + goto cleanup; + + /* Everything is pinned, nothing should happen */ + err = i915_gem_evict_something(&ggtt->base, + I915_GTT_PAGE_SIZE, 0, 0, + 0, U64_MAX, + 0); + if (err != -ENOSPC) { + pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n", + err); + goto cleanup; + } + + unpin_ggtt(i915); + + /* Everything is unpinned, we should be able to evict something */ + err = i915_gem_evict_something(&ggtt->base, + I915_GTT_PAGE_SIZE, 0, 0, + 0, U64_MAX, + 0); + if (err) { + pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n", + err); + goto cleanup; + } + +cleanup: + cleanup_objects(i915); + return err; +} + +static int igt_overcommit(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + /* Fill the GGTT with pinned objects and then try to pin one more. + * We expect it to fail. + */ + + err = populate_ggtt(i915); + if (err) + goto cleanup; + + obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto cleanup; + } + + list_move(&obj->global_link, &i915->mm.unbound_list); + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + if (!IS_ERR(vma) || PTR_ERR(vma) != -ENOSPC) { + pr_err("Failed to evict+insert, i915_gem_object_ggtt_pin returned err=%d\n", (int)PTR_ERR(vma)); + err = -EINVAL; + goto cleanup; + } + +cleanup: + cleanup_objects(i915); + return err; +} + +static int igt_evict_for_vma(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; + struct drm_mm_node target = { + .start = 0, + .size = 4096, + }; + int err; + + /* Fill the GGTT with pinned objects and try to evict a range. */ + + err = populate_ggtt(i915); + if (err) + goto cleanup; + + /* Everything is pinned, nothing should happen */ + err = i915_gem_evict_for_node(&ggtt->base, &target, 0); + if (err != -ENOSPC) { + pr_err("i915_gem_evict_for_node on a full GGTT returned err=%d\n", + err); + goto cleanup; + } + + unpin_ggtt(i915); + + /* Everything is unpinned, we should be able to evict the node */ + err = i915_gem_evict_for_node(&ggtt->base, &target, 0); + if (err) { + pr_err("i915_gem_evict_for_node returned err=%d\n", + err); + goto cleanup; + } + +cleanup: + cleanup_objects(i915); + return err; +} + +static int igt_evict_vm(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; + int err; + + /* Fill the GGTT with pinned objects and try to evict everything. */ + + err = populate_ggtt(i915); + if (err) + goto cleanup; + + /* Everything is pinned, nothing should happen */ + err = i915_gem_evict_vm(&ggtt->base, false); + if (err) { + pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", + err); + goto cleanup; + } + + unpin_ggtt(i915); + + err = i915_gem_evict_vm(&ggtt->base, false); + if (err) { + pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", + err); + goto cleanup; + } + +cleanup: + cleanup_objects(i915); + return err; +} + +int i915_gem_evict_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_evict_something), + SUBTEST(igt_evict_for_vma), + SUBTEST(igt_evict_vm), + SUBTEST(igt_overcommit), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + err = i915_subtests(tests, i915); + mutex_unlock(&i915->drm.struct_mutex); + + drm_dev_unref(&i915->drm); + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c new file mode 100644 index 000000000000..0f3fa34377c6 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -0,0 +1,1556 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/list_sort.h> +#include <linux/prime_numbers.h> + +#include "../i915_selftest.h" +#include "i915_random.h" + +#include "mock_context.h" +#include "mock_drm.h" +#include "mock_gem_device.h" + +static void fake_free_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + sg_free_table(pages); + kfree(pages); +} + +static struct sg_table * +fake_get_pages(struct drm_i915_gem_object *obj) +{ +#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) +#define PFN_BIAS 0x1000 + struct sg_table *pages; + struct scatterlist *sg; + typeof(obj->base.size) rem; + + pages = kmalloc(sizeof(*pages), GFP); + if (!pages) + return ERR_PTR(-ENOMEM); + + rem = round_up(obj->base.size, BIT(31)) >> 31; + if (sg_alloc_table(pages, rem, GFP)) { + kfree(pages); + return ERR_PTR(-ENOMEM); + } + + rem = obj->base.size; + for (sg = pages->sgl; sg; sg = sg_next(sg)) { + unsigned long len = min_t(typeof(rem), rem, BIT(31)); + + GEM_BUG_ON(!len); + sg_set_page(sg, pfn_to_page(PFN_BIAS), len, 0); + sg_dma_address(sg) = page_to_phys(sg_page(sg)); + sg_dma_len(sg) = len; + + rem -= len; + } + GEM_BUG_ON(rem); + + obj->mm.madv = I915_MADV_DONTNEED; + return pages; +#undef GFP +} + +static void fake_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + fake_free_pages(obj, pages); + obj->mm.dirty = false; + obj->mm.madv = I915_MADV_WILLNEED; +} + +static const struct drm_i915_gem_object_ops fake_ops = { + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = fake_get_pages, + .put_pages = fake_put_pages, +}; + +static struct drm_i915_gem_object * +fake_dma_object(struct drm_i915_private *i915, u64 size) +{ + struct drm_i915_gem_object *obj; + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(i915); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &fake_ops); + + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->cache_level = I915_CACHE_NONE; + + /* Preallocate the "backing storage" */ + if (i915_gem_object_pin_pages(obj)) + return ERR_PTR(-ENOMEM); + + i915_gem_object_unpin_pages(obj); + return obj; +} + +static int igt_ppgtt_alloc(void *arg) +{ + struct drm_i915_private *dev_priv = arg; + struct i915_hw_ppgtt *ppgtt; + u64 size, last; + int err; + + /* Allocate a ppggt and try to fill the entire range */ + + if (!USES_PPGTT(dev_priv)) + return 0; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return -ENOMEM; + + mutex_lock(&dev_priv->drm.struct_mutex); + err = __hw_ppgtt_init(ppgtt, dev_priv); + if (err) + goto err_ppgtt; + + if (!ppgtt->base.allocate_va_range) + goto err_ppgtt_cleanup; + + /* Check we can allocate the entire range */ + for (size = 4096; + size <= ppgtt->base.total; + size <<= 2) { + err = ppgtt->base.allocate_va_range(&ppgtt->base, 0, size); + if (err) { + if (err == -ENOMEM) { + pr_info("[1] Ran out of memory for va_range [0 + %llx] [bit %d]\n", + size, ilog2(size)); + err = 0; /* virtual space too large! */ + } + goto err_ppgtt_cleanup; + } + + ppgtt->base.clear_range(&ppgtt->base, 0, size); + } + + /* Check we can incrementally allocate the entire range */ + for (last = 0, size = 4096; + size <= ppgtt->base.total; + last = size, size <<= 2) { + err = ppgtt->base.allocate_va_range(&ppgtt->base, + last, size - last); + if (err) { + if (err == -ENOMEM) { + pr_info("[2] Ran out of memory for va_range [%llx + %llx] [bit %d]\n", + last, size - last, ilog2(size)); + err = 0; /* virtual space too large! */ + } + goto err_ppgtt_cleanup; + } + } + +err_ppgtt_cleanup: + ppgtt->base.cleanup(&ppgtt->base); +err_ppgtt: + mutex_unlock(&dev_priv->drm.struct_mutex); + kfree(ppgtt); + return err; +} + +static int lowlevel_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + I915_RND_STATE(seed_prng); + unsigned int size; + + /* Keep creating larger objects until one cannot fit into the hole */ + for (size = 12; (hole_end - hole_start) >> size; size++) { + I915_RND_SUBSTATE(prng, seed_prng); + struct drm_i915_gem_object *obj; + unsigned int *order, count, n; + u64 hole_size; + + hole_size = (hole_end - hole_start) >> size; + if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32)) + hole_size = KMALLOC_MAX_SIZE / sizeof(u32); + count = hole_size; + do { + count >>= 1; + order = i915_random_order(count, &prng); + } while (!order && count); + if (!order) + break; + + GEM_BUG_ON(count * BIT_ULL(size) > vm->total); + GEM_BUG_ON(hole_start + count * BIT_ULL(size) > hole_end); + + /* Ignore allocation failures (i.e. don't report them as + * a test failure) as we are purposefully allocating very + * large objects without checking that we have sufficient + * memory. We expect to hit -ENOMEM. + */ + + obj = fake_dma_object(i915, BIT_ULL(size)); + if (IS_ERR(obj)) { + kfree(order); + break; + } + + GEM_BUG_ON(obj->base.size != BIT_ULL(size)); + + if (i915_gem_object_pin_pages(obj)) { + i915_gem_object_put(obj); + kfree(order); + break; + } + + for (n = 0; n < count; n++) { + u64 addr = hole_start + order[n] * BIT_ULL(size); + + GEM_BUG_ON(addr + BIT_ULL(size) > vm->total); + + if (igt_timeout(end_time, + "%s timed out before %d/%d\n", + __func__, n, count)) { + hole_end = hole_start; /* quit */ + break; + } + + if (vm->allocate_va_range && + vm->allocate_va_range(vm, addr, BIT_ULL(size))) + break; + + vm->insert_entries(vm, obj->mm.pages, addr, + I915_CACHE_NONE, 0); + } + count = n; + + i915_random_reorder(order, count, &prng); + for (n = 0; n < count; n++) { + u64 addr = hole_start + order[n] * BIT_ULL(size); + + GEM_BUG_ON(addr + BIT_ULL(size) > vm->total); + vm->clear_range(vm, addr, BIT_ULL(size)); + } + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + + kfree(order); + } + + return 0; +} + +static void close_object_list(struct list_head *objects, + struct i915_address_space *vm) +{ + struct drm_i915_gem_object *obj, *on; + int ignored; + + list_for_each_entry_safe(obj, on, objects, st_link) { + struct i915_vma *vma; + + vma = i915_vma_instance(obj, vm, NULL); + if (!IS_ERR(vma)) + ignored = i915_vma_unbind(vma); + /* Only ppgtt vma may be closed before the object is freed */ + if (!IS_ERR(vma) && !i915_vma_is_ggtt(vma)) + i915_vma_close(vma); + + list_del(&obj->st_link); + i915_gem_object_put(obj); + } +} + +static int fill_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + const u64 hole_size = hole_end - hole_start; + struct drm_i915_gem_object *obj; + const unsigned long max_pages = + min_t(u64, ULONG_MAX - 1, hole_size/2 >> PAGE_SHIFT); + const unsigned long max_step = max(int_sqrt(max_pages), 2UL); + unsigned long npages, prime, flags; + struct i915_vma *vma; + LIST_HEAD(objects); + int err; + + /* Try binding many VMA working inwards from either edge */ + + flags = PIN_OFFSET_FIXED | PIN_USER; + if (i915_is_ggtt(vm)) + flags |= PIN_GLOBAL; + + for_each_prime_number_from(prime, 2, max_step) { + for (npages = 1; npages <= max_pages; npages *= prime) { + const u64 full_size = npages << PAGE_SHIFT; + const struct { + const char *name; + u64 offset; + int step; + } phases[] = { + { "top-down", hole_end, -1, }, + { "bottom-up", hole_start, 1, }, + { } + }, *p; + + obj = fake_dma_object(i915, full_size); + if (IS_ERR(obj)) + break; + + list_add(&obj->st_link, &objects); + + /* Align differing sized objects against the edges, and + * check we don't walk off into the void when binding + * them into the GTT. + */ + for (p = phases; p->name; p++) { + u64 offset; + + offset = p->offset; + list_for_each_entry(obj, &objects, st_link) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + continue; + + if (p->step < 0) { + if (offset < hole_start + obj->base.size) + break; + offset -= obj->base.size; + } + + err = i915_vma_pin(vma, 0, 0, offset | flags); + if (err) { + pr_err("%s(%s) pin (forward) failed with err=%d on size=%lu pages (prime=%lu), offset=%llx\n", + __func__, p->name, err, npages, prime, offset); + goto err; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, offset | flags)) { + pr_err("%s(%s) (forward) insert failed: vma.node=%llx + %llx [allocated? %d], expected offset %llx\n", + __func__, p->name, vma->node.start, vma->node.size, drm_mm_node_allocated(&vma->node), + offset); + err = -EINVAL; + goto err; + } + + i915_vma_unpin(vma); + + if (p->step > 0) { + if (offset + obj->base.size > hole_end) + break; + offset += obj->base.size; + } + } + + offset = p->offset; + list_for_each_entry(obj, &objects, st_link) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + continue; + + if (p->step < 0) { + if (offset < hole_start + obj->base.size) + break; + offset -= obj->base.size; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, offset | flags)) { + pr_err("%s(%s) (forward) moved vma.node=%llx + %llx, expected offset %llx\n", + __func__, p->name, vma->node.start, vma->node.size, + offset); + err = -EINVAL; + goto err; + } + + err = i915_vma_unbind(vma); + if (err) { + pr_err("%s(%s) (forward) unbind of vma.node=%llx + %llx failed with err=%d\n", + __func__, p->name, vma->node.start, vma->node.size, + err); + goto err; + } + + if (p->step > 0) { + if (offset + obj->base.size > hole_end) + break; + offset += obj->base.size; + } + } + + offset = p->offset; + list_for_each_entry_reverse(obj, &objects, st_link) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + continue; + + if (p->step < 0) { + if (offset < hole_start + obj->base.size) + break; + offset -= obj->base.size; + } + + err = i915_vma_pin(vma, 0, 0, offset | flags); + if (err) { + pr_err("%s(%s) pin (backward) failed with err=%d on size=%lu pages (prime=%lu), offset=%llx\n", + __func__, p->name, err, npages, prime, offset); + goto err; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, offset | flags)) { + pr_err("%s(%s) (backward) insert failed: vma.node=%llx + %llx [allocated? %d], expected offset %llx\n", + __func__, p->name, vma->node.start, vma->node.size, drm_mm_node_allocated(&vma->node), + offset); + err = -EINVAL; + goto err; + } + + i915_vma_unpin(vma); + + if (p->step > 0) { + if (offset + obj->base.size > hole_end) + break; + offset += obj->base.size; + } + } + + offset = p->offset; + list_for_each_entry_reverse(obj, &objects, st_link) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + continue; + + if (p->step < 0) { + if (offset < hole_start + obj->base.size) + break; + offset -= obj->base.size; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, offset | flags)) { + pr_err("%s(%s) (backward) moved vma.node=%llx + %llx [allocated? %d], expected offset %llx\n", + __func__, p->name, vma->node.start, vma->node.size, drm_mm_node_allocated(&vma->node), + offset); + err = -EINVAL; + goto err; + } + + err = i915_vma_unbind(vma); + if (err) { + pr_err("%s(%s) (backward) unbind of vma.node=%llx + %llx failed with err=%d\n", + __func__, p->name, vma->node.start, vma->node.size, + err); + goto err; + } + + if (p->step > 0) { + if (offset + obj->base.size > hole_end) + break; + offset += obj->base.size; + } + } + } + + if (igt_timeout(end_time, "%s timed out (npages=%lu, prime=%lu)\n", + __func__, npages, prime)) { + err = -EINTR; + goto err; + } + } + + close_object_list(&objects, vm); + } + + return 0; + +err: + close_object_list(&objects, vm); + return err; +} + +static int walk_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + const u64 hole_size = hole_end - hole_start; + const unsigned long max_pages = + min_t(u64, ULONG_MAX - 1, hole_size >> PAGE_SHIFT); + unsigned long flags; + u64 size; + + /* Try binding a single VMA in different positions within the hole */ + + flags = PIN_OFFSET_FIXED | PIN_USER; + if (i915_is_ggtt(vm)) + flags |= PIN_GLOBAL; + + for_each_prime_number_from(size, 1, max_pages) { + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u64 addr; + int err = 0; + + obj = fake_dma_object(i915, size << PAGE_SHIFT); + if (IS_ERR(obj)) + break; + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + for (addr = hole_start; + addr + obj->base.size < hole_end; + addr += obj->base.size) { + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) { + pr_err("%s bind failed at %llx + %llx [hole %llx- %llx] with err=%d\n", + __func__, addr, vma->size, + hole_start, hole_end, err); + goto err; + } + i915_vma_unpin(vma); + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, addr | flags)) { + pr_err("%s incorrect at %llx + %llx\n", + __func__, addr, vma->size); + err = -EINVAL; + goto err; + } + + err = i915_vma_unbind(vma); + if (err) { + pr_err("%s unbind failed at %llx + %llx with err=%d\n", + __func__, addr, vma->size, err); + goto err; + } + + GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); + + if (igt_timeout(end_time, + "%s timed out at %llx\n", + __func__, addr)) { + err = -EINTR; + goto err; + } + } + +err: + if (!i915_vma_is_ggtt(vma)) + i915_vma_close(vma); + i915_gem_object_put(obj); + if (err) + return err; + } + + return 0; +} + +static int pot_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned long flags; + unsigned int pot; + int err = 0; + + flags = PIN_OFFSET_FIXED | PIN_USER; + if (i915_is_ggtt(vm)) + flags |= PIN_GLOBAL; + + obj = i915_gem_object_create_internal(i915, 2 * I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + /* Insert a pair of pages across every pot boundary within the hole */ + for (pot = fls64(hole_end - 1) - 1; + pot > ilog2(2 * I915_GTT_PAGE_SIZE); + pot--) { + u64 step = BIT_ULL(pot); + u64 addr; + + for (addr = round_up(hole_start + I915_GTT_PAGE_SIZE, step) - I915_GTT_PAGE_SIZE; + addr <= round_down(hole_end - 2*I915_GTT_PAGE_SIZE, step) - I915_GTT_PAGE_SIZE; + addr += step) { + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) { + pr_err("%s failed to pin object at %llx in hole [%llx - %llx], with err=%d\n", + __func__, + addr, + hole_start, hole_end, + err); + goto err; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, addr | flags)) { + pr_err("%s incorrect at %llx + %llx\n", + __func__, addr, vma->size); + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + err = -EINVAL; + goto err; + } + + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + GEM_BUG_ON(err); + } + + if (igt_timeout(end_time, + "%s timed out after %d/%d\n", + __func__, pot, fls64(hole_end - 1) - 1)) { + err = -EINTR; + goto err; + } + } + +err: + if (!i915_vma_is_ggtt(vma)) + i915_vma_close(vma); +err_obj: + i915_gem_object_put(obj); + return err; +} + +static int drunk_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + I915_RND_STATE(prng); + unsigned int size; + unsigned long flags; + + flags = PIN_OFFSET_FIXED | PIN_USER; + if (i915_is_ggtt(vm)) + flags |= PIN_GLOBAL; + + /* Keep creating larger objects until one cannot fit into the hole */ + for (size = 12; (hole_end - hole_start) >> size; size++) { + struct drm_i915_gem_object *obj; + unsigned int *order, count, n; + struct i915_vma *vma; + u64 hole_size; + int err; + + hole_size = (hole_end - hole_start) >> size; + if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32)) + hole_size = KMALLOC_MAX_SIZE / sizeof(u32); + count = hole_size; + do { + count >>= 1; + order = i915_random_order(count, &prng); + } while (!order && count); + if (!order) + break; + + /* Ignore allocation failures (i.e. don't report them as + * a test failure) as we are purposefully allocating very + * large objects without checking that we have sufficient + * memory. We expect to hit -ENOMEM. + */ + + obj = fake_dma_object(i915, BIT_ULL(size)); + if (IS_ERR(obj)) { + kfree(order); + break; + } + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + GEM_BUG_ON(vma->size != BIT_ULL(size)); + + for (n = 0; n < count; n++) { + u64 addr = hole_start + order[n] * BIT_ULL(size); + + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) { + pr_err("%s failed to pin object at %llx + %llx in hole [%llx - %llx], with err=%d\n", + __func__, + addr, BIT_ULL(size), + hole_start, hole_end, + err); + goto err; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, addr | flags)) { + pr_err("%s incorrect at %llx + %llx\n", + __func__, addr, BIT_ULL(size)); + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + err = -EINVAL; + goto err; + } + + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + GEM_BUG_ON(err); + + if (igt_timeout(end_time, + "%s timed out after %d/%d\n", + __func__, n, count)) { + err = -EINTR; + goto err; + } + } + +err: + if (!i915_vma_is_ggtt(vma)) + i915_vma_close(vma); +err_obj: + i915_gem_object_put(obj); + kfree(order); + if (err) + return err; + } + + return 0; +} + +static int __shrink_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + struct drm_i915_gem_object *obj; + unsigned long flags = PIN_OFFSET_FIXED | PIN_USER; + unsigned int order = 12; + LIST_HEAD(objects); + int err = 0; + u64 addr; + + /* Keep creating larger objects until one cannot fit into the hole */ + for (addr = hole_start; addr < hole_end; ) { + struct i915_vma *vma; + u64 size = BIT_ULL(order++); + + size = min(size, hole_end - addr); + obj = fake_dma_object(i915, size); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + break; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + break; + } + + GEM_BUG_ON(vma->size != size); + + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) { + pr_err("%s failed to pin object at %llx + %llx in hole [%llx - %llx], with err=%d\n", + __func__, addr, size, hole_start, hole_end, err); + break; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, addr | flags)) { + pr_err("%s incorrect at %llx + %llx\n", + __func__, addr, size); + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + err = -EINVAL; + break; + } + + i915_vma_unpin(vma); + addr += size; + + if (igt_timeout(end_time, + "%s timed out at ofset %llx [%llx - %llx]\n", + __func__, addr, hole_start, hole_end)) { + err = -EINTR; + break; + } + } + + close_object_list(&objects, vm); + return err; +} + +static int shrink_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + unsigned long prime; + int err; + + vm->fault_attr.probability = 999; + atomic_set(&vm->fault_attr.times, -1); + + for_each_prime_number_from(prime, 0, ULONG_MAX - 1) { + vm->fault_attr.interval = prime; + err = __shrink_hole(i915, vm, hole_start, hole_end, end_time); + if (err) + break; + } + + memset(&vm->fault_attr, 0, sizeof(vm->fault_attr)); + + return err; +} + +static int exercise_ppgtt(struct drm_i915_private *dev_priv, + int (*func)(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time)) +{ + struct drm_file *file; + struct i915_hw_ppgtt *ppgtt; + IGT_TIMEOUT(end_time); + int err; + + if (!USES_FULL_PPGTT(dev_priv)) + return 0; + + file = mock_file(dev_priv); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&dev_priv->drm.struct_mutex); + ppgtt = i915_ppgtt_create(dev_priv, file->driver_priv, "mock"); + if (IS_ERR(ppgtt)) { + err = PTR_ERR(ppgtt); + goto out_unlock; + } + GEM_BUG_ON(offset_in_page(ppgtt->base.total)); + GEM_BUG_ON(ppgtt->base.closed); + + err = func(dev_priv, &ppgtt->base, 0, ppgtt->base.total, end_time); + + i915_ppgtt_close(&ppgtt->base); + i915_ppgtt_put(ppgtt); +out_unlock: + mutex_unlock(&dev_priv->drm.struct_mutex); + + mock_file_free(dev_priv, file); + return err; +} + +static int igt_ppgtt_fill(void *arg) +{ + return exercise_ppgtt(arg, fill_hole); +} + +static int igt_ppgtt_walk(void *arg) +{ + return exercise_ppgtt(arg, walk_hole); +} + +static int igt_ppgtt_pot(void *arg) +{ + return exercise_ppgtt(arg, pot_hole); +} + +static int igt_ppgtt_drunk(void *arg) +{ + return exercise_ppgtt(arg, drunk_hole); +} + +static int igt_ppgtt_lowlevel(void *arg) +{ + return exercise_ppgtt(arg, lowlevel_hole); +} + +static int igt_ppgtt_shrink(void *arg) +{ + return exercise_ppgtt(arg, shrink_hole); +} + +static int sort_holes(void *priv, struct list_head *A, struct list_head *B) +{ + struct drm_mm_node *a = list_entry(A, typeof(*a), hole_stack); + struct drm_mm_node *b = list_entry(B, typeof(*b), hole_stack); + + if (a->start < b->start) + return -1; + else + return 1; +} + +static int exercise_ggtt(struct drm_i915_private *i915, + int (*func)(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time)) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + u64 hole_start, hole_end, last = 0; + struct drm_mm_node *node; + IGT_TIMEOUT(end_time); + int err; + + mutex_lock(&i915->drm.struct_mutex); +restart: + list_sort(NULL, &ggtt->base.mm.hole_stack, sort_holes); + drm_mm_for_each_hole(node, &ggtt->base.mm, hole_start, hole_end) { + if (hole_start < last) + continue; + + if (ggtt->base.mm.color_adjust) + ggtt->base.mm.color_adjust(node, 0, + &hole_start, &hole_end); + if (hole_start >= hole_end) + continue; + + err = func(i915, &ggtt->base, hole_start, hole_end, end_time); + if (err) + break; + + /* As we have manipulated the drm_mm, the list may be corrupt */ + last = hole_end; + goto restart; + } + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} + +static int igt_ggtt_fill(void *arg) +{ + return exercise_ggtt(arg, fill_hole); +} + +static int igt_ggtt_walk(void *arg) +{ + return exercise_ggtt(arg, walk_hole); +} + +static int igt_ggtt_pot(void *arg) +{ + return exercise_ggtt(arg, pot_hole); +} + +static int igt_ggtt_drunk(void *arg) +{ + return exercise_ggtt(arg, drunk_hole); +} + +static int igt_ggtt_lowlevel(void *arg) +{ + return exercise_ggtt(arg, lowlevel_hole); +} + +static int igt_ggtt_page(void *arg) +{ + const unsigned int count = PAGE_SIZE/sizeof(u32); + I915_RND_STATE(prng); + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; + struct drm_i915_gem_object *obj; + struct drm_mm_node tmp; + unsigned int *order, n; + int err; + + mutex_lock(&i915->drm.struct_mutex); + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_unlock; + } + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_free; + + memset(&tmp, 0, sizeof(tmp)); + err = drm_mm_insert_node_in_range(&ggtt->base.mm, &tmp, + 1024 * PAGE_SIZE, 0, + I915_COLOR_UNEVICTABLE, + 0, ggtt->mappable_end, + DRM_MM_INSERT_LOW); + if (err) + goto out_unpin; + + order = i915_random_order(count, &prng); + if (!order) { + err = -ENOMEM; + goto out_remove; + } + + for (n = 0; n < count; n++) { + u64 offset = tmp.start + order[n] * PAGE_SIZE; + u32 __iomem *vaddr; + + ggtt->base.insert_page(&ggtt->base, + i915_gem_object_get_dma_address(obj, 0), + offset, I915_CACHE_NONE, 0); + + vaddr = io_mapping_map_atomic_wc(&ggtt->mappable, offset); + iowrite32(n, vaddr + n); + io_mapping_unmap_atomic(vaddr); + + wmb(); + ggtt->base.clear_range(&ggtt->base, offset, PAGE_SIZE); + } + + i915_random_reorder(order, count, &prng); + for (n = 0; n < count; n++) { + u64 offset = tmp.start + order[n] * PAGE_SIZE; + u32 __iomem *vaddr; + u32 val; + + ggtt->base.insert_page(&ggtt->base, + i915_gem_object_get_dma_address(obj, 0), + offset, I915_CACHE_NONE, 0); + + vaddr = io_mapping_map_atomic_wc(&ggtt->mappable, offset); + val = ioread32(vaddr + n); + io_mapping_unmap_atomic(vaddr); + + ggtt->base.clear_range(&ggtt->base, offset, PAGE_SIZE); + + if (val != n) { + pr_err("insert page failed: found %d, expected %d\n", + val, n); + err = -EINVAL; + break; + } + } + + kfree(order); +out_remove: + drm_mm_remove_node(&tmp); +out_unpin: + i915_gem_object_unpin_pages(obj); +out_free: + i915_gem_object_put(obj); +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static void track_vma_bind(struct i915_vma *vma) +{ + struct drm_i915_gem_object *obj = vma->obj; + + obj->bind_count++; /* track for eviction later */ + __i915_gem_object_pin_pages(obj); + + vma->pages = obj->mm.pages; + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); +} + +static int exercise_mock(struct drm_i915_private *i915, + int (*func)(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time)) +{ + struct i915_gem_context *ctx; + struct i915_hw_ppgtt *ppgtt; + IGT_TIMEOUT(end_time); + int err; + + ctx = mock_context(i915, "mock"); + if (!ctx) + return -ENOMEM; + + ppgtt = ctx->ppgtt; + GEM_BUG_ON(!ppgtt); + + err = func(i915, &ppgtt->base, 0, ppgtt->base.total, end_time); + + mock_context_close(ctx); + return err; +} + +static int igt_mock_fill(void *arg) +{ + return exercise_mock(arg, fill_hole); +} + +static int igt_mock_walk(void *arg) +{ + return exercise_mock(arg, walk_hole); +} + +static int igt_mock_pot(void *arg) +{ + return exercise_mock(arg, pot_hole); +} + +static int igt_mock_drunk(void *arg) +{ + return exercise_mock(arg, drunk_hole); +} + +static int igt_gtt_reserve(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj, *on; + LIST_HEAD(objects); + u64 total; + int err; + + /* i915_gem_gtt_reserve() tries to reserve the precise range + * for the node, and evicts if it has to. So our test checks that + * it can give us the requsted space and prevent overlaps. + */ + + /* Start by filling the GGTT */ + for (total = 0; + total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + total += 2*I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, 2*PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + goto out; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_gem_gtt_reserve(&i915->ggtt.base, &vma->node, + obj->base.size, + total, + obj->cache_level, + 0); + if (err) { + pr_err("i915_gem_gtt_reserve (pass 1) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + if (vma->node.start != total || + vma->node.size != 2*I915_GTT_PAGE_SIZE) { + pr_err("i915_gem_gtt_reserve (pass 1) placement failed, found (%llx + %llx), expected (%llx + %lx)\n", + vma->node.start, vma->node.size, + total, 2*I915_GTT_PAGE_SIZE); + err = -EINVAL; + goto out; + } + } + + /* Now we start forcing evictions */ + for (total = I915_GTT_PAGE_SIZE; + total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + total += 2*I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, 2*PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + goto out; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_gem_gtt_reserve(&i915->ggtt.base, &vma->node, + obj->base.size, + total, + obj->cache_level, + 0); + if (err) { + pr_err("i915_gem_gtt_reserve (pass 2) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + if (vma->node.start != total || + vma->node.size != 2*I915_GTT_PAGE_SIZE) { + pr_err("i915_gem_gtt_reserve (pass 2) placement failed, found (%llx + %llx), expected (%llx + %lx)\n", + vma->node.start, vma->node.size, + total, 2*I915_GTT_PAGE_SIZE); + err = -EINVAL; + goto out; + } + } + + /* And then try at random */ + list_for_each_entry_safe(obj, on, &objects, st_link) { + struct i915_vma *vma; + u64 offset; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_vma_unbind(vma); + if (err) { + pr_err("i915_vma_unbind failed with err=%d!\n", err); + goto out; + } + + offset = random_offset(0, i915->ggtt.base.total, + 2*I915_GTT_PAGE_SIZE, + I915_GTT_MIN_ALIGNMENT); + + err = i915_gem_gtt_reserve(&i915->ggtt.base, &vma->node, + obj->base.size, + offset, + obj->cache_level, + 0); + if (err) { + pr_err("i915_gem_gtt_reserve (pass 3) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + if (vma->node.start != offset || + vma->node.size != 2*I915_GTT_PAGE_SIZE) { + pr_err("i915_gem_gtt_reserve (pass 3) placement failed, found (%llx + %llx), expected (%llx + %lx)\n", + vma->node.start, vma->node.size, + offset, 2*I915_GTT_PAGE_SIZE); + err = -EINVAL; + goto out; + } + } + +out: + list_for_each_entry_safe(obj, on, &objects, st_link) { + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + return err; +} + +static int igt_gtt_insert(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj, *on; + struct drm_mm_node tmp = {}; + const struct invalid_insert { + u64 size; + u64 alignment; + u64 start, end; + } invalid_insert[] = { + { + i915->ggtt.base.total + I915_GTT_PAGE_SIZE, 0, + 0, i915->ggtt.base.total, + }, + { + 2*I915_GTT_PAGE_SIZE, 0, + 0, I915_GTT_PAGE_SIZE, + }, + { + -(u64)I915_GTT_PAGE_SIZE, 0, + 0, 4*I915_GTT_PAGE_SIZE, + }, + { + -(u64)2*I915_GTT_PAGE_SIZE, 2*I915_GTT_PAGE_SIZE, + 0, 4*I915_GTT_PAGE_SIZE, + }, + { + I915_GTT_PAGE_SIZE, I915_GTT_MIN_ALIGNMENT << 1, + I915_GTT_MIN_ALIGNMENT, I915_GTT_MIN_ALIGNMENT << 1, + }, + {} + }, *ii; + LIST_HEAD(objects); + u64 total; + int err; + + /* i915_gem_gtt_insert() tries to allocate some free space in the GTT + * to the node, evicting if required. + */ + + /* Check a couple of obviously invalid requests */ + for (ii = invalid_insert; ii->size; ii++) { + err = i915_gem_gtt_insert(&i915->ggtt.base, &tmp, + ii->size, ii->alignment, + I915_COLOR_UNEVICTABLE, + ii->start, ii->end, + 0); + if (err != -ENOSPC) { + pr_err("Invalid i915_gem_gtt_insert(.size=%llx, .alignment=%llx, .start=%llx, .end=%llx) succeeded (err=%d)\n", + ii->size, ii->alignment, ii->start, ii->end, + err); + return -EINVAL; + } + } + + /* Start by filling the GGTT */ + for (total = 0; + total + I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + total += I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + goto out; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_gem_gtt_insert(&i915->ggtt.base, &vma->node, + obj->base.size, 0, obj->cache_level, + 0, i915->ggtt.base.total, + 0); + if (err == -ENOSPC) { + /* maxed out the GGTT space */ + i915_gem_object_put(obj); + break; + } + if (err) { + pr_err("i915_gem_gtt_insert (pass 1) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + __i915_vma_pin(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + } + + list_for_each_entry(obj, &objects, st_link) { + struct i915_vma *vma; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + if (!drm_mm_node_allocated(&vma->node)) { + pr_err("VMA was unexpectedly evicted!\n"); + err = -EINVAL; + goto out; + } + + __i915_vma_unpin(vma); + } + + /* If we then reinsert, we should find the same hole */ + list_for_each_entry_safe(obj, on, &objects, st_link) { + struct i915_vma *vma; + u64 offset; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + offset = vma->node.start; + + err = i915_vma_unbind(vma); + if (err) { + pr_err("i915_vma_unbind failed with err=%d!\n", err); + goto out; + } + + err = i915_gem_gtt_insert(&i915->ggtt.base, &vma->node, + obj->base.size, 0, obj->cache_level, + 0, i915->ggtt.base.total, + 0); + if (err) { + pr_err("i915_gem_gtt_insert (pass 2) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + if (vma->node.start != offset) { + pr_err("i915_gem_gtt_insert did not return node to its previous location (the only hole), expected address %llx, found %llx\n", + offset, vma->node.start); + err = -EINVAL; + goto out; + } + } + + /* And then force evictions */ + for (total = 0; + total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + total += 2*I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, 2*I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + goto out; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_gem_gtt_insert(&i915->ggtt.base, &vma->node, + obj->base.size, 0, obj->cache_level, + 0, i915->ggtt.base.total, + 0); + if (err) { + pr_err("i915_gem_gtt_insert (pass 3) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + } + +out: + list_for_each_entry_safe(obj, on, &objects, st_link) { + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + return err; +} + +int i915_gem_gtt_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_mock_drunk), + SUBTEST(igt_mock_walk), + SUBTEST(igt_mock_pot), + SUBTEST(igt_mock_fill), + SUBTEST(igt_gtt_reserve), + SUBTEST(igt_gtt_insert), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + err = i915_subtests(tests, i915); + mutex_unlock(&i915->drm.struct_mutex); + + drm_dev_unref(&i915->drm); + return err; +} + +int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_ppgtt_alloc), + SUBTEST(igt_ppgtt_lowlevel), + SUBTEST(igt_ppgtt_drunk), + SUBTEST(igt_ppgtt_walk), + SUBTEST(igt_ppgtt_pot), + SUBTEST(igt_ppgtt_fill), + SUBTEST(igt_ppgtt_shrink), + SUBTEST(igt_ggtt_lowlevel), + SUBTEST(igt_ggtt_drunk), + SUBTEST(igt_ggtt_walk), + SUBTEST(igt_ggtt_pot), + SUBTEST(igt_ggtt_fill), + SUBTEST(igt_ggtt_page), + }; + + GEM_BUG_ON(offset_in_page(i915->ggtt.base.total)); + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c new file mode 100644 index 000000000000..67d82bf1407f --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -0,0 +1,600 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +#include "mock_gem_device.h" +#include "huge_gem_object.h" + +static int igt_gem_object(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + int err = -ENOMEM; + + /* Basic test to ensure we can create an object */ + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + pr_err("i915_gem_object_create failed, err=%d\n", err); + goto out; + } + + err = 0; + i915_gem_object_put(obj); +out: + return err; +} + +static int igt_phys_object(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + int err; + + /* Create an object and bind it to a contiguous set of physical pages, + * i.e. exercise the i915_gem_object_phys API. + */ + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + pr_err("i915_gem_object_create failed, err=%d\n", err); + goto out; + } + + mutex_lock(&i915->drm.struct_mutex); + err = i915_gem_object_attach_phys(obj, PAGE_SIZE); + mutex_unlock(&i915->drm.struct_mutex); + if (err) { + pr_err("i915_gem_object_attach_phys failed, err=%d\n", err); + goto out_obj; + } + + if (obj->ops != &i915_gem_phys_ops) { + pr_err("i915_gem_object_attach_phys did not create a phys object\n"); + err = -EINVAL; + goto out_obj; + } + + if (!atomic_read(&obj->mm.pages_pin_count)) { + pr_err("i915_gem_object_attach_phys did not pin its phys pages\n"); + err = -EINVAL; + goto out_obj; + } + + /* Make the object dirty so that put_pages must do copy back the data */ + mutex_lock(&i915->drm.struct_mutex); + err = i915_gem_object_set_to_gtt_domain(obj, true); + mutex_unlock(&i915->drm.struct_mutex); + if (err) { + pr_err("i915_gem_object_set_to_gtt_domain failed with err=%d\n", + err); + goto out_obj; + } + +out_obj: + i915_gem_object_put(obj); +out: + return err; +} + +static int igt_gem_huge(void *arg) +{ + const unsigned int nreal = 509; /* just to be awkward */ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + unsigned int n; + int err; + + /* Basic sanitycheck of our huge fake object allocation */ + + obj = huge_gem_object(i915, + nreal * PAGE_SIZE, + i915->ggtt.base.total + PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) { + pr_err("Failed to allocate %u pages (%lu total), err=%d\n", + nreal, obj->base.size / PAGE_SIZE, err); + goto out; + } + + for (n = 0; n < obj->base.size / PAGE_SIZE; n++) { + if (i915_gem_object_get_page(obj, n) != + i915_gem_object_get_page(obj, n % nreal)) { + pr_err("Page lookup mismatch at index %u [%u]\n", + n, n % nreal); + err = -EINVAL; + goto out_unpin; + } + } + +out_unpin: + i915_gem_object_unpin_pages(obj); +out: + i915_gem_object_put(obj); + return err; +} + +struct tile { + unsigned int width; + unsigned int height; + unsigned int stride; + unsigned int size; + unsigned int tiling; + unsigned int swizzle; +}; + +static u64 swizzle_bit(unsigned int bit, u64 offset) +{ + return (offset & BIT_ULL(bit)) >> (bit - 6); +} + +static u64 tiled_offset(const struct tile *tile, u64 v) +{ + u64 x, y; + + if (tile->tiling == I915_TILING_NONE) + return v; + + y = div64_u64_rem(v, tile->stride, &x); + v = div64_u64_rem(y, tile->height, &y) * tile->stride * tile->height; + + if (tile->tiling == I915_TILING_X) { + v += y * tile->width; + v += div64_u64_rem(x, tile->width, &x) << tile->size; + v += x; + } else { + const unsigned int ytile_span = 16; + const unsigned int ytile_height = 32 * ytile_span; + + v += y * ytile_span; + v += div64_u64_rem(x, ytile_span, &x) * ytile_height; + v += x; + } + + switch (tile->swizzle) { + case I915_BIT_6_SWIZZLE_9: + v ^= swizzle_bit(9, v); + break; + case I915_BIT_6_SWIZZLE_9_10: + v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v); + break; + case I915_BIT_6_SWIZZLE_9_11: + v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v); + break; + case I915_BIT_6_SWIZZLE_9_10_11: + v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v); + break; + } + + return v; +} + +static int check_partial_mapping(struct drm_i915_gem_object *obj, + const struct tile *tile, + unsigned long end_time) +{ + const unsigned int nreal = obj->scratch / PAGE_SIZE; + const unsigned long npages = obj->base.size / PAGE_SIZE; + struct i915_vma *vma; + unsigned long page; + int err; + + if (igt_timeout(end_time, + "%s: timed out before tiling=%d stride=%d\n", + __func__, tile->tiling, tile->stride)) + return -EINTR; + + err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride); + if (err) + return err; + + GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling); + GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride); + + for_each_prime_number_from(page, 1, npages) { + struct i915_ggtt_view view = + compute_partial_view(obj, page, MIN_CHUNK_PAGES); + u32 __iomem *io; + struct page *p; + unsigned int n; + u64 offset; + u32 *cpu; + + GEM_BUG_ON(view.partial.size > nreal); + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) { + pr_err("Failed to pin partial view: offset=%lu\n", + page); + return PTR_ERR(vma); + } + + n = page - view.partial.offset; + GEM_BUG_ON(n >= view.partial.size); + + io = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(io)) { + pr_err("Failed to iomap partial view: offset=%lu\n", + page); + return PTR_ERR(io); + } + + err = i915_vma_get_fence(vma); + if (err) { + pr_err("Failed to get fence for partial view: offset=%lu\n", + page); + i915_vma_unpin_iomap(vma); + return err; + } + + iowrite32(page, io + n * PAGE_SIZE/sizeof(*io)); + i915_vma_unpin_iomap(vma); + + offset = tiled_offset(tile, page << PAGE_SHIFT); + if (offset >= obj->base.size) + continue; + + i915_gem_object_flush_gtt_write_domain(obj); + + p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + cpu = kmap(p) + offset_in_page(offset); + drm_clflush_virt_range(cpu, sizeof(*cpu)); + if (*cpu != (u32)page) { + pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n", + page, n, + view.partial.offset, + view.partial.size, + vma->size >> PAGE_SHIFT, + tile_row_pages(obj), + vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride, + offset >> PAGE_SHIFT, + (unsigned int)offset_in_page(offset), + offset, + (u32)page, *cpu); + err = -EINVAL; + } + *cpu = 0; + drm_clflush_virt_range(cpu, sizeof(*cpu)); + kunmap(p); + if (err) + return err; + } + + return 0; +} + +static int igt_partial_tiling(void *arg) +{ + const unsigned int nreal = 1 << 12; /* largest tile row x2 */ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + int tiling; + int err; + + /* We want to check the page mapping and fencing of a large object + * mmapped through the GTT. The object we create is larger than can + * possibly be mmaped as a whole, and so we must use partial GGTT vma. + * We then check that a write through each partial GGTT vma ends up + * in the right set of pages within the object, and with the expected + * tiling, which we verify by manual swizzling. + */ + + obj = huge_gem_object(i915, + nreal << PAGE_SHIFT, + (1 + next_prime_number(i915->ggtt.base.total >> PAGE_SHIFT)) << PAGE_SHIFT); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) { + pr_err("Failed to allocate %u pages (%lu total), err=%d\n", + nreal, obj->base.size / PAGE_SIZE, err); + goto out; + } + + mutex_lock(&i915->drm.struct_mutex); + + if (1) { + IGT_TIMEOUT(end); + struct tile tile; + + tile.height = 1; + tile.width = 1; + tile.size = 0; + tile.stride = 0; + tile.swizzle = I915_BIT_6_SWIZZLE_NONE; + tile.tiling = I915_TILING_NONE; + + err = check_partial_mapping(obj, &tile, end); + if (err && err != -EINTR) + goto out_unlock; + } + + for (tiling = I915_TILING_X; tiling <= I915_TILING_Y; tiling++) { + IGT_TIMEOUT(end); + unsigned int max_pitch; + unsigned int pitch; + struct tile tile; + + tile.tiling = tiling; + switch (tiling) { + case I915_TILING_X: + tile.swizzle = i915->mm.bit_6_swizzle_x; + break; + case I915_TILING_Y: + tile.swizzle = i915->mm.bit_6_swizzle_y; + break; + } + + if (tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN || + tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17) + continue; + + if (INTEL_GEN(i915) <= 2) { + tile.height = 16; + tile.width = 128; + tile.size = 11; + } else if (tile.tiling == I915_TILING_Y && + HAS_128_BYTE_Y_TILING(i915)) { + tile.height = 32; + tile.width = 128; + tile.size = 12; + } else { + tile.height = 8; + tile.width = 512; + tile.size = 12; + } + + if (INTEL_GEN(i915) < 4) + max_pitch = 8192 / tile.width; + else if (INTEL_GEN(i915) < 7) + max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width; + else + max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width; + + for (pitch = max_pitch; pitch; pitch >>= 1) { + tile.stride = tile.width * pitch; + err = check_partial_mapping(obj, &tile, end); + if (err == -EINTR) + goto next_tiling; + if (err) + goto out_unlock; + + if (pitch > 2 && INTEL_GEN(i915) >= 4) { + tile.stride = tile.width * (pitch - 1); + err = check_partial_mapping(obj, &tile, end); + if (err == -EINTR) + goto next_tiling; + if (err) + goto out_unlock; + } + + if (pitch < max_pitch && INTEL_GEN(i915) >= 4) { + tile.stride = tile.width * (pitch + 1); + err = check_partial_mapping(obj, &tile, end); + if (err == -EINTR) + goto next_tiling; + if (err) + goto out_unlock; + } + } + + if (INTEL_GEN(i915) >= 4) { + for_each_prime_number(pitch, max_pitch) { + tile.stride = tile.width * pitch; + err = check_partial_mapping(obj, &tile, end); + if (err == -EINTR) + goto next_tiling; + if (err) + goto out_unlock; + } + } + +next_tiling: ; + } + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + i915_gem_object_unpin_pages(obj); +out: + i915_gem_object_put(obj); + return err; +} + +static int make_obj_busy(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct drm_i915_gem_request *rq; + struct i915_vma *vma; + int err; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return err; + + rq = i915_gem_request_alloc(i915->engine[RCS], i915->kernel_context); + if (IS_ERR(rq)) { + i915_vma_unpin(vma); + return PTR_ERR(rq); + } + + i915_vma_move_to_active(vma, rq, 0); + i915_add_request(rq); + + i915_gem_object_set_active_reference(obj); + i915_vma_unpin(vma); + return 0; +} + +static bool assert_mmap_offset(struct drm_i915_private *i915, + unsigned long size, + int expected) +{ + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_create_mmap_offset(obj); + i915_gem_object_put(obj); + + return err == expected; +} + +static int igt_mmap_offset_exhaustion(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm; + struct drm_i915_gem_object *obj; + struct drm_mm_node resv, *hole; + u64 hole_start, hole_end; + int loop, err; + + /* Trim the device mmap space to only a page */ + memset(&resv, 0, sizeof(resv)); + drm_mm_for_each_hole(hole, mm, hole_start, hole_end) { + resv.start = hole_start; + resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */ + err = drm_mm_reserve_node(mm, &resv); + if (err) { + pr_err("Failed to trim VMA manager, err=%d\n", err); + return err; + } + break; + } + + /* Just fits! */ + if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) { + pr_err("Unable to insert object into single page hole\n"); + err = -EINVAL; + goto out; + } + + /* Too large */ + if (!assert_mmap_offset(i915, 2*PAGE_SIZE, -ENOSPC)) { + pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n"); + err = -EINVAL; + goto out; + } + + /* Fill the hole, further allocation attempts should then fail */ + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_create_mmap_offset(obj); + if (err) { + pr_err("Unable to insert object into reclaimed hole\n"); + goto err_obj; + } + + if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) { + pr_err("Unexpectedly succeeded in inserting object into no holes!\n"); + err = -EINVAL; + goto err_obj; + } + + i915_gem_object_put(obj); + + /* Now fill with busy dead objects that we expect to reap */ + for (loop = 0; loop < 3; loop++) { + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + mutex_lock(&i915->drm.struct_mutex); + err = make_obj_busy(obj); + mutex_unlock(&i915->drm.struct_mutex); + if (err) { + pr_err("[loop %d] Failed to busy the object\n", loop); + goto err_obj; + } + + GEM_BUG_ON(!i915_gem_object_is_active(obj)); + err = i915_gem_object_create_mmap_offset(obj); + if (err) { + pr_err("[loop %d] i915_gem_object_create_mmap_offset failed with err=%d\n", + loop, err); + goto out; + } + } + +out: + drm_mm_remove_node(&resv); + return err; +err_obj: + i915_gem_object_put(obj); + goto out; +} + +int i915_gem_object_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gem_object), + SUBTEST(igt_phys_object), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915); + + drm_dev_unref(&i915->drm); + return err; +} + +int i915_gem_object_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gem_huge), + SUBTEST(igt_partial_tiling), + SUBTEST(igt_mmap_offset_exhaustion), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c new file mode 100644 index 000000000000..926b24c117d6 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -0,0 +1,882 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/prime_numbers.h> + +#include "../i915_selftest.h" + +#include "mock_context.h" +#include "mock_gem_device.h" + +static int igt_add_request(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request; + int err = -ENOMEM; + + /* Basic preliminary test to create a request and let it loose! */ + + mutex_lock(&i915->drm.struct_mutex); + request = mock_request(i915->engine[RCS], + i915->kernel_context, + HZ / 10); + if (!request) + goto out_unlock; + + i915_add_request(request); + + err = 0; +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int igt_wait_request(void *arg) +{ + const long T = HZ / 4; + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request; + int err = -EINVAL; + + /* Submit a request, then wait upon it */ + + mutex_lock(&i915->drm.struct_mutex); + request = mock_request(i915->engine[RCS], i915->kernel_context, T); + if (!request) { + err = -ENOMEM; + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, 0) != -ETIME) { + pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n"); + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, T) != -ETIME) { + pr_err("request wait succeeded (expected timeout before submit!)\n"); + goto out_unlock; + } + + if (i915_gem_request_completed(request)) { + pr_err("request completed before submit!!\n"); + goto out_unlock; + } + + i915_add_request(request); + + if (i915_wait_request(request, I915_WAIT_LOCKED, 0) != -ETIME) { + pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n"); + goto out_unlock; + } + + if (i915_gem_request_completed(request)) { + pr_err("request completed immediately!\n"); + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, T / 2) != -ETIME) { + pr_err("request wait succeeded (expected timeout!)\n"); + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, T) == -ETIME) { + pr_err("request wait timed out!\n"); + goto out_unlock; + } + + if (!i915_gem_request_completed(request)) { + pr_err("request not complete after waiting!\n"); + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, T) == -ETIME) { + pr_err("request wait timed out when already complete!\n"); + goto out_unlock; + } + + err = 0; +out_unlock: + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int igt_fence_wait(void *arg) +{ + const long T = HZ / 4; + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request; + int err = -EINVAL; + + /* Submit a request, treat it as a fence and wait upon it */ + + mutex_lock(&i915->drm.struct_mutex); + request = mock_request(i915->engine[RCS], i915->kernel_context, T); + if (!request) { + err = -ENOMEM; + goto out_locked; + } + mutex_unlock(&i915->drm.struct_mutex); /* safe as we are single user */ + + if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) { + pr_err("fence wait success before submit (expected timeout)!\n"); + goto out_device; + } + + mutex_lock(&i915->drm.struct_mutex); + i915_add_request(request); + mutex_unlock(&i915->drm.struct_mutex); + + if (dma_fence_is_signaled(&request->fence)) { + pr_err("fence signaled immediately!\n"); + goto out_device; + } + + if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) { + pr_err("fence wait success after submit (expected timeout)!\n"); + goto out_device; + } + + if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { + pr_err("fence wait timed out (expected success)!\n"); + goto out_device; + } + + if (!dma_fence_is_signaled(&request->fence)) { + pr_err("fence unsignaled after waiting!\n"); + goto out_device; + } + + if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { + pr_err("fence wait timed out when complete (expected success)!\n"); + goto out_device; + } + + err = 0; +out_device: + mutex_lock(&i915->drm.struct_mutex); +out_locked: + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int igt_request_rewind(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request, *vip; + struct i915_gem_context *ctx[2]; + int err = -EINVAL; + + mutex_lock(&i915->drm.struct_mutex); + ctx[0] = mock_context(i915, "A"); + request = mock_request(i915->engine[RCS], ctx[0], 2 * HZ); + if (!request) { + err = -ENOMEM; + goto err_context_0; + } + + i915_gem_request_get(request); + i915_add_request(request); + + ctx[1] = mock_context(i915, "B"); + vip = mock_request(i915->engine[RCS], ctx[1], 0); + if (!vip) { + err = -ENOMEM; + goto err_context_1; + } + + /* Simulate preemption by manual reordering */ + if (!mock_cancel_request(request)) { + pr_err("failed to cancel request (already executed)!\n"); + i915_add_request(vip); + goto err_context_1; + } + i915_gem_request_get(vip); + i915_add_request(vip); + request->engine->submit_request(request); + + mutex_unlock(&i915->drm.struct_mutex); + + if (i915_wait_request(vip, 0, HZ) == -ETIME) { + pr_err("timed out waiting for high priority request, vip.seqno=%d, current seqno=%d\n", + vip->global_seqno, intel_engine_get_seqno(i915->engine[RCS])); + goto err; + } + + if (i915_gem_request_completed(request)) { + pr_err("low priority request already completed\n"); + goto err; + } + + err = 0; +err: + i915_gem_request_put(vip); + mutex_lock(&i915->drm.struct_mutex); +err_context_1: + mock_context_close(ctx[1]); + i915_gem_request_put(request); +err_context_0: + mock_context_close(ctx[0]); + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +int i915_gem_request_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_add_request), + SUBTEST(igt_wait_request), + SUBTEST(igt_fence_wait), + SUBTEST(igt_request_rewind), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915); + drm_dev_unref(&i915->drm); + + return err; +} + +struct live_test { + struct drm_i915_private *i915; + const char *func; + const char *name; + + unsigned int reset_count; +}; + +static int begin_live_test(struct live_test *t, + struct drm_i915_private *i915, + const char *func, + const char *name) +{ + int err; + + t->i915 = i915; + t->func = func; + t->name = name; + + err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + if (err) { + pr_err("%s(%s): failed to idle before, with err=%d!", + func, name, err); + return err; + } + + i915_gem_retire_requests(i915); + + i915->gpu_error.missed_irq_rings = 0; + t->reset_count = i915_reset_count(&i915->gpu_error); + + return 0; +} + +static int end_live_test(struct live_test *t) +{ + struct drm_i915_private *i915 = t->i915; + + if (wait_for(intel_engines_are_idle(i915), 1)) { + pr_err("%s(%s): GPU not idle\n", t->func, t->name); + return -EIO; + } + + if (t->reset_count != i915_reset_count(&i915->gpu_error)) { + pr_err("%s(%s): GPU was reset %d times!\n", + t->func, t->name, + i915_reset_count(&i915->gpu_error) - t->reset_count); + return -EIO; + } + + if (i915->gpu_error.missed_irq_rings) { + pr_err("%s(%s): Missed interrupts on engines %lx\n", + t->func, t->name, i915->gpu_error.missed_irq_rings); + return -EIO; + } + + return 0; +} + +static int live_nop_request(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct live_test t; + unsigned int id; + int err; + + /* Submit various sized batches of empty requests, to each engine + * (individually), and wait for the batch to complete. We can check + * the overhead of submitting requests to the hardware. + */ + + mutex_lock(&i915->drm.struct_mutex); + + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + struct drm_i915_gem_request *request; + unsigned long n, prime; + ktime_t times[2] = {}; + + err = begin_live_test(&t, i915, __func__, engine->name); + if (err) + goto out_unlock; + + for_each_prime_number_from(prime, 1, 8192) { + times[1] = ktime_get_raw(); + + for (n = 0; n < prime; n++) { + request = i915_gem_request_alloc(engine, + i915->kernel_context); + if (IS_ERR(request)) { + err = PTR_ERR(request); + goto out_unlock; + } + + /* This space is left intentionally blank. + * + * We do not actually want to perform any + * action with this request, we just want + * to measure the latency in allocation + * and submission of our breadcrumbs - + * ensuring that the bare request is sufficient + * for the system to work (i.e. proper HEAD + * tracking of the rings, interrupt handling, + * etc). It also gives us the lowest bounds + * for latency. + */ + + i915_add_request(request); + } + i915_wait_request(request, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + + times[1] = ktime_sub(ktime_get_raw(), times[1]); + if (prime == 1) + times[0] = times[1]; + + if (__igt_timeout(end_time, NULL)) + break; + } + + err = end_live_test(&t); + if (err) + goto out_unlock; + + pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n", + engine->name, + ktime_to_ns(times[0]), + prime, div64_u64(ktime_to_ns(times[1]), prime)); + } + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static struct i915_vma *empty_batch(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *cmd; + int err; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + *cmd = MI_BATCH_BUFFER_END; + i915_gem_object_unpin_map(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + goto err; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL); + if (err) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static struct drm_i915_gem_request * +empty_request(struct intel_engine_cs *engine, + struct i915_vma *batch) +{ + struct drm_i915_gem_request *request; + int err; + + request = i915_gem_request_alloc(engine, + engine->i915->kernel_context); + if (IS_ERR(request)) + return request; + + err = engine->emit_flush(request, EMIT_INVALIDATE); + if (err) + goto out_request; + + err = i915_switch_context(request); + if (err) + goto out_request; + + err = engine->emit_bb_start(request, + batch->node.start, + batch->node.size, + I915_DISPATCH_SECURE); + if (err) + goto out_request; + +out_request: + __i915_add_request(request, err == 0); + return err ? ERR_PTR(err) : request; +} + +static int live_empty_request(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct live_test t; + struct i915_vma *batch; + unsigned int id; + int err = 0; + + /* Submit various sized batches of empty requests, to each engine + * (individually), and wait for the batch to complete. We can check + * the overhead of submitting requests to the hardware. + */ + + mutex_lock(&i915->drm.struct_mutex); + + batch = empty_batch(i915); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_unlock; + } + + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + struct drm_i915_gem_request *request; + unsigned long n, prime; + ktime_t times[2] = {}; + + err = begin_live_test(&t, i915, __func__, engine->name); + if (err) + goto out_batch; + + /* Warmup / preload */ + request = empty_request(engine, batch); + if (IS_ERR(request)) { + err = PTR_ERR(request); + goto out_batch; + } + i915_wait_request(request, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + + for_each_prime_number_from(prime, 1, 8192) { + times[1] = ktime_get_raw(); + + for (n = 0; n < prime; n++) { + request = empty_request(engine, batch); + if (IS_ERR(request)) { + err = PTR_ERR(request); + goto out_batch; + } + } + i915_wait_request(request, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + + times[1] = ktime_sub(ktime_get_raw(), times[1]); + if (prime == 1) + times[0] = times[1]; + + if (__igt_timeout(end_time, NULL)) + break; + } + + err = end_live_test(&t); + if (err) + goto out_batch; + + pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n", + engine->name, + ktime_to_ns(times[0]), + prime, div64_u64(ktime_to_ns(times[1]), prime)); + } + +out_batch: + i915_vma_unpin(batch); + i915_vma_put(batch); +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static struct i915_vma *recursive_batch(struct drm_i915_private *i915) +{ + struct i915_gem_context *ctx = i915->kernel_context; + struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct drm_i915_gem_object *obj; + const int gen = INTEL_GEN(i915); + struct i915_vma *vma; + u32 *cmd; + int err; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err; + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + goto err; + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + if (gen >= 8) { + *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; + *cmd++ = lower_32_bits(vma->node.start); + *cmd++ = upper_32_bits(vma->node.start); + } else if (gen >= 6) { + *cmd++ = MI_BATCH_BUFFER_START | 1 << 8; + *cmd++ = lower_32_bits(vma->node.start); + } else if (gen >= 4) { + *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cmd++ = lower_32_bits(vma->node.start); + } else { + *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | 1; + *cmd++ = lower_32_bits(vma->node.start); + } + *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */ + + wmb(); + i915_gem_object_unpin_map(obj); + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static int recursive_batch_resolve(struct i915_vma *batch) +{ + u32 *cmd; + + cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); + if (IS_ERR(cmd)) + return PTR_ERR(cmd); + + *cmd = MI_BATCH_BUFFER_END; + wmb(); + + i915_gem_object_unpin_map(batch->obj); + + return 0; +} + +static int live_all_engines(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct drm_i915_gem_request *request[I915_NUM_ENGINES]; + struct i915_vma *batch; + struct live_test t; + unsigned int id; + int err; + + /* Check we can submit requests to all engines simultaneously. We + * send a recursive batch to each engine - checking that we don't + * block doing so, and that they don't complete too soon. + */ + + mutex_lock(&i915->drm.struct_mutex); + + err = begin_live_test(&t, i915, __func__, ""); + if (err) + goto out_unlock; + + batch = recursive_batch(i915); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + pr_err("%s: Unable to create batch, err=%d\n", __func__, err); + goto out_unlock; + } + + for_each_engine(engine, i915, id) { + request[id] = i915_gem_request_alloc(engine, + i915->kernel_context); + if (IS_ERR(request[id])) { + err = PTR_ERR(request[id]); + pr_err("%s: Request allocation failed with err=%d\n", + __func__, err); + goto out_request; + } + + err = engine->emit_flush(request[id], EMIT_INVALIDATE); + GEM_BUG_ON(err); + + err = i915_switch_context(request[id]); + GEM_BUG_ON(err); + + err = engine->emit_bb_start(request[id], + batch->node.start, + batch->node.size, + 0); + GEM_BUG_ON(err); + request[id]->batch = batch; + + if (!i915_gem_object_has_active_reference(batch->obj)) { + i915_gem_object_get(batch->obj); + i915_gem_object_set_active_reference(batch->obj); + } + + i915_vma_move_to_active(batch, request[id], 0); + i915_gem_request_get(request[id]); + i915_add_request(request[id]); + } + + for_each_engine(engine, i915, id) { + if (i915_gem_request_completed(request[id])) { + pr_err("%s(%s): request completed too early!\n", + __func__, engine->name); + err = -EINVAL; + goto out_request; + } + } + + err = recursive_batch_resolve(batch); + if (err) { + pr_err("%s: failed to resolve batch, err=%d\n", __func__, err); + goto out_request; + } + + for_each_engine(engine, i915, id) { + long timeout; + + timeout = i915_wait_request(request[id], + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) { + err = timeout; + pr_err("%s: error waiting for request on %s, err=%d\n", + __func__, engine->name, err); + goto out_request; + } + + GEM_BUG_ON(!i915_gem_request_completed(request[id])); + i915_gem_request_put(request[id]); + request[id] = NULL; + } + + err = end_live_test(&t); + +out_request: + for_each_engine(engine, i915, id) + if (request[id]) + i915_gem_request_put(request[id]); + i915_vma_unpin(batch); + i915_vma_put(batch); +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int live_sequential_engines(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request[I915_NUM_ENGINES] = {}; + struct drm_i915_gem_request *prev = NULL; + struct intel_engine_cs *engine; + struct live_test t; + unsigned int id; + int err; + + /* Check we can submit requests to all engines sequentially, such + * that each successive request waits for the earlier ones. This + * tests that we don't execute requests out of order, even though + * they are running on independent engines. + */ + + mutex_lock(&i915->drm.struct_mutex); + + err = begin_live_test(&t, i915, __func__, ""); + if (err) + goto out_unlock; + + for_each_engine(engine, i915, id) { + struct i915_vma *batch; + + batch = recursive_batch(i915); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + pr_err("%s: Unable to create batch for %s, err=%d\n", + __func__, engine->name, err); + goto out_unlock; + } + + request[id] = i915_gem_request_alloc(engine, + i915->kernel_context); + if (IS_ERR(request[id])) { + err = PTR_ERR(request[id]); + pr_err("%s: Request allocation failed for %s with err=%d\n", + __func__, engine->name, err); + goto out_request; + } + + if (prev) { + err = i915_gem_request_await_dma_fence(request[id], + &prev->fence); + if (err) { + i915_add_request(request[id]); + pr_err("%s: Request await failed for %s with err=%d\n", + __func__, engine->name, err); + goto out_request; + } + } + + err = engine->emit_flush(request[id], EMIT_INVALIDATE); + GEM_BUG_ON(err); + + err = i915_switch_context(request[id]); + GEM_BUG_ON(err); + + err = engine->emit_bb_start(request[id], + batch->node.start, + batch->node.size, + 0); + GEM_BUG_ON(err); + request[id]->batch = batch; + + i915_vma_move_to_active(batch, request[id], 0); + i915_gem_object_set_active_reference(batch->obj); + i915_vma_get(batch); + + i915_gem_request_get(request[id]); + i915_add_request(request[id]); + + prev = request[id]; + } + + for_each_engine(engine, i915, id) { + long timeout; + + if (i915_gem_request_completed(request[id])) { + pr_err("%s(%s): request completed too early!\n", + __func__, engine->name); + err = -EINVAL; + goto out_request; + } + + err = recursive_batch_resolve(request[id]->batch); + if (err) { + pr_err("%s: failed to resolve batch, err=%d\n", + __func__, err); + goto out_request; + } + + timeout = i915_wait_request(request[id], + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) { + err = timeout; + pr_err("%s: error waiting for request on %s, err=%d\n", + __func__, engine->name, err); + goto out_request; + } + + GEM_BUG_ON(!i915_gem_request_completed(request[id])); + } + + err = end_live_test(&t); + +out_request: + for_each_engine(engine, i915, id) { + u32 *cmd; + + if (!request[id]) + break; + + cmd = i915_gem_object_pin_map(request[id]->batch->obj, + I915_MAP_WC); + if (!IS_ERR(cmd)) { + *cmd = MI_BATCH_BUFFER_END; + wmb(); + i915_gem_object_unpin_map(request[id]->batch->obj); + } + + i915_vma_put(request[id]->batch); + i915_gem_request_put(request[id]); + } +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +int i915_gem_request_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_nop_request), + SUBTEST(live_all_engines), + SUBTEST(live_sequential_engines), + SUBTEST(live_empty_request), + }; + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h new file mode 100644 index 000000000000..18b174d855ca --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -0,0 +1,19 @@ +/* List each unit test as selftest(name, function) + * + * The name is used as both an enum and expanded as subtest__name to create + * a module parameter. It must be unique and legal for a C identifier. + * + * The function should be of type int function(void). It may be conditionally + * compiled using #if IS_ENABLED(DRM_I915_SELFTEST). + * + * Tests are executed in order by igt/drv_selftest + */ +selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ +selftest(uncore, intel_uncore_live_selftests) +selftest(requests, i915_gem_request_live_selftests) +selftest(objects, i915_gem_object_live_selftests) +selftest(dmabuf, i915_gem_dmabuf_live_selftests) +selftest(coherency, i915_gem_coherency_live_selftests) +selftest(gtt, i915_gem_gtt_live_selftests) +selftest(contexts, i915_gem_context_live_selftests) +selftest(hangcheck, intel_hangcheck_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h new file mode 100644 index 000000000000..be9a9ebf5692 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -0,0 +1,20 @@ +/* List each unit test as selftest(name, function) + * + * The name is used as both an enum and expanded as subtest__name to create + * a module parameter. It must be unique and legal for a C identifier. + * + * The function should be of type int function(void). It may be conditionally + * compiled using #if IS_ENABLED(DRM_I915_SELFTEST). + * + * Tests are executed in order by igt/drv_selftest + */ +selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */ +selftest(scatterlist, scatterlist_mock_selftests) +selftest(uncore, intel_uncore_mock_selftests) +selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) +selftest(requests, i915_gem_request_mock_selftests) +selftest(objects, i915_gem_object_mock_selftests) +selftest(dmabuf, i915_gem_dmabuf_mock_selftests) +selftest(vma, i915_vma_mock_selftests) +selftest(evict, i915_gem_evict_mock_selftests) +selftest(gtt, i915_gem_gtt_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c b/drivers/gpu/drm/i915/selftests/i915_random.c new file mode 100644 index 000000000000..c17c83c30637 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_random.c @@ -0,0 +1,63 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/bitops.h> +#include <linux/kernel.h> +#include <linux/random.h> +#include <linux/slab.h> +#include <linux/types.h> + +#include "i915_random.h" + +static inline u32 i915_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state) +{ + return upper_32_bits((u64)prandom_u32_state(state) * ep_ro); +} + +void i915_random_reorder(unsigned int *order, unsigned int count, + struct rnd_state *state) +{ + unsigned int i, j; + + for (i = 0; i < count; i++) { + BUILD_BUG_ON(sizeof(unsigned int) > sizeof(u32)); + j = i915_prandom_u32_max_state(count, state); + swap(order[i], order[j]); + } +} + +unsigned int *i915_random_order(unsigned int count, struct rnd_state *state) +{ + unsigned int *order, i; + + order = kmalloc_array(count, sizeof(*order), GFP_TEMPORARY); + if (!order) + return order; + + for (i = 0; i < count; i++) + order[i] = i; + + i915_random_reorder(order, count, state); + return order; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_random.h b/drivers/gpu/drm/i915/selftests/i915_random.h new file mode 100644 index 000000000000..b9c334ce6cd9 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_random.h @@ -0,0 +1,50 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_SELFTESTS_RANDOM_H__ +#define __I915_SELFTESTS_RANDOM_H__ + +#include <linux/random.h> + +#include "../i915_selftest.h" + +#define I915_RND_STATE_INITIALIZER(x) ({ \ + struct rnd_state state__; \ + prandom_seed_state(&state__, (x)); \ + state__; \ +}) + +#define I915_RND_STATE(name__) \ + struct rnd_state name__ = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed) + +#define I915_RND_SUBSTATE(name__, parent__) \ + struct rnd_state name__ = I915_RND_STATE_INITIALIZER(prandom_u32_state(&(parent__))) + +unsigned int *i915_random_order(unsigned int count, + struct rnd_state *state); +void i915_random_reorder(unsigned int *order, + unsigned int count, + struct rnd_state *state); + +#endif /* !__I915_SELFTESTS_RANDOM_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c new file mode 100644 index 000000000000..6ba3abb10c6f --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -0,0 +1,250 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <linux/random.h> + +#include "../i915_drv.h" +#include "../i915_selftest.h" + +struct i915_selftest i915_selftest __read_mostly = { + .timeout_ms = 1000, +}; + +int i915_mock_sanitycheck(void) +{ + pr_info(DRIVER_NAME ": %s() - ok!\n", __func__); + return 0; +} + +int i915_live_sanitycheck(struct drm_i915_private *i915) +{ + pr_info("%s: %s() - ok!\n", i915->drm.driver->name, __func__); + return 0; +} + +enum { +#define selftest(name, func) mock_##name, +#include "i915_mock_selftests.h" +#undef selftest +}; + +enum { +#define selftest(name, func) live_##name, +#include "i915_live_selftests.h" +#undef selftest +}; + +struct selftest { + bool enabled; + const char *name; + union { + int (*mock)(void); + int (*live)(struct drm_i915_private *); + }; +}; + +#define selftest(n, f) [mock_##n] = { .name = #n, .mock = f }, +static struct selftest mock_selftests[] = { +#include "i915_mock_selftests.h" +}; +#undef selftest + +#define selftest(n, f) [live_##n] = { .name = #n, .live = f }, +static struct selftest live_selftests[] = { +#include "i915_live_selftests.h" +}; +#undef selftest + +/* Embed the line number into the parameter name so that we can order tests */ +#define selftest(n, func) selftest_0(n, func, param(n)) +#define param(n) __PASTE(igt__, __PASTE(__LINE__, __mock_##n)) +#define selftest_0(n, func, id) \ +module_param_named(id, mock_selftests[mock_##n].enabled, bool, 0400); +#include "i915_mock_selftests.h" +#undef selftest_0 +#undef param + +#define param(n) __PASTE(igt__, __PASTE(__LINE__, __live_##n)) +#define selftest_0(n, func, id) \ +module_param_named(id, live_selftests[live_##n].enabled, bool, 0400); +#include "i915_live_selftests.h" +#undef selftest_0 +#undef param +#undef selftest + +static void set_default_test_all(struct selftest *st, unsigned int count) +{ + unsigned int i; + + for (i = 0; i < count; i++) + if (st[i].enabled) + return; + + for (i = 0; i < count; i++) + st[i].enabled = true; +} + +static int __run_selftests(const char *name, + struct selftest *st, + unsigned int count, + void *data) +{ + int err = 0; + + while (!i915_selftest.random_seed) + i915_selftest.random_seed = get_random_int(); + + i915_selftest.timeout_jiffies = + i915_selftest.timeout_ms ? + msecs_to_jiffies_timeout(i915_selftest.timeout_ms) : + MAX_SCHEDULE_TIMEOUT; + + set_default_test_all(st, count); + + pr_info(DRIVER_NAME ": Performing %s selftests with st_random_seed=0x%x st_timeout=%u\n", + name, i915_selftest.random_seed, i915_selftest.timeout_ms); + + /* Tests are listed in order in i915_*_selftests.h */ + for (; count--; st++) { + if (!st->enabled) + continue; + + cond_resched(); + if (signal_pending(current)) + return -EINTR; + + pr_debug(DRIVER_NAME ": Running %s\n", st->name); + if (data) + err = st->live(data); + else + err = st->mock(); + if (err == -EINTR && !signal_pending(current)) + err = 0; + if (err) + break; + } + + if (WARN(err > 0 || err == -ENOTTY, + "%s returned %d, conflicting with selftest's magic values!\n", + st->name, err)) + err = -1; + + return err; +} + +#define run_selftests(x, data) \ + __run_selftests(#x, x##_selftests, ARRAY_SIZE(x##_selftests), data) + +int i915_mock_selftests(void) +{ + int err; + + if (!i915_selftest.mock) + return 0; + + err = run_selftests(mock, NULL); + if (err) { + i915_selftest.mock = err; + return err; + } + + if (i915_selftest.mock < 0) { + i915_selftest.mock = -ENOTTY; + return 1; + } + + return 0; +} + +int i915_live_selftests(struct pci_dev *pdev) +{ + int err; + + if (!i915_selftest.live) + return 0; + + err = run_selftests(live, to_i915(pci_get_drvdata(pdev))); + if (err) { + i915_selftest.live = err; + return err; + } + + if (i915_selftest.live < 0) { + i915_selftest.live = -ENOTTY; + return 1; + } + + return 0; +} + +int __i915_subtests(const char *caller, + const struct i915_subtest *st, + unsigned int count, + void *data) +{ + int err; + + for (; count--; st++) { + cond_resched(); + if (signal_pending(current)) + return -EINTR; + + pr_debug(DRIVER_NAME ": Running %s/%s\n", caller, st->name); + err = st->func(data); + if (err && err != -EINTR) { + pr_err(DRIVER_NAME "/%s: %s failed with error %d\n", + caller, st->name, err); + return err; + } + } + + return 0; +} + +bool __igt_timeout(unsigned long timeout, const char *fmt, ...) +{ + va_list va; + + if (!signal_pending(current)) { + cond_resched(); + if (time_before(jiffies, timeout)) + return false; + } + + if (fmt) { + va_start(va, fmt); + vprintk(fmt, va); + va_end(va); + } + + return true; +} + +module_param_named(st_random_seed, i915_selftest.random_seed, uint, 0400); +module_param_named(st_timeout, i915_selftest.timeout_ms, uint, 0400); + +module_param_named_unsafe(mock_selftests, i915_selftest.mock, int, 0400); +MODULE_PARM_DESC(mock_selftests, "Run selftests before loading, using mock hardware (0:disabled [default], 1:run tests then load driver, -1:run tests then exit module)"); + +module_param_named_unsafe(live_selftests, i915_selftest.live, int, 0400); +MODULE_PARM_DESC(live_selftests, "Run selftests after driver initialisation on the live system (0:disabled [default], 1:run tests then continue, -1:run tests then exit module)"); diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c new file mode 100644 index 000000000000..ad56566e24db --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -0,0 +1,746 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/prime_numbers.h> + +#include "../i915_selftest.h" + +#include "mock_gem_device.h" +#include "mock_context.h" + +static bool assert_vma(struct i915_vma *vma, + struct drm_i915_gem_object *obj, + struct i915_gem_context *ctx) +{ + bool ok = true; + + if (vma->vm != &ctx->ppgtt->base) { + pr_err("VMA created with wrong VM\n"); + ok = false; + } + + if (vma->size != obj->base.size) { + pr_err("VMA created with wrong size, found %llu, expected %zu\n", + vma->size, obj->base.size); + ok = false; + } + + if (vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) { + pr_err("VMA created with wrong type [%d]\n", + vma->ggtt_view.type); + ok = false; + } + + return ok; +} + +static struct i915_vma * +checked_vma_instance(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + struct i915_ggtt_view *view) +{ + struct i915_vma *vma; + bool ok = true; + + vma = i915_vma_instance(obj, vm, view); + if (IS_ERR(vma)) + return vma; + + /* Manual checks, will be reinforced by i915_vma_compare! */ + if (vma->vm != vm) { + pr_err("VMA's vm [%p] does not match request [%p]\n", + vma->vm, vm); + ok = false; + } + + if (i915_is_ggtt(vm) != i915_vma_is_ggtt(vma)) { + pr_err("VMA ggtt status [%d] does not match parent [%d]\n", + i915_vma_is_ggtt(vma), i915_is_ggtt(vm)); + ok = false; + } + + if (i915_vma_compare(vma, vm, view)) { + pr_err("i915_vma_compare failed with create parmaters!\n"); + return ERR_PTR(-EINVAL); + } + + if (i915_vma_compare(vma, vma->vm, + i915_vma_is_ggtt(vma) ? &vma->ggtt_view : NULL)) { + pr_err("i915_vma_compare failed with itself\n"); + return ERR_PTR(-EINVAL); + } + + if (!ok) { + pr_err("i915_vma_compare failed to detect the difference!\n"); + return ERR_PTR(-EINVAL); + } + + return vma; +} + +static int create_vmas(struct drm_i915_private *i915, + struct list_head *objects, + struct list_head *contexts) +{ + struct drm_i915_gem_object *obj; + struct i915_gem_context *ctx; + int pinned; + + list_for_each_entry(obj, objects, st_link) { + for (pinned = 0; pinned <= 1; pinned++) { + list_for_each_entry(ctx, contexts, link) { + struct i915_address_space *vm = + &ctx->ppgtt->base; + struct i915_vma *vma; + int err; + + vma = checked_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + if (!assert_vma(vma, obj, ctx)) { + pr_err("VMA lookup/create failed\n"); + return -EINVAL; + } + + if (!pinned) { + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) { + pr_err("Failed to pin VMA\n"); + return err; + } + } else { + i915_vma_unpin(vma); + } + } + } + } + + return 0; +} + +static int igt_vma_create(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj, *on; + struct i915_gem_context *ctx, *cn; + unsigned long num_obj, num_ctx; + unsigned long no, nc; + IGT_TIMEOUT(end_time); + LIST_HEAD(contexts); + LIST_HEAD(objects); + int err; + + /* Exercise creating many vma amonst many objections, checking the + * vma creation and lookup routines. + */ + + no = 0; + for_each_prime_number(num_obj, ULONG_MAX - 1) { + for (; no < num_obj; no++) { + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + goto out; + + list_add(&obj->st_link, &objects); + } + + nc = 0; + for_each_prime_number(num_ctx, MAX_CONTEXT_HW_ID) { + for (; nc < num_ctx; nc++) { + ctx = mock_context(i915, "mock"); + if (!ctx) + goto out; + + list_move(&ctx->link, &contexts); + } + + err = create_vmas(i915, &objects, &contexts); + if (err) + goto out; + + if (igt_timeout(end_time, + "%s timed out: after %lu objects in %lu contexts\n", + __func__, no, nc)) + goto end; + } + + list_for_each_entry_safe(ctx, cn, &contexts, link) + mock_context_close(ctx); + } + +end: + /* Final pass to lookup all created contexts */ + err = create_vmas(i915, &objects, &contexts); +out: + list_for_each_entry_safe(ctx, cn, &contexts, link) + mock_context_close(ctx); + + list_for_each_entry_safe(obj, on, &objects, st_link) + i915_gem_object_put(obj); + return err; +} + +struct pin_mode { + u64 size; + u64 flags; + bool (*assert)(const struct i915_vma *, + const struct pin_mode *mode, + int result); + const char *string; +}; + +static bool assert_pin_valid(const struct i915_vma *vma, + const struct pin_mode *mode, + int result) +{ + if (result) + return false; + + if (i915_vma_misplaced(vma, mode->size, 0, mode->flags)) + return false; + + return true; +} + +__maybe_unused +static bool assert_pin_e2big(const struct i915_vma *vma, + const struct pin_mode *mode, + int result) +{ + return result == -E2BIG; +} + +__maybe_unused +static bool assert_pin_enospc(const struct i915_vma *vma, + const struct pin_mode *mode, + int result) +{ + return result == -ENOSPC; +} + +__maybe_unused +static bool assert_pin_einval(const struct i915_vma *vma, + const struct pin_mode *mode, + int result) +{ + return result == -EINVAL; +} + +static int igt_vma_pin1(void *arg) +{ + struct drm_i915_private *i915 = arg; + const struct pin_mode modes[] = { +#define VALID(sz, fl) { .size = (sz), .flags = (fl), .assert = assert_pin_valid, .string = #sz ", " #fl ", (valid) " } +#define __INVALID(sz, fl, check, eval) { .size = (sz), .flags = (fl), .assert = (check), .string = #sz ", " #fl ", (invalid " #eval ")" } +#define INVALID(sz, fl) __INVALID(sz, fl, assert_pin_einval, EINVAL) +#define TOOBIG(sz, fl) __INVALID(sz, fl, assert_pin_e2big, E2BIG) +#define NOSPACE(sz, fl) __INVALID(sz, fl, assert_pin_enospc, ENOSPC) + VALID(0, PIN_GLOBAL), + VALID(0, PIN_GLOBAL | PIN_MAPPABLE), + + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | 4096), + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | 8192), + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), + VALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.base.total - 4096)), + + VALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | (i915->ggtt.mappable_end - 4096)), + INVALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | i915->ggtt.mappable_end), + VALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | (i915->ggtt.base.total - 4096)), + INVALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | i915->ggtt.base.total), + INVALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | round_down(U64_MAX, PAGE_SIZE)), + + VALID(4096, PIN_GLOBAL), + VALID(8192, PIN_GLOBAL), + VALID(i915->ggtt.mappable_end - 4096, PIN_GLOBAL | PIN_MAPPABLE), + VALID(i915->ggtt.mappable_end, PIN_GLOBAL | PIN_MAPPABLE), + TOOBIG(i915->ggtt.mappable_end + 4096, PIN_GLOBAL | PIN_MAPPABLE), + VALID(i915->ggtt.base.total - 4096, PIN_GLOBAL), + VALID(i915->ggtt.base.total, PIN_GLOBAL), + TOOBIG(i915->ggtt.base.total + 4096, PIN_GLOBAL), + TOOBIG(round_down(U64_MAX, PAGE_SIZE), PIN_GLOBAL), + INVALID(8192, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | (i915->ggtt.mappable_end - 4096)), + INVALID(8192, PIN_GLOBAL | PIN_OFFSET_FIXED | (i915->ggtt.base.total - 4096)), + INVALID(8192, PIN_GLOBAL | PIN_OFFSET_FIXED | (round_down(U64_MAX, PAGE_SIZE) - 4096)), + + VALID(8192, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), + +#if !IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) + /* Misusing BIAS is a programming error (it is not controllable + * from userspace) so when debugging is enabled, it explodes. + * However, the tests are still quite interesting for checking + * variable start, end and size. + */ + NOSPACE(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | i915->ggtt.mappable_end), + NOSPACE(0, PIN_GLOBAL | PIN_OFFSET_BIAS | i915->ggtt.base.total), + NOSPACE(8192, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), + NOSPACE(8192, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.base.total - 4096)), +#endif + { }, +#undef NOSPACE +#undef TOOBIG +#undef INVALID +#undef __INVALID +#undef VALID + }, *m; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err = -EINVAL; + + /* Exercise all the weird and wonderful i915_vma_pin requests, + * focusing on error handling of boundary conditions. + */ + + GEM_BUG_ON(!drm_mm_clean(&i915->ggtt.base.mm)); + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = checked_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) + goto out; + + for (m = modes; m->assert; m++) { + err = i915_vma_pin(vma, m->size, 0, m->flags); + if (!m->assert(vma, m, err)) { + pr_err("%s to pin single page into GGTT with mode[%d:%s]: size=%llx flags=%llx, err=%d\n", + m->assert == assert_pin_valid ? "Failed" : "Unexpectedly succeeded", + (int)(m - modes), m->string, m->size, m->flags, + err); + if (!err) + i915_vma_unpin(vma); + err = -EINVAL; + goto out; + } + + if (!err) { + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + if (err) { + pr_err("Failed to unbind single page from GGTT, err=%d\n", err); + goto out; + } + } + } + + err = 0; +out: + i915_gem_object_put(obj); + return err; +} + +static unsigned long rotated_index(const struct intel_rotation_info *r, + unsigned int n, + unsigned int x, + unsigned int y) +{ + return (r->plane[n].stride * (r->plane[n].height - y - 1) + + r->plane[n].offset + x); +} + +static struct scatterlist * +assert_rotated(struct drm_i915_gem_object *obj, + const struct intel_rotation_info *r, unsigned int n, + struct scatterlist *sg) +{ + unsigned int x, y; + + for (x = 0; x < r->plane[n].width; x++) { + for (y = 0; y < r->plane[n].height; y++) { + unsigned long src_idx; + dma_addr_t src; + + if (!sg) { + pr_err("Invalid sg table: too short at plane %d, (%d, %d)!\n", + n, x, y); + return ERR_PTR(-EINVAL); + } + + src_idx = rotated_index(r, n, x, y); + src = i915_gem_object_get_dma_address(obj, src_idx); + + if (sg_dma_len(sg) != PAGE_SIZE) { + pr_err("Invalid sg.length, found %d, expected %lu for rotated page (%d, %d) [src index %lu]\n", + sg_dma_len(sg), PAGE_SIZE, + x, y, src_idx); + return ERR_PTR(-EINVAL); + } + + if (sg_dma_address(sg) != src) { + pr_err("Invalid address for rotated page (%d, %d) [src index %lu]\n", + x, y, src_idx); + return ERR_PTR(-EINVAL); + } + + sg = sg_next(sg); + } + } + + return sg; +} + +static unsigned int rotated_size(const struct intel_rotation_plane_info *a, + const struct intel_rotation_plane_info *b) +{ + return a->width * a->height + b->width * b->height; +} + +static int igt_vma_rotate(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_address_space *vm = &i915->ggtt.base; + struct drm_i915_gem_object *obj; + const struct intel_rotation_plane_info planes[] = { + { .width = 1, .height = 1, .stride = 1 }, + { .width = 2, .height = 2, .stride = 2 }, + { .width = 4, .height = 4, .stride = 4 }, + { .width = 8, .height = 8, .stride = 8 }, + + { .width = 3, .height = 5, .stride = 3 }, + { .width = 3, .height = 5, .stride = 4 }, + { .width = 3, .height = 5, .stride = 5 }, + + { .width = 5, .height = 3, .stride = 5 }, + { .width = 5, .height = 3, .stride = 7 }, + { .width = 5, .height = 3, .stride = 9 }, + + { .width = 4, .height = 6, .stride = 6 }, + { .width = 6, .height = 4, .stride = 6 }, + { } + }, *a, *b; + const unsigned int max_pages = 64; + int err = -ENOMEM; + + /* Create VMA for many different combinations of planes and check + * that the page layout within the rotated VMA match our expectations. + */ + + obj = i915_gem_object_create_internal(i915, max_pages * PAGE_SIZE); + if (IS_ERR(obj)) + goto out; + + for (a = planes; a->width; a++) { + for (b = planes + ARRAY_SIZE(planes); b-- != planes; ) { + struct i915_ggtt_view view; + unsigned int n, max_offset; + + max_offset = max(a->stride * a->height, + b->stride * b->height); + GEM_BUG_ON(max_offset > max_pages); + max_offset = max_pages - max_offset; + + view.type = I915_GGTT_VIEW_ROTATED; + view.rotated.plane[0] = *a; + view.rotated.plane[1] = *b; + + for_each_prime_number_from(view.rotated.plane[0].offset, 0, max_offset) { + for_each_prime_number_from(view.rotated.plane[1].offset, 0, max_offset) { + struct scatterlist *sg; + struct i915_vma *vma; + + vma = checked_vma_instance(obj, vm, &view); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_object; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) { + pr_err("Failed to pin VMA, err=%d\n", err); + goto out_object; + } + + if (vma->size != rotated_size(a, b) * PAGE_SIZE) { + pr_err("VMA is wrong size, expected %lu, found %llu\n", + PAGE_SIZE * rotated_size(a, b), vma->size); + err = -EINVAL; + goto out_object; + } + + if (vma->pages->nents != rotated_size(a, b)) { + pr_err("sg table is wrong sizeo, expected %u, found %u nents\n", + rotated_size(a, b), vma->pages->nents); + err = -EINVAL; + goto out_object; + } + + if (vma->node.size < vma->size) { + pr_err("VMA binding too small, expected %llu, found %llu\n", + vma->size, vma->node.size); + err = -EINVAL; + goto out_object; + } + + if (vma->pages == obj->mm.pages) { + pr_err("VMA using unrotated object pages!\n"); + err = -EINVAL; + goto out_object; + } + + sg = vma->pages->sgl; + for (n = 0; n < ARRAY_SIZE(view.rotated.plane); n++) { + sg = assert_rotated(obj, &view.rotated, n, sg); + if (IS_ERR(sg)) { + pr_err("Inconsistent VMA pages for plane %d: [(%d, %d, %d, %d), (%d, %d, %d, %d)]\n", n, + view.rotated.plane[0].width, + view.rotated.plane[0].height, + view.rotated.plane[0].stride, + view.rotated.plane[0].offset, + view.rotated.plane[1].width, + view.rotated.plane[1].height, + view.rotated.plane[1].stride, + view.rotated.plane[1].offset); + err = -EINVAL; + goto out_object; + } + } + + i915_vma_unpin(vma); + } + } + } + } + +out_object: + i915_gem_object_put(obj); +out: + return err; +} + +static bool assert_partial(struct drm_i915_gem_object *obj, + struct i915_vma *vma, + unsigned long offset, + unsigned long size) +{ + struct sgt_iter sgt; + dma_addr_t dma; + + for_each_sgt_dma(dma, sgt, vma->pages) { + dma_addr_t src; + + if (!size) { + pr_err("Partial scattergather list too long\n"); + return false; + } + + src = i915_gem_object_get_dma_address(obj, offset); + if (src != dma) { + pr_err("DMA mismatch for partial page offset %lu\n", + offset); + return false; + } + + offset++; + size--; + } + + return true; +} + +static bool assert_pin(struct i915_vma *vma, + struct i915_ggtt_view *view, + u64 size, + const char *name) +{ + bool ok = true; + + if (vma->size != size) { + pr_err("(%s) VMA is wrong size, expected %llu, found %llu\n", + name, size, vma->size); + ok = false; + } + + if (vma->node.size < vma->size) { + pr_err("(%s) VMA binding too small, expected %llu, found %llu\n", + name, vma->size, vma->node.size); + ok = false; + } + + if (view && view->type != I915_GGTT_VIEW_NORMAL) { + if (memcmp(&vma->ggtt_view, view, sizeof(*view))) { + pr_err("(%s) VMA mismatch upon creation!\n", + name); + ok = false; + } + + if (vma->pages == vma->obj->mm.pages) { + pr_err("(%s) VMA using original object pages!\n", + name); + ok = false; + } + } else { + if (vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) { + pr_err("Not the normal ggtt view! Found %d\n", + vma->ggtt_view.type); + ok = false; + } + + if (vma->pages != vma->obj->mm.pages) { + pr_err("VMA not using object pages!\n"); + ok = false; + } + } + + return ok; +} + +static int igt_vma_partial(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_address_space *vm = &i915->ggtt.base; + const unsigned int npages = 1021; /* prime! */ + struct drm_i915_gem_object *obj; + const struct phase { + const char *name; + } phases[] = { + { "create" }, + { "lookup" }, + { }, + }, *p; + unsigned int sz, offset; + struct i915_vma *vma; + int err = -ENOMEM; + + /* Create lots of different VMA for the object and check that + * we are returned the same VMA when we later request the same range. + */ + + obj = i915_gem_object_create_internal(i915, npages*PAGE_SIZE); + if (IS_ERR(obj)) + goto out; + + for (p = phases; p->name; p++) { /* exercise both create/lookup */ + unsigned int count, nvma; + + nvma = 0; + for_each_prime_number_from(sz, 1, npages) { + for_each_prime_number_from(offset, 0, npages - sz) { + struct i915_ggtt_view view; + + view.type = I915_GGTT_VIEW_PARTIAL; + view.partial.offset = offset; + view.partial.size = sz; + + if (sz == npages) + view.type = I915_GGTT_VIEW_NORMAL; + + vma = checked_vma_instance(obj, vm, &view); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_object; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto out_object; + + if (!assert_pin(vma, &view, sz*PAGE_SIZE, p->name)) { + pr_err("(%s) Inconsistent partial pinning for (offset=%d, size=%d)\n", + p->name, offset, sz); + err = -EINVAL; + goto out_object; + } + + if (!assert_partial(obj, vma, offset, sz)) { + pr_err("(%s) Inconsistent partial pages for (offset=%d, size=%d)\n", + p->name, offset, sz); + err = -EINVAL; + goto out_object; + } + + i915_vma_unpin(vma); + nvma++; + } + } + + count = 0; + list_for_each_entry(vma, &obj->vma_list, obj_link) + count++; + if (count != nvma) { + pr_err("(%s) All partial vma were not recorded on the obj->vma_list: found %u, expected %u\n", + p->name, count, nvma); + err = -EINVAL; + goto out_object; + } + + /* Check that we did create the whole object mapping */ + vma = checked_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_object; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto out_object; + + if (!assert_pin(vma, NULL, obj->base.size, p->name)) { + pr_err("(%s) inconsistent full pin\n", p->name); + err = -EINVAL; + goto out_object; + } + + i915_vma_unpin(vma); + + count = 0; + list_for_each_entry(vma, &obj->vma_list, obj_link) + count++; + if (count != nvma) { + pr_err("(%s) allocated an extra full vma!\n", p->name); + err = -EINVAL; + goto out_object; + } + } + +out_object: + i915_gem_object_put(obj); +out: + return err; +} + +int i915_vma_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_vma_create), + SUBTEST(igt_vma_pin1), + SUBTEST(igt_vma_rotate), + SUBTEST(igt_vma_partial), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + err = i915_subtests(tests, i915); + mutex_unlock(&i915->drm.struct_mutex); + + drm_dev_unref(&i915->drm); + return err; +} + diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c new file mode 100644 index 000000000000..19860a372d90 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -0,0 +1,481 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" +#include "i915_random.h" + +#include "mock_gem_device.h" +#include "mock_engine.h" + +static int check_rbtree(struct intel_engine_cs *engine, + const unsigned long *bitmap, + const struct intel_wait *waiters, + const int count) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct rb_node *rb; + int n; + + if (&b->irq_wait->node != rb_first(&b->waiters)) { + pr_err("First waiter does not match first element of wait-tree\n"); + return -EINVAL; + } + + n = find_first_bit(bitmap, count); + for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { + struct intel_wait *w = container_of(rb, typeof(*w), node); + int idx = w - waiters; + + if (!test_bit(idx, bitmap)) { + pr_err("waiter[%d, seqno=%d] removed but still in wait-tree\n", + idx, w->seqno); + return -EINVAL; + } + + if (n != idx) { + pr_err("waiter[%d, seqno=%d] does not match expected next element in tree [%d]\n", + idx, w->seqno, n); + return -EINVAL; + } + + n = find_next_bit(bitmap, count, n + 1); + } + + return 0; +} + +static int check_completion(struct intel_engine_cs *engine, + const unsigned long *bitmap, + const struct intel_wait *waiters, + const int count) +{ + int n; + + for (n = 0; n < count; n++) { + if (intel_wait_complete(&waiters[n]) != !!test_bit(n, bitmap)) + continue; + + pr_err("waiter[%d, seqno=%d] is %s, but expected %s\n", + n, waiters[n].seqno, + intel_wait_complete(&waiters[n]) ? "complete" : "active", + test_bit(n, bitmap) ? "active" : "complete"); + return -EINVAL; + } + + return 0; +} + +static int check_rbtree_empty(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + if (b->irq_wait) { + pr_err("Empty breadcrumbs still has a waiter\n"); + return -EINVAL; + } + + if (!RB_EMPTY_ROOT(&b->waiters)) { + pr_err("Empty breadcrumbs, but wait-tree not empty\n"); + return -EINVAL; + } + + return 0; +} + +static int igt_random_insert_remove(void *arg) +{ + const u32 seqno_bias = 0x1000; + I915_RND_STATE(prng); + struct intel_engine_cs *engine = arg; + struct intel_wait *waiters; + const int count = 4096; + unsigned int *order; + unsigned long *bitmap; + int err = -ENOMEM; + int n; + + mock_engine_reset(engine); + + waiters = drm_malloc_gfp(count, sizeof(*waiters), GFP_TEMPORARY); + if (!waiters) + goto out_engines; + + bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap), + GFP_TEMPORARY); + if (!bitmap) + goto out_waiters; + + order = i915_random_order(count, &prng); + if (!order) + goto out_bitmap; + + for (n = 0; n < count; n++) + intel_wait_init_for_seqno(&waiters[n], seqno_bias + n); + + err = check_rbtree(engine, bitmap, waiters, count); + if (err) + goto out_order; + + /* Add and remove waiters into the rbtree in random order. At each + * step, we verify that the rbtree is correctly ordered. + */ + for (n = 0; n < count; n++) { + int i = order[n]; + + intel_engine_add_wait(engine, &waiters[i]); + __set_bit(i, bitmap); + + err = check_rbtree(engine, bitmap, waiters, count); + if (err) + goto out_order; + } + + i915_random_reorder(order, count, &prng); + for (n = 0; n < count; n++) { + int i = order[n]; + + intel_engine_remove_wait(engine, &waiters[i]); + __clear_bit(i, bitmap); + + err = check_rbtree(engine, bitmap, waiters, count); + if (err) + goto out_order; + } + + err = check_rbtree_empty(engine); +out_order: + kfree(order); +out_bitmap: + kfree(bitmap); +out_waiters: + drm_free_large(waiters); +out_engines: + mock_engine_flush(engine); + return err; +} + +static int igt_insert_complete(void *arg) +{ + const u32 seqno_bias = 0x1000; + struct intel_engine_cs *engine = arg; + struct intel_wait *waiters; + const int count = 4096; + unsigned long *bitmap; + int err = -ENOMEM; + int n, m; + + mock_engine_reset(engine); + + waiters = drm_malloc_gfp(count, sizeof(*waiters), GFP_TEMPORARY); + if (!waiters) + goto out_engines; + + bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap), + GFP_TEMPORARY); + if (!bitmap) + goto out_waiters; + + for (n = 0; n < count; n++) { + intel_wait_init_for_seqno(&waiters[n], n + seqno_bias); + intel_engine_add_wait(engine, &waiters[n]); + __set_bit(n, bitmap); + } + err = check_rbtree(engine, bitmap, waiters, count); + if (err) + goto out_bitmap; + + /* On each step, we advance the seqno so that several waiters are then + * complete (we increase the seqno by increasingly larger values to + * retire more and more waiters at once). All retired waiters should + * be woken and removed from the rbtree, and so that we check. + */ + for (n = 0; n < count; n = m) { + int seqno = 2 * n; + + GEM_BUG_ON(find_first_bit(bitmap, count) != n); + + if (intel_wait_complete(&waiters[n])) { + pr_err("waiter[%d, seqno=%d] completed too early\n", + n, waiters[n].seqno); + err = -EINVAL; + goto out_bitmap; + } + + /* complete the following waiters */ + mock_seqno_advance(engine, seqno + seqno_bias); + for (m = n; m <= seqno; m++) { + if (m == count) + break; + + GEM_BUG_ON(!test_bit(m, bitmap)); + __clear_bit(m, bitmap); + } + + intel_engine_remove_wait(engine, &waiters[n]); + RB_CLEAR_NODE(&waiters[n].node); + + err = check_rbtree(engine, bitmap, waiters, count); + if (err) { + pr_err("rbtree corrupt after seqno advance to %d\n", + seqno + seqno_bias); + goto out_bitmap; + } + + err = check_completion(engine, bitmap, waiters, count); + if (err) { + pr_err("completions after seqno advance to %d failed\n", + seqno + seqno_bias); + goto out_bitmap; + } + } + + err = check_rbtree_empty(engine); +out_bitmap: + kfree(bitmap); +out_waiters: + drm_free_large(waiters); +out_engines: + mock_engine_flush(engine); + return err; +} + +struct igt_wakeup { + struct task_struct *tsk; + atomic_t *ready, *set, *done; + struct intel_engine_cs *engine; + unsigned long flags; +#define STOP 0 +#define IDLE 1 + wait_queue_head_t *wq; + u32 seqno; +}; + +static int wait_atomic(atomic_t *p) +{ + schedule(); + return 0; +} + +static int wait_atomic_timeout(atomic_t *p) +{ + return schedule_timeout(10 * HZ) ? 0 : -ETIMEDOUT; +} + +static bool wait_for_ready(struct igt_wakeup *w) +{ + DEFINE_WAIT(ready); + + set_bit(IDLE, &w->flags); + if (atomic_dec_and_test(w->done)) + wake_up_atomic_t(w->done); + + if (test_bit(STOP, &w->flags)) + goto out; + + for (;;) { + prepare_to_wait(w->wq, &ready, TASK_INTERRUPTIBLE); + if (atomic_read(w->ready) == 0) + break; + + schedule(); + } + finish_wait(w->wq, &ready); + +out: + clear_bit(IDLE, &w->flags); + if (atomic_dec_and_test(w->set)) + wake_up_atomic_t(w->set); + + return !test_bit(STOP, &w->flags); +} + +static int igt_wakeup_thread(void *arg) +{ + struct igt_wakeup *w = arg; + struct intel_wait wait; + + while (wait_for_ready(w)) { + GEM_BUG_ON(kthread_should_stop()); + + intel_wait_init_for_seqno(&wait, w->seqno); + intel_engine_add_wait(w->engine, &wait); + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (i915_seqno_passed(intel_engine_get_seqno(w->engine), + w->seqno)) + break; + + if (test_bit(STOP, &w->flags)) /* emergency escape */ + break; + + schedule(); + } + intel_engine_remove_wait(w->engine, &wait); + __set_current_state(TASK_RUNNING); + } + + return 0; +} + +static void igt_wake_all_sync(atomic_t *ready, + atomic_t *set, + atomic_t *done, + wait_queue_head_t *wq, + int count) +{ + atomic_set(set, count); + atomic_set(ready, 0); + wake_up_all(wq); + + wait_on_atomic_t(set, wait_atomic, TASK_UNINTERRUPTIBLE); + atomic_set(ready, count); + atomic_set(done, count); +} + +static int igt_wakeup(void *arg) +{ + I915_RND_STATE(prng); + const int state = TASK_UNINTERRUPTIBLE; + struct intel_engine_cs *engine = arg; + struct igt_wakeup *waiters; + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); + const int count = 4096; + const u32 max_seqno = count / 4; + atomic_t ready, set, done; + int err = -ENOMEM; + int n, step; + + mock_engine_reset(engine); + + waiters = drm_malloc_gfp(count, sizeof(*waiters), GFP_TEMPORARY); + if (!waiters) + goto out_engines; + + /* Create a large number of threads, each waiting on a random seqno. + * Multiple waiters will be waiting for the same seqno. + */ + atomic_set(&ready, count); + for (n = 0; n < count; n++) { + waiters[n].wq = &wq; + waiters[n].ready = &ready; + waiters[n].set = &set; + waiters[n].done = &done; + waiters[n].engine = engine; + waiters[n].flags = BIT(IDLE); + + waiters[n].tsk = kthread_run(igt_wakeup_thread, &waiters[n], + "i915/igt:%d", n); + if (IS_ERR(waiters[n].tsk)) + goto out_waiters; + + get_task_struct(waiters[n].tsk); + } + + for (step = 1; step <= max_seqno; step <<= 1) { + u32 seqno; + + /* The waiter threads start paused as we assign them a random + * seqno and reset the engine. Once the engine is reset, + * we signal that the threads may begin their wait upon their + * seqno. + */ + for (n = 0; n < count; n++) { + GEM_BUG_ON(!test_bit(IDLE, &waiters[n].flags)); + waiters[n].seqno = + 1 + prandom_u32_state(&prng) % max_seqno; + } + mock_seqno_advance(engine, 0); + igt_wake_all_sync(&ready, &set, &done, &wq, count); + + /* Simulate the GPU doing chunks of work, with one or more + * seqno appearing to finish at the same time. A random number + * of threads will be waiting upon the update and hopefully be + * woken. + */ + for (seqno = 1; seqno <= max_seqno + step; seqno += step) { + usleep_range(50, 500); + mock_seqno_advance(engine, seqno); + } + GEM_BUG_ON(intel_engine_get_seqno(engine) < 1 + max_seqno); + + /* With the seqno now beyond any of the waiting threads, they + * should all be woken, see that they are complete and signal + * that they are ready for the next test. We wait until all + * threads are complete and waiting for us (i.e. not a seqno). + */ + err = wait_on_atomic_t(&done, wait_atomic_timeout, state); + if (err) { + pr_err("Timed out waiting for %d remaining waiters\n", + atomic_read(&done)); + break; + } + + err = check_rbtree_empty(engine); + if (err) + break; + } + +out_waiters: + for (n = 0; n < count; n++) { + if (IS_ERR(waiters[n].tsk)) + break; + + set_bit(STOP, &waiters[n].flags); + } + mock_seqno_advance(engine, INT_MAX); /* wakeup any broken waiters */ + igt_wake_all_sync(&ready, &set, &done, &wq, n); + + for (n = 0; n < count; n++) { + if (IS_ERR(waiters[n].tsk)) + break; + + kthread_stop(waiters[n].tsk); + put_task_struct(waiters[n].tsk); + } + + drm_free_large(waiters); +out_engines: + mock_engine_flush(engine); + return err; +} + +int intel_breadcrumbs_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_random_insert_remove), + SUBTEST(igt_insert_complete), + SUBTEST(igt_wakeup), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915->engine[RCS]); + drm_dev_unref(&i915->drm); + + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c new file mode 100644 index 000000000000..d4acee6730e9 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -0,0 +1,537 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +struct hang { + struct drm_i915_private *i915; + struct drm_i915_gem_object *hws; + struct drm_i915_gem_object *obj; + u32 *seqno; + u32 *batch; +}; + +static int hang_init(struct hang *h, struct drm_i915_private *i915) +{ + void *vaddr; + int err; + + memset(h, 0, sizeof(*h)); + h->i915 = i915; + + h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(h->hws)) + return PTR_ERR(h->hws); + + h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(h->obj)) { + err = PTR_ERR(h->obj); + goto err_hws; + } + + i915_gem_object_set_cache_level(h->hws, I915_CACHE_LLC); + vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_obj; + } + h->seqno = memset(vaddr, 0xff, PAGE_SIZE); + + vaddr = i915_gem_object_pin_map(h->obj, + HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_unpin_hws; + } + h->batch = vaddr; + + return 0; + +err_unpin_hws: + i915_gem_object_unpin_map(h->hws); +err_obj: + i915_gem_object_put(h->obj); +err_hws: + i915_gem_object_put(h->hws); + return err; +} + +static u64 hws_address(const struct i915_vma *hws, + const struct drm_i915_gem_request *rq) +{ + return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context); +} + +static int emit_recurse_batch(struct hang *h, + struct drm_i915_gem_request *rq) +{ + struct drm_i915_private *i915 = h->i915; + struct i915_address_space *vm = rq->ctx->ppgtt ? &rq->ctx->ppgtt->base : &i915->ggtt.base; + struct i915_vma *hws, *vma; + unsigned int flags; + u32 *batch; + int err; + + vma = i915_vma_instance(h->obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + hws = i915_vma_instance(h->hws, vm, NULL); + if (IS_ERR(hws)) + return PTR_ERR(hws); + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return err; + + err = i915_vma_pin(hws, 0, 0, PIN_USER); + if (err) + goto unpin_vma; + + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto unpin_hws; + + err = i915_switch_context(rq); + if (err) + goto unpin_hws; + + i915_vma_move_to_active(vma, rq, 0); + if (!i915_gem_object_has_active_reference(vma->obj)) { + i915_gem_object_get(vma->obj); + i915_gem_object_set_active_reference(vma->obj); + } + + i915_vma_move_to_active(hws, rq, 0); + if (!i915_gem_object_has_active_reference(hws->obj)) { + i915_gem_object_get(hws->obj); + i915_gem_object_set_active_reference(hws->obj); + } + + batch = h->batch; + if (INTEL_GEN(i915) >= 8) { + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = upper_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; + *batch++ = lower_32_bits(vma->node.start); + *batch++ = upper_32_bits(vma->node.start); + } else if (INTEL_GEN(i915) >= 6) { + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = 0; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + *batch++ = MI_BATCH_BUFFER_START | 1 << 8; + *batch++ = lower_32_bits(vma->node.start); + } else if (INTEL_GEN(i915) >= 4) { + *batch++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; + *batch++ = 0; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + *batch++ = MI_BATCH_BUFFER_START | 2 << 6; + *batch++ = lower_32_bits(vma->node.start); + } else { + *batch++ = MI_STORE_DWORD_IMM; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + *batch++ = MI_BATCH_BUFFER_START | 2 << 6 | 1; + *batch++ = lower_32_bits(vma->node.start); + } + *batch++ = MI_BATCH_BUFFER_END; /* not reached */ + + flags = 0; + if (INTEL_GEN(vm->i915) <= 5) + flags |= I915_DISPATCH_SECURE; + + err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags); + +unpin_hws: + i915_vma_unpin(hws); +unpin_vma: + i915_vma_unpin(vma); + return err; +} + +static struct drm_i915_gem_request * +hang_create_request(struct hang *h, + struct intel_engine_cs *engine, + struct i915_gem_context *ctx) +{ + struct drm_i915_gem_request *rq; + int err; + + if (i915_gem_object_is_active(h->obj)) { + struct drm_i915_gem_object *obj; + void *vaddr; + + obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vaddr = i915_gem_object_pin_map(obj, + HAS_LLC(h->i915) ? I915_MAP_WB : I915_MAP_WC); + if (IS_ERR(vaddr)) { + i915_gem_object_put(obj); + return ERR_CAST(vaddr); + } + + i915_gem_object_unpin_map(h->obj); + i915_gem_object_put(h->obj); + + h->obj = obj; + h->batch = vaddr; + } + + rq = i915_gem_request_alloc(engine, ctx); + if (IS_ERR(rq)) + return rq; + + err = emit_recurse_batch(h, rq); + if (err) { + __i915_add_request(rq, false); + return ERR_PTR(err); + } + + return rq; +} + +static u32 hws_seqno(const struct hang *h, + const struct drm_i915_gem_request *rq) +{ + return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]); +} + +static void hang_fini(struct hang *h) +{ + *h->batch = MI_BATCH_BUFFER_END; + wmb(); + + i915_gem_object_unpin_map(h->obj); + i915_gem_object_put(h->obj); + + i915_gem_object_unpin_map(h->hws); + i915_gem_object_put(h->hws); + + i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED); + i915_gem_retire_requests(h->i915); +} + +static int igt_hang_sanitycheck(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *rq; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct hang h; + int err; + + /* Basic check that we can execute our hanging batch */ + + if (!igt_can_mi_store_dword_imm(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + if (err) + goto unlock; + + for_each_engine(engine, i915, id) { + long timeout; + + rq = hang_create_request(&h, engine, i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + pr_err("Failed to create request for %s, err=%d\n", + engine->name, err); + goto fini; + } + + i915_gem_request_get(rq); + + *h.batch = MI_BATCH_BUFFER_END; + __i915_add_request(rq, true); + + timeout = i915_wait_request(rq, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + i915_gem_request_put(rq); + + if (timeout < 0) { + err = timeout; + pr_err("Wait for request failed on %s, err=%d\n", + engine->name, err); + goto fini; + } + } + +fini: + hang_fini(&h); +unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int igt_global_reset(void *arg) +{ + struct drm_i915_private *i915 = arg; + unsigned int reset_count; + int err = 0; + + /* Check that we can issue a global GPU reset */ + + set_bit(I915_RESET_IN_PROGRESS, &i915->gpu_error.flags); + + mutex_lock(&i915->drm.struct_mutex); + reset_count = i915_reset_count(&i915->gpu_error); + + i915_reset(i915); + + if (i915_reset_count(&i915->gpu_error) == reset_count) { + pr_err("No GPU reset recorded!\n"); + err = -EINVAL; + } + mutex_unlock(&i915->drm.struct_mutex); + + GEM_BUG_ON(test_bit(I915_RESET_IN_PROGRESS, &i915->gpu_error.flags)); + if (i915_terminally_wedged(&i915->gpu_error)) + err = -EIO; + + return err; +} + +static u32 fake_hangcheck(struct drm_i915_gem_request *rq) +{ + u32 reset_count; + + rq->engine->hangcheck.stalled = true; + rq->engine->hangcheck.seqno = intel_engine_get_seqno(rq->engine); + + reset_count = i915_reset_count(&rq->i915->gpu_error); + + set_bit(I915_RESET_IN_PROGRESS, &rq->i915->gpu_error.flags); + wake_up_all(&rq->i915->gpu_error.wait_queue); + + return reset_count; +} + +static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq) +{ + return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq), + rq->fence.seqno), + 10) && + wait_for(i915_seqno_passed(hws_seqno(h, rq), + rq->fence.seqno), + 1000)); +} + +static int igt_wait_reset(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *rq; + unsigned int reset_count; + struct hang h; + long timeout; + int err; + + /* Check that we detect a stuck waiter and issue a reset */ + + set_bit(I915_RESET_IN_PROGRESS, &i915->gpu_error.flags); + + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + if (err) + goto unlock; + + rq = hang_create_request(&h, i915->engine[RCS], i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto fini; + } + + i915_gem_request_get(rq); + __i915_add_request(rq, true); + + if (!wait_for_hang(&h, rq)) { + pr_err("Failed to start request %x\n", rq->fence.seqno); + err = -EIO; + goto out_rq; + } + + reset_count = fake_hangcheck(rq); + + timeout = i915_wait_request(rq, I915_WAIT_LOCKED, 10); + if (timeout < 0) { + pr_err("i915_wait_request failed on a stuck request: err=%ld\n", + timeout); + err = timeout; + goto out_rq; + } + GEM_BUG_ON(test_bit(I915_RESET_IN_PROGRESS, &i915->gpu_error.flags)); + + if (i915_reset_count(&i915->gpu_error) == reset_count) { + pr_err("No GPU reset recorded!\n"); + err = -EINVAL; + goto out_rq; + } + +out_rq: + i915_gem_request_put(rq); +fini: + hang_fini(&h); +unlock: + mutex_unlock(&i915->drm.struct_mutex); + + if (i915_terminally_wedged(&i915->gpu_error)) + return -EIO; + + return err; +} + +static int igt_reset_queue(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct hang h; + int err; + + /* Check that we replay pending requests following a hang */ + + if (!igt_can_mi_store_dword_imm(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + if (err) + goto unlock; + + for_each_engine(engine, i915, id) { + struct drm_i915_gem_request *prev; + IGT_TIMEOUT(end_time); + unsigned int count; + + prev = hang_create_request(&h, engine, i915->kernel_context); + if (IS_ERR(prev)) { + err = PTR_ERR(prev); + goto fini; + } + + i915_gem_request_get(prev); + __i915_add_request(prev, true); + + count = 0; + do { + struct drm_i915_gem_request *rq; + unsigned int reset_count; + + rq = hang_create_request(&h, + engine, + i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto fini; + } + + i915_gem_request_get(rq); + __i915_add_request(rq, true); + + if (!wait_for_hang(&h, prev)) { + pr_err("Failed to start request %x\n", + prev->fence.seqno); + i915_gem_request_put(rq); + i915_gem_request_put(prev); + err = -EIO; + goto fini; + } + + reset_count = fake_hangcheck(prev); + + i915_reset(i915); + + GEM_BUG_ON(test_bit(I915_RESET_IN_PROGRESS, + &i915->gpu_error.flags)); + if (prev->fence.error != -EIO) { + pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n", + prev->fence.error); + i915_gem_request_put(rq); + i915_gem_request_put(prev); + err = -EINVAL; + goto fini; + } + + if (rq->fence.error) { + pr_err("Fence error status not zero [%d] after unrelated reset\n", + rq->fence.error); + i915_gem_request_put(rq); + i915_gem_request_put(prev); + err = -EINVAL; + goto fini; + } + + if (i915_reset_count(&i915->gpu_error) == reset_count) { + pr_err("No GPU reset recorded!\n"); + i915_gem_request_put(rq); + i915_gem_request_put(prev); + err = -EINVAL; + goto fini; + } + + i915_gem_request_put(prev); + prev = rq; + count++; + } while (time_before(jiffies, end_time)); + pr_info("%s: Completed %d resets\n", engine->name, count); + + *h.batch = MI_BATCH_BUFFER_END; + wmb(); + + i915_gem_request_put(prev); + } + +fini: + hang_fini(&h); +unlock: + mutex_unlock(&i915->drm.struct_mutex); + + if (i915_terminally_wedged(&i915->gpu_error)) + return -EIO; + + return err; +} + +int intel_hangcheck_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_hang_sanitycheck), + SUBTEST(igt_global_reset), + SUBTEST(igt_wait_reset), + SUBTEST(igt_reset_queue), + }; + + if (!intel_has_gpu_reset(i915)) + return 0; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c new file mode 100644 index 000000000000..2d0fef2cfca6 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -0,0 +1,182 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +static int intel_fw_table_check(const struct intel_forcewake_range *ranges, + unsigned int num_ranges, + bool is_watertight) +{ + unsigned int i; + s32 prev; + + for (i = 0, prev = -1; i < num_ranges; i++, ranges++) { + /* Check that the table is watertight */ + if (is_watertight && (prev + 1) != (s32)ranges->start) { + pr_err("%s: entry[%d]:(%x, %x) is not watertight to previous (%x)\n", + __func__, i, ranges->start, ranges->end, prev); + return -EINVAL; + } + + /* Check that the table never goes backwards */ + if (prev >= (s32)ranges->start) { + pr_err("%s: entry[%d]:(%x, %x) is less than the previous (%x)\n", + __func__, i, ranges->start, ranges->end, prev); + return -EINVAL; + } + + /* Check that the entry is valid */ + if (ranges->start >= ranges->end) { + pr_err("%s: entry[%d]:(%x, %x) has negative length\n", + __func__, i, ranges->start, ranges->end); + return -EINVAL; + } + + prev = ranges->end; + } + + return 0; +} + +static int intel_shadow_table_check(void) +{ + const i915_reg_t *reg = gen8_shadowed_regs; + unsigned int i; + s32 prev; + + for (i = 0, prev = -1; i < ARRAY_SIZE(gen8_shadowed_regs); i++, reg++) { + u32 offset = i915_mmio_reg_offset(*reg); + + if (prev >= (s32)offset) { + pr_err("%s: entry[%d]:(%x) is before previous (%x)\n", + __func__, i, offset, prev); + return -EINVAL; + } + + prev = offset; + } + + return 0; +} + +int intel_uncore_mock_selftests(void) +{ + struct { + const struct intel_forcewake_range *ranges; + unsigned int num_ranges; + bool is_watertight; + } fw[] = { + { __vlv_fw_ranges, ARRAY_SIZE(__vlv_fw_ranges), false }, + { __chv_fw_ranges, ARRAY_SIZE(__chv_fw_ranges), false }, + { __gen9_fw_ranges, ARRAY_SIZE(__gen9_fw_ranges), true }, + }; + int err, i; + + for (i = 0; i < ARRAY_SIZE(fw); i++) { + err = intel_fw_table_check(fw[i].ranges, + fw[i].num_ranges, + fw[i].is_watertight); + if (err) + return err; + } + + err = intel_shadow_table_check(); + if (err) + return err; + + return 0; +} + +static int intel_uncore_check_forcewake_domains(struct drm_i915_private *dev_priv) +{ +#define FW_RANGE 0x40000 + unsigned long *valid; + u32 offset; + int err; + + if (!HAS_FPGA_DBG_UNCLAIMED(dev_priv) && + !IS_VALLEYVIEW(dev_priv) && + !IS_CHERRYVIEW(dev_priv)) + return 0; + + if (IS_VALLEYVIEW(dev_priv)) /* XXX system lockup! */ + return 0; + + if (IS_BROADWELL(dev_priv)) /* XXX random GPU hang afterwards! */ + return 0; + + valid = kzalloc(BITS_TO_LONGS(FW_RANGE) * sizeof(*valid), + GFP_TEMPORARY); + if (!valid) + return -ENOMEM; + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + check_for_unclaimed_mmio(dev_priv); + for (offset = 0; offset < FW_RANGE; offset += 4) { + i915_reg_t reg = { offset }; + + (void)I915_READ_FW(reg); + if (!check_for_unclaimed_mmio(dev_priv)) + set_bit(offset, valid); + } + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + + err = 0; + for_each_set_bit(offset, valid, FW_RANGE) { + i915_reg_t reg = { offset }; + + intel_uncore_forcewake_reset(dev_priv, false); + check_for_unclaimed_mmio(dev_priv); + + (void)I915_READ(reg); + if (check_for_unclaimed_mmio(dev_priv)) { + pr_err("Unclaimed mmio read to register 0x%04x\n", + offset); + err = -EINVAL; + } + } + + kfree(valid); + return err; +} + +int intel_uncore_live_selftests(struct drm_i915_private *i915) +{ + int err; + + /* Confirm the table we load is still valid */ + err = intel_fw_table_check(i915->uncore.fw_domains_table, + i915->uncore.fw_domains_table_entries, + INTEL_GEN(i915) >= 9); + if (err) + return err; + + err = intel_uncore_check_forcewake_domains(i915); + if (err) + return err; + + return 0; +} diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c new file mode 100644 index 000000000000..8d3a90c3f8ac --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_context.c @@ -0,0 +1,78 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_context.h" +#include "mock_gtt.h" + +struct i915_gem_context * +mock_context(struct drm_i915_private *i915, + const char *name) +{ + struct i915_gem_context *ctx; + int ret; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + kref_init(&ctx->ref); + INIT_LIST_HEAD(&ctx->link); + ctx->i915 = i915; + + ret = ida_simple_get(&i915->context_hw_ida, + 0, MAX_CONTEXT_HW_ID, GFP_KERNEL); + if (ret < 0) + goto err_free; + ctx->hw_id = ret; + + if (name) { + ctx->name = kstrdup(name, GFP_KERNEL); + if (!ctx->name) + goto err_put; + + ctx->ppgtt = mock_ppgtt(i915, name); + if (!ctx->ppgtt) + goto err_put; + } + + return ctx; + +err_free: + kfree(ctx); + return NULL; + +err_put: + i915_gem_context_set_closed(ctx); + i915_gem_context_put(ctx); + return NULL; +} + +void mock_context_close(struct i915_gem_context *ctx) +{ + i915_gem_context_set_closed(ctx); + + i915_ppgtt_close(&ctx->ppgtt->base); + + i915_gem_context_put(ctx); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_context.h b/drivers/gpu/drm/i915/selftests/mock_context.h new file mode 100644 index 000000000000..2427e5c0916a --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_context.h @@ -0,0 +1,34 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_CONTEXT_H +#define __MOCK_CONTEXT_H + +struct i915_gem_context * +mock_context(struct drm_i915_private *i915, + const char *name); + +void mock_context_close(struct i915_gem_context *ctx); + +#endif /* !__MOCK_CONTEXT_H */ diff --git a/drivers/gpu/drm/i915/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/selftests/mock_dmabuf.c new file mode 100644 index 000000000000..99da8f4ef497 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_dmabuf.c @@ -0,0 +1,176 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_dmabuf.h" + +static struct sg_table *mock_map_dma_buf(struct dma_buf_attachment *attachment, + enum dma_data_direction dir) +{ + struct mock_dmabuf *mock = to_mock(attachment->dmabuf); + struct sg_table *st; + struct scatterlist *sg; + int i, err; + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return ERR_PTR(-ENOMEM); + + err = sg_alloc_table(st, mock->npages, GFP_KERNEL); + if (err) + goto err_free; + + sg = st->sgl; + for (i = 0; i < mock->npages; i++) { + sg_set_page(sg, mock->pages[i], PAGE_SIZE, 0); + sg = sg_next(sg); + } + + if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) { + err = -ENOMEM; + goto err_st; + } + + return st; + +err_st: + sg_free_table(st); +err_free: + kfree(st); + return ERR_PTR(err); +} + +static void mock_unmap_dma_buf(struct dma_buf_attachment *attachment, + struct sg_table *st, + enum dma_data_direction dir) +{ + dma_unmap_sg(attachment->dev, st->sgl, st->nents, dir); + sg_free_table(st); + kfree(st); +} + +static void mock_dmabuf_release(struct dma_buf *dma_buf) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + int i; + + for (i = 0; i < mock->npages; i++) + put_page(mock->pages[i]); + + kfree(mock); +} + +static void *mock_dmabuf_vmap(struct dma_buf *dma_buf) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return vm_map_ram(mock->pages, mock->npages, 0, PAGE_KERNEL); +} + +static void mock_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + vm_unmap_ram(vaddr, mock->npages); +} + +static void *mock_dmabuf_kmap_atomic(struct dma_buf *dma_buf, unsigned long page_num) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return kmap_atomic(mock->pages[page_num]); +} + +static void mock_dmabuf_kunmap_atomic(struct dma_buf *dma_buf, unsigned long page_num, void *addr) +{ + kunmap_atomic(addr); +} + +static void *mock_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return kmap(mock->pages[page_num]); +} + +static void mock_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return kunmap(mock->pages[page_num]); +} + +static int mock_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) +{ + return -ENODEV; +} + +static const struct dma_buf_ops mock_dmabuf_ops = { + .map_dma_buf = mock_map_dma_buf, + .unmap_dma_buf = mock_unmap_dma_buf, + .release = mock_dmabuf_release, + .kmap = mock_dmabuf_kmap, + .kmap_atomic = mock_dmabuf_kmap_atomic, + .kunmap = mock_dmabuf_kunmap, + .kunmap_atomic = mock_dmabuf_kunmap_atomic, + .mmap = mock_dmabuf_mmap, + .vmap = mock_dmabuf_vmap, + .vunmap = mock_dmabuf_vunmap, +}; + +static struct dma_buf *mock_dmabuf(int npages) +{ + struct mock_dmabuf *mock; + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + struct dma_buf *dmabuf; + int i; + + mock = kmalloc(sizeof(*mock) + npages * sizeof(struct page *), + GFP_KERNEL); + if (!mock) + return ERR_PTR(-ENOMEM); + + mock->npages = npages; + for (i = 0; i < npages; i++) { + mock->pages[i] = alloc_page(GFP_KERNEL); + if (!mock->pages[i]) + goto err; + } + + exp_info.ops = &mock_dmabuf_ops; + exp_info.size = npages * PAGE_SIZE; + exp_info.flags = O_CLOEXEC; + exp_info.priv = mock; + + dmabuf = dma_buf_export(&exp_info); + if (IS_ERR(dmabuf)) + goto err; + + return dmabuf; + +err: + while (i--) + put_page(mock->pages[i]); + kfree(mock); + return ERR_PTR(-ENOMEM); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_dmabuf.h b/drivers/gpu/drm/i915/selftests/mock_dmabuf.h new file mode 100644 index 000000000000..ec80613159b9 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_dmabuf.h @@ -0,0 +1,41 @@ + +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_DMABUF_H__ +#define __MOCK_DMABUF_H__ + +#include <linux/dma-buf.h> + +struct mock_dmabuf { + int npages; + struct page *pages[]; +}; + +static struct mock_dmabuf *to_mock(struct dma_buf *buf) +{ + return buf->priv; +} + +#endif /* !__MOCK_DMABUF_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_drm.c b/drivers/gpu/drm/i915/selftests/mock_drm.c new file mode 100644 index 000000000000..113dec05c7dc --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_drm.c @@ -0,0 +1,54 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_drm.h" + +static inline struct inode fake_inode(struct drm_i915_private *i915) +{ + return (struct inode){ .i_rdev = i915->drm.primary->index }; +} + +struct drm_file *mock_file(struct drm_i915_private *i915) +{ + struct inode inode = fake_inode(i915); + struct file filp = {}; + struct drm_file *file; + int err; + + err = drm_open(&inode, &filp); + if (unlikely(err)) + return ERR_PTR(err); + + file = filp.private_data; + file->authenticated = true; + return file; +} + +void mock_file_free(struct drm_i915_private *i915, struct drm_file *file) +{ + struct inode inode = fake_inode(i915); + struct file filp = { .private_data = file }; + + drm_release(&inode, &filp); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_drm.h b/drivers/gpu/drm/i915/selftests/mock_drm.h new file mode 100644 index 000000000000..b39beee9f8f6 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_drm.h @@ -0,0 +1,31 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_DRM_H +#define __MOCK_DRM_H + +struct drm_file *mock_file(struct drm_i915_private *i915); +void mock_file_free(struct drm_i915_private *i915, struct drm_file *file); + +#endif /* !__MOCK_DRM_H */ diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c new file mode 100644 index 000000000000..8d5ba037064c --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -0,0 +1,207 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_engine.h" +#include "mock_request.h" + +static struct mock_request *first_request(struct mock_engine *engine) +{ + return list_first_entry_or_null(&engine->hw_queue, + struct mock_request, + link); +} + +static void hw_delay_complete(unsigned long data) +{ + struct mock_engine *engine = (typeof(engine))data; + struct mock_request *request; + + spin_lock(&engine->hw_lock); + + request = first_request(engine); + if (request) { + list_del_init(&request->link); + mock_seqno_advance(&engine->base, request->base.global_seqno); + } + + request = first_request(engine); + if (request) + mod_timer(&engine->hw_delay, jiffies + request->delay); + + spin_unlock(&engine->hw_lock); +} + +static int mock_context_pin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) +{ + i915_gem_context_get(ctx); + return 0; +} + +static void mock_context_unpin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) +{ + i915_gem_context_put(ctx); +} + +static int mock_request_alloc(struct drm_i915_gem_request *request) +{ + struct mock_request *mock = container_of(request, typeof(*mock), base); + + INIT_LIST_HEAD(&mock->link); + mock->delay = 0; + + request->ring = request->engine->buffer; + return 0; +} + +static int mock_emit_flush(struct drm_i915_gem_request *request, + unsigned int flags) +{ + return 0; +} + +static void mock_emit_breadcrumb(struct drm_i915_gem_request *request, + u32 *flags) +{ +} + +static void mock_submit_request(struct drm_i915_gem_request *request) +{ + struct mock_request *mock = container_of(request, typeof(*mock), base); + struct mock_engine *engine = + container_of(request->engine, typeof(*engine), base); + + i915_gem_request_submit(request); + GEM_BUG_ON(!request->global_seqno); + + spin_lock_irq(&engine->hw_lock); + list_add_tail(&mock->link, &engine->hw_queue); + if (mock->link.prev == &engine->hw_queue) + mod_timer(&engine->hw_delay, jiffies + mock->delay); + spin_unlock_irq(&engine->hw_lock); +} + +static struct intel_ring *mock_ring(struct intel_engine_cs *engine) +{ + const unsigned long sz = roundup_pow_of_two(sizeof(struct intel_ring)); + struct intel_ring *ring; + + ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL); + if (!ring) + return NULL; + + ring->engine = engine; + ring->size = sz; + ring->effective_size = sz; + ring->vaddr = (void *)(ring + 1); + + INIT_LIST_HEAD(&ring->request_list); + ring->last_retired_head = -1; + intel_ring_update_space(ring); + + return ring; +} + +struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, + const char *name) +{ + struct mock_engine *engine; + static int id; + + engine = kzalloc(sizeof(*engine) + PAGE_SIZE, GFP_KERNEL); + if (!engine) + return NULL; + + engine->base.buffer = mock_ring(&engine->base); + if (!engine->base.buffer) { + kfree(engine); + return NULL; + } + + /* minimal engine setup for requests */ + engine->base.i915 = i915; + engine->base.name = name; + engine->base.id = id++; + engine->base.status_page.page_addr = (void *)(engine + 1); + + engine->base.context_pin = mock_context_pin; + engine->base.context_unpin = mock_context_unpin; + engine->base.request_alloc = mock_request_alloc; + engine->base.emit_flush = mock_emit_flush; + engine->base.emit_breadcrumb = mock_emit_breadcrumb; + engine->base.submit_request = mock_submit_request; + + engine->base.timeline = + &i915->gt.global_timeline.engine[engine->base.id]; + + intel_engine_init_breadcrumbs(&engine->base); + engine->base.breadcrumbs.mock = true; /* prevent touching HW for irqs */ + + /* fake hw queue */ + spin_lock_init(&engine->hw_lock); + setup_timer(&engine->hw_delay, + hw_delay_complete, + (unsigned long)engine); + INIT_LIST_HEAD(&engine->hw_queue); + + return &engine->base; +} + +void mock_engine_flush(struct intel_engine_cs *engine) +{ + struct mock_engine *mock = + container_of(engine, typeof(*mock), base); + struct mock_request *request, *rn; + + del_timer_sync(&mock->hw_delay); + + spin_lock_irq(&mock->hw_lock); + list_for_each_entry_safe(request, rn, &mock->hw_queue, link) { + list_del_init(&request->link); + mock_seqno_advance(&mock->base, request->base.global_seqno); + } + spin_unlock_irq(&mock->hw_lock); +} + +void mock_engine_reset(struct intel_engine_cs *engine) +{ + intel_write_status_page(engine, I915_GEM_HWS_INDEX, 0); +} + +void mock_engine_free(struct intel_engine_cs *engine) +{ + struct mock_engine *mock = + container_of(engine, typeof(*mock), base); + + GEM_BUG_ON(timer_pending(&mock->hw_delay)); + + if (engine->last_retired_context) + engine->context_unpin(engine, engine->last_retired_context); + + intel_engine_fini_breadcrumbs(engine); + + kfree(engine->buffer); + kfree(engine); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.h b/drivers/gpu/drm/i915/selftests/mock_engine.h new file mode 100644 index 000000000000..e5e240216ba3 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_engine.h @@ -0,0 +1,54 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_ENGINE_H__ +#define __MOCK_ENGINE_H__ + +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/timer.h> + +#include "../intel_ringbuffer.h" + +struct mock_engine { + struct intel_engine_cs base; + + spinlock_t hw_lock; + struct list_head hw_queue; + struct timer_list hw_delay; +}; + +struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, + const char *name); +void mock_engine_flush(struct intel_engine_cs *engine); +void mock_engine_reset(struct intel_engine_cs *engine); +void mock_engine_free(struct intel_engine_cs *engine); + +static inline void mock_seqno_advance(struct intel_engine_cs *engine, u32 seqno) +{ + intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); + intel_engine_wakeup(engine); +} + +#endif /* !__MOCK_ENGINE_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c new file mode 100644 index 000000000000..6a8258eacdcb --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -0,0 +1,226 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/pm_runtime.h> + +#include "mock_engine.h" +#include "mock_context.h" +#include "mock_request.h" +#include "mock_gem_device.h" +#include "mock_gem_object.h" +#include "mock_gtt.h" + +void mock_device_flush(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + lockdep_assert_held(&i915->drm.struct_mutex); + + for_each_engine(engine, i915, id) + mock_engine_flush(engine); + + i915_gem_retire_requests(i915); +} + +static void mock_device_release(struct drm_device *dev) +{ + struct drm_i915_private *i915 = to_i915(dev); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + mutex_lock(&i915->drm.struct_mutex); + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + + cancel_delayed_work_sync(&i915->gt.retire_work); + cancel_delayed_work_sync(&i915->gt.idle_work); + + mutex_lock(&i915->drm.struct_mutex); + for_each_engine(engine, i915, id) + mock_engine_free(engine); + i915_gem_context_fini(i915); + mutex_unlock(&i915->drm.struct_mutex); + + drain_workqueue(i915->wq); + i915_gem_drain_freed_objects(i915); + + mutex_lock(&i915->drm.struct_mutex); + mock_fini_ggtt(i915); + i915_gem_timeline_fini(&i915->gt.global_timeline); + mutex_unlock(&i915->drm.struct_mutex); + + destroy_workqueue(i915->wq); + + kmem_cache_destroy(i915->dependencies); + kmem_cache_destroy(i915->requests); + kmem_cache_destroy(i915->vmas); + kmem_cache_destroy(i915->objects); + + drm_dev_fini(&i915->drm); + put_device(&i915->drm.pdev->dev); +} + +static struct drm_driver mock_driver = { + .name = "mock", + .driver_features = DRIVER_GEM, + .release = mock_device_release, + + .gem_close_object = i915_gem_close_object, + .gem_free_object_unlocked = i915_gem_free_object, +}; + +static void release_dev(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + kfree(pdev); +} + +static void mock_retire_work_handler(struct work_struct *work) +{ +} + +static void mock_idle_work_handler(struct work_struct *work) +{ +} + +struct drm_i915_private *mock_gem_device(void) +{ + struct drm_i915_private *i915; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct pci_dev *pdev; + int err; + + pdev = kzalloc(sizeof(*pdev) + sizeof(*i915), GFP_KERNEL); + if (!pdev) + goto err; + + device_initialize(&pdev->dev); + pdev->dev.release = release_dev; + dev_set_name(&pdev->dev, "mock"); + dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + + pm_runtime_dont_use_autosuspend(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); + + i915 = (struct drm_i915_private *)(pdev + 1); + pci_set_drvdata(pdev, i915); + + err = drm_dev_init(&i915->drm, &mock_driver, &pdev->dev); + if (err) { + pr_err("Failed to initialise mock GEM device: err=%d\n", err); + goto put_device; + } + i915->drm.pdev = pdev; + i915->drm.dev_private = i915; + + /* Using the global GTT may ask questions about KMS users, so prepare */ + drm_mode_config_init(&i915->drm); + + mkwrite_device_info(i915)->gen = -1; + + spin_lock_init(&i915->mm.object_stat_lock); + + init_waitqueue_head(&i915->gpu_error.wait_queue); + init_waitqueue_head(&i915->gpu_error.reset_queue); + + i915->wq = alloc_ordered_workqueue("mock", 0); + if (!i915->wq) + goto put_device; + + INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); + init_llist_head(&i915->mm.free_list); + INIT_LIST_HEAD(&i915->mm.unbound_list); + INIT_LIST_HEAD(&i915->mm.bound_list); + + ida_init(&i915->context_hw_ida); + + INIT_DELAYED_WORK(&i915->gt.retire_work, mock_retire_work_handler); + INIT_DELAYED_WORK(&i915->gt.idle_work, mock_idle_work_handler); + + i915->gt.awake = true; + + i915->objects = KMEM_CACHE(mock_object, SLAB_HWCACHE_ALIGN); + if (!i915->objects) + goto err_wq; + + i915->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); + if (!i915->vmas) + goto err_objects; + + i915->requests = KMEM_CACHE(mock_request, + SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT | + SLAB_DESTROY_BY_RCU); + if (!i915->requests) + goto err_vmas; + + i915->dependencies = KMEM_CACHE(i915_dependency, + SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT); + if (!i915->dependencies) + goto err_requests; + + mutex_lock(&i915->drm.struct_mutex); + INIT_LIST_HEAD(&i915->gt.timelines); + err = i915_gem_timeline_init__global(i915); + if (err) { + mutex_unlock(&i915->drm.struct_mutex); + goto err_dependencies; + } + + mock_init_ggtt(i915); + mutex_unlock(&i915->drm.struct_mutex); + + mkwrite_device_info(i915)->ring_mask = BIT(0); + i915->engine[RCS] = mock_engine(i915, "mock"); + if (!i915->engine[RCS]) + goto err_dependencies; + + i915->kernel_context = mock_context(i915, NULL); + if (!i915->kernel_context) + goto err_engine; + + return i915; + +err_engine: + for_each_engine(engine, i915, id) + mock_engine_free(engine); +err_dependencies: + kmem_cache_destroy(i915->dependencies); +err_requests: + kmem_cache_destroy(i915->requests); +err_vmas: + kmem_cache_destroy(i915->vmas); +err_objects: + kmem_cache_destroy(i915->objects); +err_wq: + destroy_workqueue(i915->wq); +put_device: + put_device(&pdev->dev); +err: + return NULL; +} diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.h b/drivers/gpu/drm/i915/selftests/mock_gem_device.h new file mode 100644 index 000000000000..4cca4d57f52c --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.h @@ -0,0 +1,9 @@ +#ifndef __MOCK_GEM_DEVICE_H__ +#define __MOCK_GEM_DEVICE_H__ + +struct drm_i915_private; + +struct drm_i915_private *mock_gem_device(void); +void mock_device_flush(struct drm_i915_private *i915); + +#endif /* !__MOCK_GEM_DEVICE_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_object.h b/drivers/gpu/drm/i915/selftests/mock_gem_object.h new file mode 100644 index 000000000000..9fbf67321662 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_gem_object.h @@ -0,0 +1,8 @@ +#ifndef __MOCK_GEM_OBJECT_H__ +#define __MOCK_GEM_OBJECT_H__ + +struct mock_object { + struct drm_i915_gem_object base; +}; + +#endif /* !__MOCK_GEM_OBJECT_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c new file mode 100644 index 000000000000..a61309c7cb3e --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -0,0 +1,138 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_gtt.h" + +static void mock_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 flags) +{ +} + +static void mock_insert_entries(struct i915_address_space *vm, + struct sg_table *st, + u64 start, + enum i915_cache_level level, u32 flags) +{ +} + +static int mock_bind_ppgtt(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + GEM_BUG_ON(flags & I915_VMA_GLOBAL_BIND); + vma->pages = vma->obj->mm.pages; + vma->flags |= I915_VMA_LOCAL_BIND; + return 0; +} + +static void mock_unbind_ppgtt(struct i915_vma *vma) +{ +} + +static void mock_cleanup(struct i915_address_space *vm) +{ +} + +struct i915_hw_ppgtt * +mock_ppgtt(struct drm_i915_private *i915, + const char *name) +{ + struct i915_hw_ppgtt *ppgtt; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return NULL; + + kref_init(&ppgtt->ref); + ppgtt->base.i915 = i915; + ppgtt->base.total = round_down(U64_MAX, PAGE_SIZE); + ppgtt->base.file = ERR_PTR(-ENODEV); + + INIT_LIST_HEAD(&ppgtt->base.active_list); + INIT_LIST_HEAD(&ppgtt->base.inactive_list); + INIT_LIST_HEAD(&ppgtt->base.unbound_list); + + INIT_LIST_HEAD(&ppgtt->base.global_link); + drm_mm_init(&ppgtt->base.mm, 0, ppgtt->base.total); + i915_gem_timeline_init(i915, &ppgtt->base.timeline, name); + + ppgtt->base.clear_range = nop_clear_range; + ppgtt->base.insert_page = mock_insert_page; + ppgtt->base.insert_entries = mock_insert_entries; + ppgtt->base.bind_vma = mock_bind_ppgtt; + ppgtt->base.unbind_vma = mock_unbind_ppgtt; + ppgtt->base.cleanup = mock_cleanup; + + return ppgtt; +} + +static int mock_bind_ggtt(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + int err; + + err = i915_get_ggtt_vma_pages(vma); + if (err) + return err; + + vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; + return 0; +} + +static void mock_unbind_ggtt(struct i915_vma *vma) +{ +} + +void mock_init_ggtt(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + + INIT_LIST_HEAD(&i915->vm_list); + + ggtt->base.i915 = i915; + + ggtt->mappable_base = 0; + ggtt->mappable_end = 2048 * PAGE_SIZE; + ggtt->base.total = 4096 * PAGE_SIZE; + + ggtt->base.clear_range = nop_clear_range; + ggtt->base.insert_page = mock_insert_page; + ggtt->base.insert_entries = mock_insert_entries; + ggtt->base.bind_vma = mock_bind_ggtt; + ggtt->base.unbind_vma = mock_unbind_ggtt; + ggtt->base.cleanup = mock_cleanup; + + i915_address_space_init(&ggtt->base, i915, "global"); +} + +void mock_fini_ggtt(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + + i915_address_space_fini(&ggtt->base); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.h b/drivers/gpu/drm/i915/selftests/mock_gtt.h new file mode 100644 index 000000000000..9a0a833bb545 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.h @@ -0,0 +1,35 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_GTT_H +#define __MOCK_GTT_H + +void mock_init_ggtt(struct drm_i915_private *i915); +void mock_fini_ggtt(struct drm_i915_private *i915); + +struct i915_hw_ppgtt * +mock_ppgtt(struct drm_i915_private *i915, + const char *name); + +#endif /* !__MOCK_GTT_H */ diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c new file mode 100644 index 000000000000..0e8d2e7f8c70 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_request.c @@ -0,0 +1,63 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_engine.h" +#include "mock_request.h" + +struct drm_i915_gem_request * +mock_request(struct intel_engine_cs *engine, + struct i915_gem_context *context, + unsigned long delay) +{ + struct drm_i915_gem_request *request; + struct mock_request *mock; + + /* NB the i915->requests slab cache is enlarged to fit mock_request */ + request = i915_gem_request_alloc(engine, context); + if (!request) + return NULL; + + mock = container_of(request, typeof(*mock), base); + mock->delay = delay; + + return &mock->base; +} + +bool mock_cancel_request(struct drm_i915_gem_request *request) +{ + struct mock_request *mock = container_of(request, typeof(*mock), base); + struct mock_engine *engine = + container_of(request->engine, typeof(*engine), base); + bool was_queued; + + spin_lock_irq(&engine->hw_lock); + was_queued = !list_empty(&mock->link); + list_del_init(&mock->link); + spin_unlock_irq(&engine->hw_lock); + + if (was_queued) + i915_gem_request_unsubmit(request); + + return was_queued; +} diff --git a/drivers/gpu/drm/i915/selftests/mock_request.h b/drivers/gpu/drm/i915/selftests/mock_request.h new file mode 100644 index 000000000000..4dea74c8e96d --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_request.h @@ -0,0 +1,46 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_REQUEST__ +#define __MOCK_REQUEST__ + +#include <linux/list.h> + +#include "../i915_gem_request.h" + +struct mock_request { + struct drm_i915_gem_request base; + + struct list_head link; + unsigned long delay; +}; + +struct drm_i915_gem_request * +mock_request(struct intel_engine_cs *engine, + struct i915_gem_context *context, + unsigned long delay); + +bool mock_cancel_request(struct drm_i915_gem_request *request); + +#endif /* !__MOCK_REQUEST__ */ diff --git a/drivers/gpu/drm/i915/selftests/scatterlist.c b/drivers/gpu/drm/i915/selftests/scatterlist.c new file mode 100644 index 000000000000..eb2cda8e2b9f --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/scatterlist.c @@ -0,0 +1,355 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <linux/prime_numbers.h> +#include <linux/random.h> + +#include "../i915_selftest.h" + +#define PFN_BIAS (1 << 10) + +struct pfn_table { + struct sg_table st; + unsigned long start, end; +}; + +typedef unsigned int (*npages_fn_t)(unsigned long n, + unsigned long count, + struct rnd_state *rnd); + +static noinline int expect_pfn_sg(struct pfn_table *pt, + npages_fn_t npages_fn, + struct rnd_state *rnd, + const char *who, + unsigned long timeout) +{ + struct scatterlist *sg; + unsigned long pfn, n; + + pfn = pt->start; + for_each_sg(pt->st.sgl, sg, pt->st.nents, n) { + struct page *page = sg_page(sg); + unsigned int npages = npages_fn(n, pt->st.nents, rnd); + + if (page_to_pfn(page) != pfn) { + pr_err("%s: %s left pages out of order, expected pfn %lu, found pfn %lu (using for_each_sg)\n", + __func__, who, pfn, page_to_pfn(page)); + return -EINVAL; + } + + if (sg->length != npages * PAGE_SIZE) { + pr_err("%s: %s copied wrong sg length, expected size %lu, found %u (using for_each_sg)\n", + __func__, who, npages * PAGE_SIZE, sg->length); + return -EINVAL; + } + + if (igt_timeout(timeout, "%s timed out\n", who)) + return -EINTR; + + pfn += npages; + } + if (pfn != pt->end) { + pr_err("%s: %s finished on wrong pfn, expected %lu, found %lu\n", + __func__, who, pt->end, pfn); + return -EINVAL; + } + + return 0; +} + +static noinline int expect_pfn_sg_page_iter(struct pfn_table *pt, + const char *who, + unsigned long timeout) +{ + struct sg_page_iter sgiter; + unsigned long pfn; + + pfn = pt->start; + for_each_sg_page(pt->st.sgl, &sgiter, pt->st.nents, 0) { + struct page *page = sg_page_iter_page(&sgiter); + + if (page != pfn_to_page(pfn)) { + pr_err("%s: %s left pages out of order, expected pfn %lu, found pfn %lu (using for_each_sg_page)\n", + __func__, who, pfn, page_to_pfn(page)); + return -EINVAL; + } + + if (igt_timeout(timeout, "%s timed out\n", who)) + return -EINTR; + + pfn++; + } + if (pfn != pt->end) { + pr_err("%s: %s finished on wrong pfn, expected %lu, found %lu\n", + __func__, who, pt->end, pfn); + return -EINVAL; + } + + return 0; +} + +static noinline int expect_pfn_sgtiter(struct pfn_table *pt, + const char *who, + unsigned long timeout) +{ + struct sgt_iter sgt; + struct page *page; + unsigned long pfn; + + pfn = pt->start; + for_each_sgt_page(page, sgt, &pt->st) { + if (page != pfn_to_page(pfn)) { + pr_err("%s: %s left pages out of order, expected pfn %lu, found pfn %lu (using for_each_sgt_page)\n", + __func__, who, pfn, page_to_pfn(page)); + return -EINVAL; + } + + if (igt_timeout(timeout, "%s timed out\n", who)) + return -EINTR; + + pfn++; + } + if (pfn != pt->end) { + pr_err("%s: %s finished on wrong pfn, expected %lu, found %lu\n", + __func__, who, pt->end, pfn); + return -EINVAL; + } + + return 0; +} + +static int expect_pfn_sgtable(struct pfn_table *pt, + npages_fn_t npages_fn, + struct rnd_state *rnd, + const char *who, + unsigned long timeout) +{ + int err; + + err = expect_pfn_sg(pt, npages_fn, rnd, who, timeout); + if (err) + return err; + + err = expect_pfn_sg_page_iter(pt, who, timeout); + if (err) + return err; + + err = expect_pfn_sgtiter(pt, who, timeout); + if (err) + return err; + + return 0; +} + +static unsigned int one(unsigned long n, + unsigned long count, + struct rnd_state *rnd) +{ + return 1; +} + +static unsigned int grow(unsigned long n, + unsigned long count, + struct rnd_state *rnd) +{ + return n + 1; +} + +static unsigned int shrink(unsigned long n, + unsigned long count, + struct rnd_state *rnd) +{ + return count - n; +} + +static unsigned int random(unsigned long n, + unsigned long count, + struct rnd_state *rnd) +{ + return 1 + (prandom_u32_state(rnd) % 1024); +} + +static int alloc_table(struct pfn_table *pt, + unsigned long count, unsigned long max, + npages_fn_t npages_fn, + struct rnd_state *rnd, + int alloc_error) +{ + struct scatterlist *sg; + unsigned long n, pfn; + + if (sg_alloc_table(&pt->st, max, + GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN)) + return alloc_error; + + /* count should be less than 20 to prevent overflowing sg->length */ + GEM_BUG_ON(overflows_type(count * PAGE_SIZE, sg->length)); + + /* Construct a table where each scatterlist contains different number + * of entries. The idea is to check that we can iterate the individual + * pages from inside the coalesced lists. + */ + pt->start = PFN_BIAS; + pfn = pt->start; + sg = pt->st.sgl; + for (n = 0; n < count; n++) { + unsigned long npages = npages_fn(n, count, rnd); + + /* Nobody expects the Sparse Memmap! */ + if (pfn_to_page(pfn + npages) != pfn_to_page(pfn) + npages) { + sg_free_table(&pt->st); + return -ENOSPC; + } + + if (n) + sg = sg_next(sg); + sg_set_page(sg, pfn_to_page(pfn), npages * PAGE_SIZE, 0); + + GEM_BUG_ON(page_to_pfn(sg_page(sg)) != pfn); + GEM_BUG_ON(sg->length != npages * PAGE_SIZE); + GEM_BUG_ON(sg->offset != 0); + + pfn += npages; + } + sg_mark_end(sg); + pt->st.nents = n; + pt->end = pfn; + + return 0; +} + +static const npages_fn_t npages_funcs[] = { + one, + grow, + shrink, + random, + NULL, +}; + +static int igt_sg_alloc(void *ignored) +{ + IGT_TIMEOUT(end_time); + const unsigned long max_order = 20; /* approximating a 4GiB object */ + struct rnd_state prng; + unsigned long prime; + int alloc_error = -ENOMEM; + + for_each_prime_number(prime, max_order) { + unsigned long size = BIT(prime); + int offset; + + for (offset = -1; offset <= 1; offset++) { + unsigned long sz = size + offset; + const npages_fn_t *npages; + struct pfn_table pt; + int err; + + for (npages = npages_funcs; *npages; npages++) { + prandom_seed_state(&prng, + i915_selftest.random_seed); + err = alloc_table(&pt, sz, sz, *npages, &prng, + alloc_error); + if (err == -ENOSPC) + break; + if (err) + return err; + + prandom_seed_state(&prng, + i915_selftest.random_seed); + err = expect_pfn_sgtable(&pt, *npages, &prng, + "sg_alloc_table", + end_time); + sg_free_table(&pt.st); + if (err) + return err; + } + } + + /* Test at least one continuation before accepting oom */ + if (size > SG_MAX_SINGLE_ALLOC) + alloc_error = -ENOSPC; + } + + return 0; +} + +static int igt_sg_trim(void *ignored) +{ + IGT_TIMEOUT(end_time); + const unsigned long max = PAGE_SIZE; /* not prime! */ + struct pfn_table pt; + unsigned long prime; + int alloc_error = -ENOMEM; + + for_each_prime_number(prime, max) { + const npages_fn_t *npages; + int err; + + for (npages = npages_funcs; *npages; npages++) { + struct rnd_state prng; + + prandom_seed_state(&prng, i915_selftest.random_seed); + err = alloc_table(&pt, prime, max, *npages, &prng, + alloc_error); + if (err == -ENOSPC) + break; + if (err) + return err; + + if (i915_sg_trim(&pt.st)) { + if (pt.st.orig_nents != prime || + pt.st.nents != prime) { + pr_err("i915_sg_trim failed (nents %u, orig_nents %u), expected %lu\n", + pt.st.nents, pt.st.orig_nents, prime); + err = -EINVAL; + } else { + prandom_seed_state(&prng, + i915_selftest.random_seed); + err = expect_pfn_sgtable(&pt, + *npages, &prng, + "i915_sg_trim", + end_time); + } + } + sg_free_table(&pt.st); + if (err) + return err; + } + + /* Test at least one continuation before accepting oom */ + if (prime > SG_MAX_SINGLE_ALLOC) + alloc_error = -ENOSPC; + } + + return 0; +} + +int scatterlist_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_sg_alloc), + SUBTEST(igt_sg_trim), + }; + + return i915_subtests(tests, NULL); +} |