summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2017-10-20 03:56:10 +0300
committerDave Airlie <airlied@redhat.com>2017-10-20 03:56:10 +0300
commit282dc8322a95b5c6a246fc781d89e5930821d486 (patch)
tree55ce320a8ccee6be53342670a007b3b31d69b122
parent6585d4274b0baf1d09318539c4a726a96b51af34 (diff)
parentfa9caf0b6e69703ff8a4d4da17897008ec2f2dd3 (diff)
downloadlinux-282dc8322a95b5c6a246fc781d89e5930821d486.tar.xz
Merge tag 'drm-intel-next-2017-10-12' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
Last batch of drm/i915 features for v4.15: - transparent huge pages support (Matthew) - uapi: I915_PARAM_HAS_SCHEDULER into a capability bitmask (Chris) - execlists: preemption (Chris) - scheduler: user defined priorities (Chris) - execlists optimization (MichaƂ) - plenty of display fixes (Imre) - has_ipc fix (Rodrigo) - platform features definition refactoring (Rodrigo) - legacy cursor update fix (Maarten) - fix vblank waits for cursor updates (Maarten) - reprogram dmc firmware on resume, dmc state fix (Imre) - remove use_mmio_flip module parameter (Maarten) - wa fixes (Oscar) - huc/guc firmware refacoring (Sagar, Michal) - push encoder specific code to encoder hooks (Jani) - DP MST fixes (Dhinakaran) - eDP power sequencing fixes (Manasi) - selftest updates (Chris, Matthew) - mmu notifier cpu hotplug deadlock fix (Daniel) - more VBT parser refactoring (Jani) - max pipe refactoring (Mika Kahola) - rc6/rps refactoring and separation (Sagar) - userptr lockdep fix (Chris) - tracepoint fixes and defunct tracepoint removal (Chris) - use rcu instead of abusing stop_machine (Daniel) - plenty of other fixes all around (Everyone) * tag 'drm-intel-next-2017-10-12' of git://anongit.freedesktop.org/drm/drm-intel: (145 commits) drm/i915: Update DRIVER_DATE to 20171012 drm/i915: Simplify intel_sanitize_enable_ppgtt drm/i915/userptr: Drop struct_mutex before cleanup drm/i915/dp: limit sink rates based on rate drm/i915/dp: centralize max source rate conditions more drm/i915: Allow PCH platforms fall back to BIOS LVDS mode drm/i915: Reuse normal state readout for LVDS/DVO fixed mode drm/i915: Use rcu instead of stop_machine in set_wedged drm/i915: Introduce separate status variable for RC6 and LLC ring frequency setup drm/i915: Create generic functions to control RC6, RPS drm/i915: Create generic function to setup LLC ring frequency table drm/i915: Rename intel_enable_rc6 to intel_rc6_enabled drm/i915: Name structure in dev_priv that contains RPS/RC6 state as "gt_pm" drm/i915: Move rps.hw_lock to dev_priv and s/hw_lock/pcu_lock drm/i915: Name i915_runtime_pm structure in dev_priv as "runtime_pm" drm/i915: Separate RPS and RC6 handling for CHV drm/i915: Separate RPS and RC6 handling for VLV drm/i915: Separate RPS and RC6 handling for BDW drm/i915: Remove superfluous IS_BDW checks and non-BDW changes from gen8_enable_rps drm/i915: Separate RPS and RC6 handling for gen6+ ...
-rw-r--r--drivers/gpu/drm/i915/Makefile3
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.c1
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c379
-rw-r--r--drivers/gpu/drm/i915/i915_drv.c48
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h83
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c375
-rw-r--r--drivers/gpu/drm/i915/i915_gem_context.c99
-rw-r--r--drivers/gpu/drm/i915/i915_gem_dmabuf.c18
-rw-r--r--drivers/gpu/drm/i915/i915_gem_evict.c7
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c14
-rw-r--r--drivers/gpu/drm/i915/i915_gem_fence_reg.c40
-rw-r--r--drivers/gpu/drm/i915/i915_gem_gtt.c285
-rw-r--r--drivers/gpu/drm/i915/i915_gem_gtt.h20
-rw-r--r--drivers/gpu/drm/i915/i915_gem_internal.c18
-rw-r--r--drivers/gpu/drm/i915/i915_gem_object.h32
-rw-r--r--drivers/gpu/drm/i915/i915_gem_request.c20
-rw-r--r--drivers/gpu/drm/i915/i915_gem_request.h13
-rw-r--r--drivers/gpu/drm/i915/i915_gem_stolen.c16
-rw-r--r--drivers/gpu/drm/i915/i915_gem_userptr.c53
-rw-r--r--drivers/gpu/drm/i915/i915_gemfs.c74
-rw-r--r--drivers/gpu/drm/i915/i915_gemfs.h34
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c14
-rw-r--r--drivers/gpu/drm/i915/i915_guc_submission.c123
-rw-r--r--drivers/gpu/drm/i915/i915_guc_submission.h80
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c138
-rw-r--r--drivers/gpu/drm/i915/i915_params.c3
-rw-r--r--drivers/gpu/drm/i915/i915_params.h1
-rw-r--r--drivers/gpu/drm/i915/i915_pci.c77
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h25
-rw-r--r--drivers/gpu/drm/i915/i915_suspend.c2
-rw-r--r--drivers/gpu/drm/i915/i915_sysfs.c76
-rw-r--r--drivers/gpu/drm/i915/i915_trace.h116
-rw-r--r--drivers/gpu/drm/i915/i915_utils.h18
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c121
-rw-r--r--drivers/gpu/drm/i915/i915_vma.h49
-rw-r--r--drivers/gpu/drm/i915/intel_audio.c2
-rw-r--r--drivers/gpu/drm/i915/intel_bios.c130
-rw-r--r--drivers/gpu/drm/i915/intel_cdclk.c40
-rw-r--r--drivers/gpu/drm/i915/intel_color.c16
-rw-r--r--drivers/gpu/drm/i915/intel_crt.c86
-rw-r--r--drivers/gpu/drm/i915/intel_csr.c2
-rw-r--r--drivers/gpu/drm/i915/intel_ddi.c44
-rw-r--r--drivers/gpu/drm/i915/intel_display.c211
-rw-r--r--drivers/gpu/drm/i915/intel_dp.c49
-rw-r--r--drivers/gpu/drm/i915/intel_dp_mst.c10
-rw-r--r--drivers/gpu/drm/i915/intel_dpio_phy.c20
-rw-r--r--drivers/gpu/drm/i915/intel_drv.h22
-rw-r--r--drivers/gpu/drm/i915/intel_dsi.c7
-rw-r--r--drivers/gpu/drm/i915/intel_dvo.c33
-rw-r--r--drivers/gpu/drm/i915/intel_engine_cs.c240
-rw-r--r--drivers/gpu/drm/i915/intel_guc.c265
-rw-r--r--drivers/gpu/drm/i915/intel_guc.h110
-rw-r--r--drivers/gpu/drm/i915/intel_guc_fwif.h48
-rw-r--r--drivers/gpu/drm/i915/intel_guc_loader.c5
-rw-r--r--drivers/gpu/drm/i915/intel_guc_log.c6
-rw-r--r--drivers/gpu/drm/i915/intel_guc_log.h59
-rw-r--r--drivers/gpu/drm/i915/intel_huc.c11
-rw-r--r--drivers/gpu/drm/i915/intel_huc.h41
-rw-r--r--drivers/gpu/drm/i915/intel_lrc.c282
-rw-r--r--drivers/gpu/drm/i915/intel_lrc.h1
-rw-r--r--drivers/gpu/drm/i915/intel_lvds.c23
-rw-r--r--drivers/gpu/drm/i915/intel_pipe_crc.c15
-rw-r--r--drivers/gpu/drm/i915/intel_pm.c695
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.c11
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.h9
-rw-r--r--drivers/gpu/drm/i915/intel_runtime_pm.c31
-rw-r--r--drivers/gpu/drm/i915/intel_sideband.c6
-rw-r--r--drivers/gpu/drm/i915/intel_sprite.c6
-rw-r--r--drivers/gpu/drm/i915/intel_uc.c313
-rw-r--r--drivers/gpu/drm/i915/intel_uc.h228
-rw-r--r--drivers/gpu/drm/i915/intel_uc_fw.c193
-rw-r--r--drivers/gpu/drm/i915/intel_uc_fw.h107
-rw-r--r--drivers/gpu/drm/i915/intel_uncore.c18
-rw-r--r--drivers/gpu/drm/i915/intel_uncore.h8
-rw-r--r--drivers/gpu/drm/i915/selftests/huge_gem_object.c14
-rw-r--r--drivers/gpu/drm/i915/selftests/huge_pages.c1734
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem_gtt.c15
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem_object.c8
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem_request.c12
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_live_selftests.h1
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_mock_selftests.h1
-rw-r--r--drivers/gpu/drm/i915/selftests/intel_hangcheck.c26
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_gem_device.c17
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_gtt.c11
-rw-r--r--drivers/gpu/drm/i915/selftests/scatterlist.c15
-rw-r--r--include/linux/shmem_fs.h2
-rw-r--r--include/uapi/drm/i915_drm.h18
-rw-r--r--mm/shmem.c30
88 files changed, 5560 insertions, 2222 deletions
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 5182e3d5557d..66d23b619db1 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -47,6 +47,7 @@ i915-y += i915_cmd_parser.o \
i915_gem_tiling.o \
i915_gem_timeline.o \
i915_gem_userptr.o \
+ i915_gemfs.o \
i915_trace_points.o \
i915_vma.o \
intel_breadcrumbs.o \
@@ -59,6 +60,8 @@ i915-y += i915_cmd_parser.o \
# general-purpose microcontroller (GuC) support
i915-y += intel_uc.o \
+ intel_uc_fw.o \
+ intel_guc.o \
intel_guc_ct.o \
intel_guc_log.o \
intel_guc_loader.o \
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index d5892d24f0b6..f6ded475bb2c 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -174,6 +174,7 @@ static int shadow_context_status_change(struct notifier_block *nb,
atomic_set(&workload->shadow_ctx_active, 1);
break;
case INTEL_CONTEXT_SCHEDULE_OUT:
+ case INTEL_CONTEXT_SCHEDULE_PREEMPTED:
atomic_set(&workload->shadow_ctx_active, 0);
break;
default:
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index b4a6ac60e7c6..0bb6e01121fc 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -30,6 +30,7 @@
#include <linux/sort.h>
#include <linux/sched/mm.h>
#include "intel_drv.h"
+#include "i915_guc_submission.h"
static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node)
{
@@ -97,7 +98,7 @@ static char get_tiling_flag(struct drm_i915_gem_object *obj)
static char get_global_flag(struct drm_i915_gem_object *obj)
{
- return !list_empty(&obj->userfault_link) ? 'g' : ' ';
+ return obj->userfault_count ? 'g' : ' ';
}
static char get_pin_mapped_flag(struct drm_i915_gem_object *obj)
@@ -118,6 +119,36 @@ static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj)
return size;
}
+static const char *
+stringify_page_sizes(unsigned int page_sizes, char *buf, size_t len)
+{
+ size_t x = 0;
+
+ switch (page_sizes) {
+ case 0:
+ return "";
+ case I915_GTT_PAGE_SIZE_4K:
+ return "4K";
+ case I915_GTT_PAGE_SIZE_64K:
+ return "64K";
+ case I915_GTT_PAGE_SIZE_2M:
+ return "2M";
+ default:
+ if (!buf)
+ return "M";
+
+ if (page_sizes & I915_GTT_PAGE_SIZE_2M)
+ x += snprintf(buf + x, len - x, "2M, ");
+ if (page_sizes & I915_GTT_PAGE_SIZE_64K)
+ x += snprintf(buf + x, len - x, "64K, ");
+ if (page_sizes & I915_GTT_PAGE_SIZE_4K)
+ x += snprintf(buf + x, len - x, "4K, ");
+ buf[x-2] = '\0';
+
+ return buf;
+ }
+}
+
static void
describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
{
@@ -155,9 +186,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
if (!drm_mm_node_allocated(&vma->node))
continue;
- seq_printf(m, " (%sgtt offset: %08llx, size: %08llx",
+ seq_printf(m, " (%sgtt offset: %08llx, size: %08llx, pages: %s",
i915_vma_is_ggtt(vma) ? "g" : "pp",
- vma->node.start, vma->node.size);
+ vma->node.start, vma->node.size,
+ stringify_page_sizes(vma->page_sizes.gtt, NULL, 0));
if (i915_vma_is_ggtt(vma)) {
switch (vma->ggtt_view.type) {
case I915_GGTT_VIEW_NORMAL:
@@ -402,10 +434,12 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
struct i915_ggtt *ggtt = &dev_priv->ggtt;
- u32 count, mapped_count, purgeable_count, dpy_count;
- u64 size, mapped_size, purgeable_size, dpy_size;
+ u32 count, mapped_count, purgeable_count, dpy_count, huge_count;
+ u64 size, mapped_size, purgeable_size, dpy_size, huge_size;
struct drm_i915_gem_object *obj;
+ unsigned int page_sizes = 0;
struct drm_file *file;
+ char buf[80];
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
@@ -419,6 +453,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
size = count = 0;
mapped_size = mapped_count = 0;
purgeable_size = purgeable_count = 0;
+ huge_size = huge_count = 0;
list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) {
size += obj->base.size;
++count;
@@ -432,6 +467,12 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
mapped_count++;
mapped_size += obj->base.size;
}
+
+ if (obj->mm.page_sizes.sg > I915_GTT_PAGE_SIZE) {
+ huge_count++;
+ huge_size += obj->base.size;
+ page_sizes |= obj->mm.page_sizes.sg;
+ }
}
seq_printf(m, "%u unbound objects, %llu bytes\n", count, size);
@@ -454,6 +495,12 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
mapped_count++;
mapped_size += obj->base.size;
}
+
+ if (obj->mm.page_sizes.sg > I915_GTT_PAGE_SIZE) {
+ huge_count++;
+ huge_size += obj->base.size;
+ page_sizes |= obj->mm.page_sizes.sg;
+ }
}
seq_printf(m, "%u bound objects, %llu bytes\n",
count, size);
@@ -461,11 +508,18 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
purgeable_count, purgeable_size);
seq_printf(m, "%u mapped objects, %llu bytes\n",
mapped_count, mapped_size);
+ seq_printf(m, "%u huge-paged objects (%s) %llu bytes\n",
+ huge_count,
+ stringify_page_sizes(page_sizes, buf, sizeof(buf)),
+ huge_size);
seq_printf(m, "%u display objects (pinned), %llu bytes\n",
dpy_count, dpy_size);
seq_printf(m, "%llu [%llu] gtt total\n",
ggtt->base.total, ggtt->mappable_end);
+ seq_printf(m, "Supported page sizes: %s\n",
+ stringify_page_sizes(INTEL_INFO(dev_priv)->page_sizes,
+ buf, sizeof(buf)));
seq_putc(m, '\n');
print_batch_pool_stats(m, dev_priv);
@@ -1026,6 +1080,7 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops,
static int i915_frequency_info(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
int ret = 0;
intel_runtime_pm_get(dev_priv);
@@ -1041,9 +1096,19 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
seq_printf(m, "Current P-state: %d\n",
(rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT);
} else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
- u32 freq_sts;
+ u32 rpmodectl, freq_sts;
+
+ mutex_lock(&dev_priv->pcu_lock);
+
+ rpmodectl = I915_READ(GEN6_RP_CONTROL);
+ seq_printf(m, "Video Turbo Mode: %s\n",
+ yesno(rpmodectl & GEN6_RP_MEDIA_TURBO));
+ seq_printf(m, "HW control enabled: %s\n",
+ yesno(rpmodectl & GEN6_RP_ENABLE));
+ seq_printf(m, "SW control enabled: %s\n",
+ yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
+ GEN6_RP_MEDIA_SW_MODE));
- mutex_lock(&dev_priv->rps.hw_lock);
freq_sts = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts);
seq_printf(m, "DDR freq: %d MHz\n", dev_priv->mem_freq);
@@ -1052,21 +1117,21 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
intel_gpu_freq(dev_priv, (freq_sts >> 8) & 0xff));
seq_printf(m, "current GPU freq: %d MHz\n",
- intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq));
+ intel_gpu_freq(dev_priv, rps->cur_freq));
seq_printf(m, "max GPU freq: %d MHz\n",
- intel_gpu_freq(dev_priv, dev_priv->rps.max_freq));
+ intel_gpu_freq(dev_priv, rps->max_freq));
seq_printf(m, "min GPU freq: %d MHz\n",
- intel_gpu_freq(dev_priv, dev_priv->rps.min_freq));
+ intel_gpu_freq(dev_priv, rps->min_freq));
seq_printf(m, "idle GPU freq: %d MHz\n",
- intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq));
+ intel_gpu_freq(dev_priv, rps->idle_freq));
seq_printf(m,
"efficient (RPe) frequency: %d MHz\n",
- intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq));
- mutex_unlock(&dev_priv->rps.hw_lock);
+ intel_gpu_freq(dev_priv, rps->efficient_freq));
+ mutex_unlock(&dev_priv->pcu_lock);
} else if (INTEL_GEN(dev_priv) >= 6) {
u32 rp_state_limits;
u32 gt_perf_status;
@@ -1136,10 +1201,17 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
pm_iir = I915_READ(GEN8_GT_IIR(2));
pm_mask = I915_READ(GEN6_PMINTRMSK);
}
+ seq_printf(m, "Video Turbo Mode: %s\n",
+ yesno(rpmodectl & GEN6_RP_MEDIA_TURBO));
+ seq_printf(m, "HW control enabled: %s\n",
+ yesno(rpmodectl & GEN6_RP_ENABLE));
+ seq_printf(m, "SW control enabled: %s\n",
+ yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
+ GEN6_RP_MEDIA_SW_MODE));
seq_printf(m, "PM IER=0x%08x IMR=0x%08x ISR=0x%08x IIR=0x%08x, MASK=0x%08x\n",
pm_ier, pm_imr, pm_isr, pm_iir, pm_mask);
seq_printf(m, "pm_intrmsk_mbz: 0x%08x\n",
- dev_priv->rps.pm_intrmsk_mbz);
+ rps->pm_intrmsk_mbz);
seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
seq_printf(m, "Render p-state ratio: %d\n",
(gt_perf_status & (INTEL_GEN(dev_priv) >= 9 ? 0x1ff00 : 0xff00)) >> 8);
@@ -1159,8 +1231,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
rpcurup, GT_PM_INTERVAL_TO_US(dev_priv, rpcurup));
seq_printf(m, "RP PREV UP: %d (%dus)\n",
rpprevup, GT_PM_INTERVAL_TO_US(dev_priv, rpprevup));
- seq_printf(m, "Up threshold: %d%%\n",
- dev_priv->rps.up_threshold);
+ seq_printf(m, "Up threshold: %d%%\n", rps->up_threshold);
seq_printf(m, "RP CUR DOWN EI: %d (%dus)\n",
rpdownei, GT_PM_INTERVAL_TO_US(dev_priv, rpdownei));
@@ -1168,8 +1239,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
rpcurdown, GT_PM_INTERVAL_TO_US(dev_priv, rpcurdown));
seq_printf(m, "RP PREV DOWN: %d (%dus)\n",
rpprevdown, GT_PM_INTERVAL_TO_US(dev_priv, rpprevdown));
- seq_printf(m, "Down threshold: %d%%\n",
- dev_priv->rps.down_threshold);
+ seq_printf(m, "Down threshold: %d%%\n", rps->down_threshold);
max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 0 :
rp_state_cap >> 16) & 0xff;
@@ -1191,22 +1261,22 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
intel_gpu_freq(dev_priv, max_freq));
seq_printf(m, "Max overclocked frequency: %dMHz\n",
- intel_gpu_freq(dev_priv, dev_priv->rps.max_freq));
+ intel_gpu_freq(dev_priv, rps->max_freq));
seq_printf(m, "Current freq: %d MHz\n",
- intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq));
+ intel_gpu_freq(dev_priv, rps->cur_freq));
seq_printf(m, "Actual freq: %d MHz\n", cagf);
seq_printf(m, "Idle freq: %d MHz\n",
- intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq));
+ intel_gpu_freq(dev_priv, rps->idle_freq));
seq_printf(m, "Min freq: %d MHz\n",
- intel_gpu_freq(dev_priv, dev_priv->rps.min_freq));
+ intel_gpu_freq(dev_priv, rps->min_freq));
seq_printf(m, "Boost freq: %d MHz\n",
- intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq));
+ intel_gpu_freq(dev_priv, rps->boost_freq));
seq_printf(m, "Max freq: %d MHz\n",
- intel_gpu_freq(dev_priv, dev_priv->rps.max_freq));
+ intel_gpu_freq(dev_priv, rps->max_freq));
seq_printf(m,
"efficient (RPe) frequency: %d MHz\n",
- intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq));
+ intel_gpu_freq(dev_priv, rps->efficient_freq));
} else {
seq_puts(m, "no P-state info available\n");
}
@@ -1447,21 +1517,11 @@ static void print_rc6_res(struct seq_file *m,
static int vlv_drpc_info(struct seq_file *m)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
- u32 rpmodectl1, rcctl1, pw_status;
+ u32 rcctl1, pw_status;
pw_status = I915_READ(VLV_GTLC_PW_STATUS);
- rpmodectl1 = I915_READ(GEN6_RP_CONTROL);
rcctl1 = I915_READ(GEN6_RC_CONTROL);
- seq_printf(m, "Video Turbo Mode: %s\n",
- yesno(rpmodectl1 & GEN6_RP_MEDIA_TURBO));
- seq_printf(m, "Turbo enabled: %s\n",
- yesno(rpmodectl1 & GEN6_RP_ENABLE));
- seq_printf(m, "HW control enabled: %s\n",
- yesno(rpmodectl1 & GEN6_RP_ENABLE));
- seq_printf(m, "SW control enabled: %s\n",
- yesno((rpmodectl1 & GEN6_RP_MEDIA_MODE_MASK) ==
- GEN6_RP_MEDIA_SW_MODE));
seq_printf(m, "RC6 Enabled: %s\n",
yesno(rcctl1 & (GEN7_RC_CTL_TO_MODE |
GEN6_RC_CTL_EI_MODE(1))));
@@ -1479,7 +1539,7 @@ static int vlv_drpc_info(struct seq_file *m)
static int gen6_drpc_info(struct seq_file *m)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
- u32 rpmodectl1, gt_core_status, rcctl1, rc6vids = 0;
+ u32 gt_core_status, rcctl1, rc6vids = 0;
u32 gen9_powergate_enable = 0, gen9_powergate_status = 0;
unsigned forcewake_count;
int count = 0;
@@ -1498,24 +1558,16 @@ static int gen6_drpc_info(struct seq_file *m)
gt_core_status = I915_READ_FW(GEN6_GT_CORE_STATUS);
trace_i915_reg_rw(false, GEN6_GT_CORE_STATUS, gt_core_status, 4, true);
- rpmodectl1 = I915_READ(GEN6_RP_CONTROL);
rcctl1 = I915_READ(GEN6_RC_CONTROL);
if (INTEL_GEN(dev_priv) >= 9) {
gen9_powergate_enable = I915_READ(GEN9_PG_ENABLE);
gen9_powergate_status = I915_READ(GEN9_PWRGT_DOMAIN_STATUS);
}
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
- seq_printf(m, "Video Turbo Mode: %s\n",
- yesno(rpmodectl1 & GEN6_RP_MEDIA_TURBO));
- seq_printf(m, "HW control enabled: %s\n",
- yesno(rpmodectl1 & GEN6_RP_ENABLE));
- seq_printf(m, "SW control enabled: %s\n",
- yesno((rpmodectl1 & GEN6_RP_MEDIA_MODE_MASK) ==
- GEN6_RP_MEDIA_SW_MODE));
seq_printf(m, "RC1e Enabled: %s\n",
yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE));
seq_printf(m, "RC6 Enabled: %s\n",
@@ -1778,6 +1830,7 @@ static int i915_emon_status(struct seq_file *m, void *unused)
static int i915_ring_freq_table(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
int ret = 0;
int gpu_freq, ia_freq;
unsigned int max_gpu_freq, min_gpu_freq;
@@ -1789,19 +1842,17 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
intel_runtime_pm_get(dev_priv);
- ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
+ ret = mutex_lock_interruptible(&dev_priv->pcu_lock);
if (ret)
goto out;
if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
/* Convert GT frequency to 50 HZ units */
- min_gpu_freq =
- dev_priv->rps.min_freq_softlimit / GEN9_FREQ_SCALER;
- max_gpu_freq =
- dev_priv->rps.max_freq_softlimit / GEN9_FREQ_SCALER;
+ min_gpu_freq = rps->min_freq_softlimit / GEN9_FREQ_SCALER;
+ max_gpu_freq = rps->max_freq_softlimit / GEN9_FREQ_SCALER;
} else {
- min_gpu_freq = dev_priv->rps.min_freq_softlimit;
- max_gpu_freq = dev_priv->rps.max_freq_softlimit;
+ min_gpu_freq = rps->min_freq_softlimit;
+ max_gpu_freq = rps->max_freq_softlimit;
}
seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n");
@@ -1820,7 +1871,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
((ia_freq >> 8) & 0xff) * 100);
}
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
out:
intel_runtime_pm_put(dev_priv);
@@ -2254,25 +2305,26 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
struct drm_file *file;
- seq_printf(m, "RPS enabled? %d\n", dev_priv->rps.enabled);
+ seq_printf(m, "RPS enabled? %d\n", rps->enabled);
seq_printf(m, "GPU busy? %s [%d requests]\n",
yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
seq_printf(m, "Boosts outstanding? %d\n",
- atomic_read(&dev_priv->rps.num_waiters));
+ atomic_read(&rps->num_waiters));
seq_printf(m, "Frequency requested %d\n",
- intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq));
+ intel_gpu_freq(dev_priv, rps->cur_freq));
seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n",
- intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
- intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit),
- intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit),
- intel_gpu_freq(dev_priv, dev_priv->rps.max_freq));
+ intel_gpu_freq(dev_priv, rps->min_freq),
+ intel_gpu_freq(dev_priv, rps->min_freq_softlimit),
+ intel_gpu_freq(dev_priv, rps->max_freq_softlimit),
+ intel_gpu_freq(dev_priv, rps->max_freq));
seq_printf(m, " idle:%d, efficient:%d, boost:%d\n",
- intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq),
- intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
- intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq));
+ intel_gpu_freq(dev_priv, rps->idle_freq),
+ intel_gpu_freq(dev_priv, rps->efficient_freq),
+ intel_gpu_freq(dev_priv, rps->boost_freq));
mutex_lock(&dev->filelist_mutex);
list_for_each_entry_reverse(file, &dev->filelist, lhead) {
@@ -2284,15 +2336,15 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
seq_printf(m, "%s [%d]: %d boosts\n",
task ? task->comm : "<unknown>",
task ? task->pid : -1,
- atomic_read(&file_priv->rps.boosts));
+ atomic_read(&file_priv->rps_client.boosts));
rcu_read_unlock();
}
seq_printf(m, "Kernel (anonymous) boosts: %d\n",
- atomic_read(&dev_priv->rps.boosts));
+ atomic_read(&rps->boosts));
mutex_unlock(&dev->filelist_mutex);
if (INTEL_GEN(dev_priv) >= 6 &&
- dev_priv->rps.enabled &&
+ rps->enabled &&
dev_priv->gt.active_requests) {
u32 rpup, rpupei;
u32 rpdown, rpdownei;
@@ -2305,13 +2357,13 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n",
- rps_power_to_str(dev_priv->rps.power));
+ rps_power_to_str(rps->power));
seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n",
rpup && rpupei ? 100 * rpup / rpupei : 0,
- dev_priv->rps.up_threshold);
+ rps->up_threshold);
seq_printf(m, " Avg. down: %d%% [below threshold? %d%%]\n",
rpdown && rpdownei ? 100 * rpdown / rpdownei : 0,
- dev_priv->rps.down_threshold);
+ rps->down_threshold);
} else {
seq_puts(m, "\nRPS Autotuning inactive\n");
}
@@ -3238,9 +3290,9 @@ static int i915_display_info(struct seq_file *m, void *unused)
static int i915_engine_info(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
- struct i915_gpu_error *error = &dev_priv->gpu_error;
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ struct drm_printer p;
intel_runtime_pm_get(dev_priv);
@@ -3249,149 +3301,9 @@ static int i915_engine_info(struct seq_file *m, void *unused)
seq_printf(m, "Global active requests: %d\n",
dev_priv->gt.active_requests);
- for_each_engine(engine, dev_priv, id) {
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
- struct drm_i915_gem_request *rq;
- struct rb_node *rb;
- u64 addr;
-
- seq_printf(m, "%s\n", engine->name);
- seq_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n",
- intel_engine_get_seqno(engine),
- intel_engine_last_submit(engine),
- engine->hangcheck.seqno,
- jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp),
- engine->timeline->inflight_seqnos);
- seq_printf(m, "\tReset count: %d\n",
- i915_reset_engine_count(error, engine));
-
- rcu_read_lock();
-
- seq_printf(m, "\tRequests:\n");
-
- rq = list_first_entry(&engine->timeline->requests,
- struct drm_i915_gem_request, link);
- if (&rq->link != &engine->timeline->requests)
- print_request(m, rq, "\t\tfirst ");
-
- rq = list_last_entry(&engine->timeline->requests,
- struct drm_i915_gem_request, link);
- if (&rq->link != &engine->timeline->requests)
- print_request(m, rq, "\t\tlast ");
-
- rq = i915_gem_find_active_request(engine);
- if (rq) {
- print_request(m, rq, "\t\tactive ");
- seq_printf(m,
- "\t\t[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]\n",
- rq->head, rq->postfix, rq->tail,
- rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
- rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
- }
-
- seq_printf(m, "\tRING_START: 0x%08x [0x%08x]\n",
- I915_READ(RING_START(engine->mmio_base)),
- rq ? i915_ggtt_offset(rq->ring->vma) : 0);
- seq_printf(m, "\tRING_HEAD: 0x%08x [0x%08x]\n",
- I915_READ(RING_HEAD(engine->mmio_base)) & HEAD_ADDR,
- rq ? rq->ring->head : 0);
- seq_printf(m, "\tRING_TAIL: 0x%08x [0x%08x]\n",
- I915_READ(RING_TAIL(engine->mmio_base)) & TAIL_ADDR,
- rq ? rq->ring->tail : 0);
- seq_printf(m, "\tRING_CTL: 0x%08x [%s]\n",
- I915_READ(RING_CTL(engine->mmio_base)),
- I915_READ(RING_CTL(engine->mmio_base)) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? "waiting" : "");
-
- rcu_read_unlock();
-
- addr = intel_engine_get_active_head(engine);
- seq_printf(m, "\tACTHD: 0x%08x_%08x\n",
- upper_32_bits(addr), lower_32_bits(addr));
- addr = intel_engine_get_last_batch_head(engine);
- seq_printf(m, "\tBBADDR: 0x%08x_%08x\n",
- upper_32_bits(addr), lower_32_bits(addr));
-
- if (i915_modparams.enable_execlists) {
- const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
- struct intel_engine_execlists * const execlists = &engine->execlists;
- u32 ptr, read, write;
- unsigned int idx;
-
- seq_printf(m, "\tExeclist status: 0x%08x %08x\n",
- I915_READ(RING_EXECLIST_STATUS_LO(engine)),
- I915_READ(RING_EXECLIST_STATUS_HI(engine)));
-
- ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
- read = GEN8_CSB_READ_PTR(ptr);
- write = GEN8_CSB_WRITE_PTR(ptr);
- seq_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s\n",
- read, execlists->csb_head,
- write,
- intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)),
- yesno(test_bit(ENGINE_IRQ_EXECLIST,
- &engine->irq_posted)));
- if (read >= GEN8_CSB_ENTRIES)
- read = 0;
- if (write >= GEN8_CSB_ENTRIES)
- write = 0;
- if (read > write)
- write += GEN8_CSB_ENTRIES;
- while (read < write) {
- idx = ++read % GEN8_CSB_ENTRIES;
- seq_printf(m, "\tExeclist CSB[%d]: 0x%08x [0x%08x in hwsp], context: %d [%d in hwsp]\n",
- idx,
- I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)),
- hws[idx * 2],
- I915_READ(RING_CONTEXT_STATUS_BUF_HI(engine, idx)),
- hws[idx * 2 + 1]);
- }
-
- rcu_read_lock();
- for (idx = 0; idx < execlists_num_ports(execlists); idx++) {
- unsigned int count;
-
- rq = port_unpack(&execlists->port[idx], &count);
- if (rq) {
- seq_printf(m, "\t\tELSP[%d] count=%d, ",
- idx, count);
- print_request(m, rq, "rq: ");
- } else {
- seq_printf(m, "\t\tELSP[%d] idle\n",
- idx);
- }
- }
- rcu_read_unlock();
-
- spin_lock_irq(&engine->timeline->lock);
- for (rb = execlists->first; rb; rb = rb_next(rb)) {
- struct i915_priolist *p =
- rb_entry(rb, typeof(*p), node);
-
- list_for_each_entry(rq, &p->requests,
- priotree.link)
- print_request(m, rq, "\t\tQ ");
- }
- spin_unlock_irq(&engine->timeline->lock);
- } else if (INTEL_GEN(dev_priv) > 6) {
- seq_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
- I915_READ(RING_PP_DIR_BASE(engine)));
- seq_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n",
- I915_READ(RING_PP_DIR_BASE_READ(engine)));
- seq_printf(m, "\tPP_DIR_DCLV: 0x%08x\n",
- I915_READ(RING_PP_DIR_DCLV(engine)));
- }
-
- spin_lock_irq(&b->rb_lock);
- for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
- struct intel_wait *w = rb_entry(rb, typeof(*w), node);
-
- seq_printf(m, "\t%s [%d] waiting for %x\n",
- w->tsk->comm, w->tsk->pid, w->seqno);
- }
- spin_unlock_irq(&b->rb_lock);
-
- seq_puts(m, "\n");
- }
+ p = drm_seq_file_printer(m);
+ for_each_engine(engine, dev_priv, id)
+ intel_engine_dump(engine, &p);
intel_runtime_pm_put(dev_priv);
@@ -4258,8 +4170,7 @@ fault_irq_set(struct drm_i915_private *i915,
mutex_unlock(&i915->drm.struct_mutex);
/* Flush idle worker to disarm irq */
- while (flush_delayed_work(&i915->gt.idle_work))
- ;
+ drain_delayed_work(&i915->gt.idle_work);
return 0;
@@ -4392,7 +4303,7 @@ i915_max_freq_get(void *data, u64 *val)
if (INTEL_GEN(dev_priv) < 6)
return -ENODEV;
- *val = intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit);
+ *val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.max_freq_softlimit);
return 0;
}
@@ -4400,6 +4311,7 @@ static int
i915_max_freq_set(void *data, u64 val)
{
struct drm_i915_private *dev_priv = data;
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
u32 hw_max, hw_min;
int ret;
@@ -4408,7 +4320,7 @@ i915_max_freq_set(void *data, u64 val)
DRM_DEBUG_DRIVER("Manually setting max freq to %llu\n", val);
- ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
+ ret = mutex_lock_interruptible(&dev_priv->pcu_lock);
if (ret)
return ret;
@@ -4417,20 +4329,20 @@ i915_max_freq_set(void *data, u64 val)
*/
val = intel_freq_opcode(dev_priv, val);
- hw_max = dev_priv->rps.max_freq;
- hw_min = dev_priv->rps.min_freq;
+ hw_max = rps->max_freq;
+ hw_min = rps->min_freq;
- if (val < hw_min || val > hw_max || val < dev_priv->rps.min_freq_softlimit) {
- mutex_unlock(&dev_priv->rps.hw_lock);
+ if (val < hw_min || val > hw_max || val < rps->min_freq_softlimit) {
+ mutex_unlock(&dev_priv->pcu_lock);
return -EINVAL;
}
- dev_priv->rps.max_freq_softlimit = val;
+ rps->max_freq_softlimit = val;
if (intel_set_rps(dev_priv, val))
DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n");
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
return 0;
}
@@ -4447,7 +4359,7 @@ i915_min_freq_get(void *data, u64 *val)
if (INTEL_GEN(dev_priv) < 6)
return -ENODEV;
- *val = intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit);
+ *val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.min_freq_softlimit);
return 0;
}
@@ -4455,6 +4367,7 @@ static int
i915_min_freq_set(void *data, u64 val)
{
struct drm_i915_private *dev_priv = data;
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
u32 hw_max, hw_min;
int ret;
@@ -4463,7 +4376,7 @@ i915_min_freq_set(void *data, u64 val)
DRM_DEBUG_DRIVER("Manually setting min freq to %llu\n", val);
- ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
+ ret = mutex_lock_interruptible(&dev_priv->pcu_lock);
if (ret)
return ret;
@@ -4472,21 +4385,21 @@ i915_min_freq_set(void *data, u64 val)
*/
val = intel_freq_opcode(dev_priv, val);
- hw_max = dev_priv->rps.max_freq;
- hw_min = dev_priv->rps.min_freq;
+ hw_max = rps->max_freq;
+ hw_min = rps->min_freq;
if (val < hw_min ||
- val > hw_max || val > dev_priv->rps.max_freq_softlimit) {
- mutex_unlock(&dev_priv->rps.hw_lock);
+ val > hw_max || val > rps->max_freq_softlimit) {
+ mutex_unlock(&dev_priv->pcu_lock);
return -EINVAL;
}
- dev_priv->rps.min_freq_softlimit = val;
+ rps->min_freq_softlimit = val;
if (intel_set_rps(dev_priv, val))
DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n");
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 59ac9199b35d..3db5851756f0 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -367,9 +367,18 @@ static int i915_getparam(struct drm_device *dev, void *data,
value = i915_gem_mmap_gtt_version();
break;
case I915_PARAM_HAS_SCHEDULER:
- value = dev_priv->engine[RCS] &&
- dev_priv->engine[RCS]->schedule;
+ value = 0;
+ if (dev_priv->engine[RCS] && dev_priv->engine[RCS]->schedule) {
+ value |= I915_SCHEDULER_CAP_ENABLED;
+ value |= I915_SCHEDULER_CAP_PRIORITY;
+
+ if (INTEL_INFO(dev_priv)->has_logical_ring_preemption &&
+ i915_modparams.enable_execlists &&
+ !i915_modparams.enable_guc_submission)
+ value |= I915_SCHEDULER_CAP_PREEMPTION;
+ }
break;
+
case I915_PARAM_MMAP_VERSION:
/* Remember to bump this if the version changes! */
case I915_PARAM_HAS_GEM:
@@ -606,9 +615,10 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv)
intel_uc_fini_hw(dev_priv);
i915_gem_cleanup_engines(dev_priv);
i915_gem_contexts_fini(dev_priv);
- i915_gem_cleanup_userptr(dev_priv);
mutex_unlock(&dev_priv->drm.struct_mutex);
+ i915_gem_cleanup_userptr(dev_priv);
+
i915_gem_drain_freed_objects(dev_priv);
WARN_ON(!list_empty(&dev_priv->contexts.list));
@@ -1007,6 +1017,8 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv)
intel_uncore_init(dev_priv);
+ intel_uc_init_mmio(dev_priv);
+
ret = intel_engines_init_mmio(dev_priv);
if (ret)
goto err_uncore;
@@ -1580,7 +1592,7 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
intel_display_set_init_power(dev_priv, false);
- fw_csr = !IS_GEN9_LP(dev_priv) &&
+ fw_csr = !IS_GEN9_LP(dev_priv) && !hibernation &&
suspend_to_idle(dev_priv) && dev_priv->csr.dmc_payload;
/*
* In case of firmware assisted context save/restore don't manually
@@ -2070,11 +2082,14 @@ static int i915_pm_resume(struct device *kdev)
/* freeze: before creating the hibernation_image */
static int i915_pm_freeze(struct device *kdev)
{
+ struct drm_device *dev = &kdev_to_i915(kdev)->drm;
int ret;
- ret = i915_pm_suspend(kdev);
- if (ret)
- return ret;
+ if (dev->switch_power_state != DRM_SWITCH_POWER_OFF) {
+ ret = i915_drm_suspend(dev);
+ if (ret)
+ return ret;
+ }
ret = i915_gem_freeze(kdev_to_i915(kdev));
if (ret)
@@ -2085,11 +2100,14 @@ static int i915_pm_freeze(struct device *kdev)
static int i915_pm_freeze_late(struct device *kdev)
{
+ struct drm_device *dev = &kdev_to_i915(kdev)->drm;
int ret;
- ret = i915_pm_suspend_late(kdev);
- if (ret)
- return ret;
+ if (dev->switch_power_state != DRM_SWITCH_POWER_OFF) {
+ ret = i915_drm_suspend_late(dev, true);
+ if (ret)
+ return ret;
+ }
ret = i915_gem_freeze_late(kdev_to_i915(kdev));
if (ret)
@@ -2485,7 +2503,7 @@ static int intel_runtime_suspend(struct device *kdev)
struct drm_i915_private *dev_priv = to_i915(dev);
int ret;
- if (WARN_ON_ONCE(!(dev_priv->rps.enabled && intel_enable_rc6())))
+ if (WARN_ON_ONCE(!(dev_priv->gt_pm.rc6.enabled && intel_rc6_enabled())))
return -ENODEV;
if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev_priv)))
@@ -2527,12 +2545,12 @@ static int intel_runtime_suspend(struct device *kdev)
intel_uncore_suspend(dev_priv);
enable_rpm_wakeref_asserts(dev_priv);
- WARN_ON_ONCE(atomic_read(&dev_priv->pm.wakeref_count));
+ WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count));
if (intel_uncore_arm_unclaimed_mmio_detection(dev_priv))
DRM_ERROR("Unclaimed access detected prior to suspending\n");
- dev_priv->pm.suspended = true;
+ dev_priv->runtime_pm.suspended = true;
/*
* FIXME: We really should find a document that references the arguments
@@ -2578,11 +2596,11 @@ static int intel_runtime_resume(struct device *kdev)
DRM_DEBUG_KMS("Resuming device\n");
- WARN_ON_ONCE(atomic_read(&dev_priv->pm.wakeref_count));
+ WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count));
disable_rpm_wakeref_asserts(dev_priv);
intel_opregion_notify_adapter(dev_priv, PCI_D0);
- dev_priv->pm.suspended = false;
+ dev_priv->runtime_pm.suspended = false;
if (intel_uncore_unclaimed_mmio(dev_priv))
DRM_DEBUG_DRIVER("Unclaimed access during suspend, bios?\n");
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7ca11318ac69..c7b2ca6aff05 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -80,8 +80,8 @@
#define DRIVER_NAME "i915"
#define DRIVER_DESC "Intel Graphics"
-#define DRIVER_DATE "20170929"
-#define DRIVER_TIMESTAMP 1506682238
+#define DRIVER_DATE "20171012"
+#define DRIVER_TIMESTAMP 1507831511
/* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and
* WARN_ON()) for hw state sanity checks to check for unexpected conditions
@@ -609,7 +609,7 @@ struct drm_i915_file_private {
struct intel_rps_client {
atomic_t boosts;
- } rps;
+ } rps_client;
unsigned int bsd_engine;
@@ -783,6 +783,7 @@ struct intel_csr {
func(has_l3_dpf); \
func(has_llc); \
func(has_logical_ring_contexts); \
+ func(has_logical_ring_preemption); \
func(has_overlay); \
func(has_pipe_cxsr); \
func(has_pooled_eu); \
@@ -868,6 +869,8 @@ struct intel_device_info {
u8 num_sprites[I915_MAX_PIPES];
u8 num_scalers[I915_MAX_PIPES];
+ unsigned int page_sizes; /* page sizes supported by the HW */
+
#define DEFINE_FLAG(name) u8 name:1
DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG);
#undef DEFINE_FLAG
@@ -981,6 +984,7 @@ struct i915_gpu_state {
pid_t pid;
u32 handle;
u32 hw_id;
+ int priority;
int ban_score;
int active;
int guilty;
@@ -1003,6 +1007,7 @@ struct i915_gpu_state {
long jiffies;
pid_t pid;
u32 context;
+ int priority;
int ban_score;
u32 seqno;
u32 head;
@@ -1312,7 +1317,7 @@ struct intel_rps_ei {
u32 media_c0;
};
-struct intel_gen6_power_mgmt {
+struct intel_rps {
/*
* work, interrupts_enabled and pm_iir are protected by
* dev_priv->irq_lock
@@ -1353,20 +1358,26 @@ struct intel_gen6_power_mgmt {
enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
bool enabled;
- struct delayed_work autoenable_work;
atomic_t num_waiters;
atomic_t boosts;
/* manual wa residency calculations */
struct intel_rps_ei ei;
+};
- /*
- * Protects RPS/RC6 register access and PCU communication.
- * Must be taken after struct_mutex if nested. Note that
- * this lock may be held for long periods of time when
- * talking to hw - so only take it when talking to hw!
- */
- struct mutex hw_lock;
+struct intel_rc6 {
+ bool enabled;
+};
+
+struct intel_llc_pstate {
+ bool enabled;
+};
+
+struct intel_gen6_power_mgmt {
+ struct intel_rps rps;
+ struct intel_rc6 rc6;
+ struct intel_llc_pstate llc_pstate;
+ struct delayed_work autoenable_work;
};
/* defined intel_pm.c */
@@ -1508,6 +1519,11 @@ struct i915_gem_mm {
/** Usable portion of the GTT for GEM */
dma_addr_t stolen_base; /* limited to low memory (32-bit) */
+ /**
+ * tmpfs instance used for shmem backed objects
+ */
+ struct vfsmount *gemfs;
+
/** PPGTT used for aliasing the PPGTT with the GTT */
struct i915_hw_ppgtt *aliasing_ppgtt;
@@ -2251,8 +2267,11 @@ struct drm_i915_private {
wait_queue_head_t gmbus_wait_queue;
struct pci_dev *bridge_dev;
- struct i915_gem_context *kernel_context;
struct intel_engine_cs *engine[I915_NUM_ENGINES];
+ /* Context used internally to idle the GPU and setup initial state */
+ struct i915_gem_context *kernel_context;
+ /* Context only to be used for injecting preemption commands */
+ struct i915_gem_context *preempt_context;
struct i915_vma *semaphore;
struct drm_dma_handle *status_page_dmah;
@@ -2408,8 +2427,16 @@ struct drm_i915_private {
/* Cannot be determined by PCIID. You must always read a register. */
u32 edram_cap;
- /* gen6+ rps state */
- struct intel_gen6_power_mgmt rps;
+ /*
+ * Protects RPS/RC6 register access and PCU communication.
+ * Must be taken after struct_mutex if nested. Note that
+ * this lock may be held for long periods of time when
+ * talking to hw - so only take it when talking to hw!
+ */
+ struct mutex pcu_lock;
+
+ /* gen6+ GT PM state */
+ struct intel_gen6_power_mgmt gt_pm;
/* ilk-only ips/rps state. Everything in here is protected by the global
* mchdev_lock in intel_pm.c */
@@ -2520,7 +2547,7 @@ struct drm_i915_private {
bool distrust_bios_wm;
} wm;
- struct i915_runtime_pm pm;
+ struct i915_runtime_pm runtime_pm;
struct {
bool initialized;
@@ -2859,6 +2886,21 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg)
(((__iter).curr += PAGE_SIZE) >= (__iter).max) ? \
(__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0 : 0)
+static inline unsigned int i915_sg_page_sizes(struct scatterlist *sg)
+{
+ unsigned int page_sizes;
+
+ page_sizes = 0;
+ while (sg) {
+ GEM_BUG_ON(sg->offset);
+ GEM_BUG_ON(!IS_ALIGNED(sg->length, PAGE_SIZE));
+ page_sizes |= sg->length;
+ sg = __sg_next(sg);
+ }
+
+ return page_sizes;
+}
+
static inline unsigned int i915_sg_segment_size(void)
{
unsigned int size = swiotlb_max_segment();
@@ -3088,6 +3130,10 @@ intel_info(const struct drm_i915_private *dev_priv)
#define USES_PPGTT(dev_priv) (i915_modparams.enable_ppgtt)
#define USES_FULL_PPGTT(dev_priv) (i915_modparams.enable_ppgtt >= 2)
#define USES_FULL_48BIT_PPGTT(dev_priv) (i915_modparams.enable_ppgtt == 3)
+#define HAS_PAGE_SIZES(dev_priv, sizes) ({ \
+ GEM_BUG_ON((sizes) == 0); \
+ ((sizes) & ~(dev_priv)->info.page_sizes) == 0; \
+})
#define HAS_OVERLAY(dev_priv) ((dev_priv)->info.has_overlay)
#define OVERLAY_NEEDS_PHYSICAL(dev_priv) \
@@ -3504,7 +3550,8 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
unsigned long n);
void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
- struct sg_table *pages);
+ struct sg_table *pages,
+ unsigned int sg_page_sizes);
int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
static inline int __must_check
@@ -3726,8 +3773,6 @@ i915_vm_to_ppgtt(struct i915_address_space *vm)
}
/* i915_gem_fence_reg.c */
-int __must_check i915_vma_get_fence(struct i915_vma *vma);
-int __must_check i915_vma_put_fence(struct i915_vma *vma);
struct drm_i915_fence_reg *
i915_reserve_fence(struct drm_i915_private *dev_priv);
void i915_unreserve_fence(struct drm_i915_fence_reg *fence);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 73eeb6b1f1cd..20fcac37c85a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -35,6 +35,7 @@
#include "intel_drv.h"
#include "intel_frontbuffer.h"
#include "intel_mocs.h"
+#include "i915_gemfs.h"
#include <linux/dma-fence-array.h>
#include <linux/kthread.h>
#include <linux/reservation.h>
@@ -161,8 +162,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
return 0;
}
-static struct sg_table *
-i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
+static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
struct address_space *mapping = obj->base.filp->f_mapping;
drm_dma_handle_t *phys;
@@ -170,9 +170,10 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
struct scatterlist *sg;
char *vaddr;
int i;
+ int err;
if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
/* Always aligning to the object size, allows a single allocation
* to handle all possible callers, and given typical object sizes,
@@ -182,7 +183,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
roundup_pow_of_two(obj->base.size),
roundup_pow_of_two(obj->base.size));
if (!phys)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
vaddr = phys->vaddr;
for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
@@ -191,7 +192,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
page = shmem_read_mapping_page(mapping, i);
if (IS_ERR(page)) {
- st = ERR_CAST(page);
+ err = PTR_ERR(page);
goto err_phys;
}
@@ -208,13 +209,13 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
st = kmalloc(sizeof(*st), GFP_KERNEL);
if (!st) {
- st = ERR_PTR(-ENOMEM);
+ err = -ENOMEM;
goto err_phys;
}
if (sg_alloc_table(st, 1, GFP_KERNEL)) {
kfree(st);
- st = ERR_PTR(-ENOMEM);
+ err = -ENOMEM;
goto err_phys;
}
@@ -226,11 +227,15 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
sg_dma_len(sg) = obj->base.size;
obj->phys_handle = phys;
- return st;
+
+ __i915_gem_object_set_pages(obj, st, sg->length);
+
+ return 0;
err_phys:
drm_pci_free(obj->base.dev, phys);
- return st;
+
+ return err;
}
static void __start_cpu_write(struct drm_i915_gem_object *obj)
@@ -353,7 +358,7 @@ static long
i915_gem_object_wait_fence(struct dma_fence *fence,
unsigned int flags,
long timeout,
- struct intel_rps_client *rps)
+ struct intel_rps_client *rps_client)
{
struct drm_i915_gem_request *rq;
@@ -386,11 +391,11 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
* forcing the clocks too high for the whole system, we only allow
* each client to waitboost once in a busy period.
*/
- if (rps) {
+ if (rps_client) {
if (INTEL_GEN(rq->i915) >= 6)
- gen6_rps_boost(rq, rps);
+ gen6_rps_boost(rq, rps_client);
else
- rps = NULL;
+ rps_client = NULL;
}
timeout = i915_wait_request(rq, flags, timeout);
@@ -406,7 +411,7 @@ static long
i915_gem_object_wait_reservation(struct reservation_object *resv,
unsigned int flags,
long timeout,
- struct intel_rps_client *rps)
+ struct intel_rps_client *rps_client)
{
unsigned int seq = __read_seqcount_begin(&resv->seq);
struct dma_fence *excl;
@@ -425,7 +430,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
for (i = 0; i < count; i++) {
timeout = i915_gem_object_wait_fence(shared[i],
flags, timeout,
- rps);
+ rps_client);
if (timeout < 0)
break;
@@ -442,7 +447,8 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
}
if (excl && timeout >= 0) {
- timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps);
+ timeout = i915_gem_object_wait_fence(excl, flags, timeout,
+ rps_client);
prune_fences = timeout >= 0;
}
@@ -538,7 +544,7 @@ int
i915_gem_object_wait(struct drm_i915_gem_object *obj,
unsigned int flags,
long timeout,
- struct intel_rps_client *rps)
+ struct intel_rps_client *rps_client)
{
might_sleep();
#if IS_ENABLED(CONFIG_LOCKDEP)
@@ -550,7 +556,7 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj,
timeout = i915_gem_object_wait_reservation(obj->resv,
flags, timeout,
- rps);
+ rps_client);
return timeout < 0 ? timeout : 0;
}
@@ -558,7 +564,7 @@ static struct intel_rps_client *to_rps_client(struct drm_file *file)
{
struct drm_i915_file_private *fpriv = file->driver_priv;
- return &fpriv->rps;
+ return &fpriv->rps_client;
}
static int
@@ -1050,7 +1056,9 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
intel_runtime_pm_get(i915);
vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
- PIN_MAPPABLE | PIN_NONBLOCK);
+ PIN_MAPPABLE |
+ PIN_NONFAULT |
+ PIN_NONBLOCK);
if (!IS_ERR(vma)) {
node.start = i915_ggtt_offset(vma);
node.allocated = false;
@@ -1234,7 +1242,9 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
intel_runtime_pm_get(i915);
vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
- PIN_MAPPABLE | PIN_NONBLOCK);
+ PIN_MAPPABLE |
+ PIN_NONFAULT |
+ PIN_NONBLOCK);
if (!IS_ERR(vma)) {
node.start = i915_ggtt_offset(vma);
node.allocated = false;
@@ -1905,22 +1915,27 @@ int i915_gem_fault(struct vm_fault *vmf)
if (ret)
goto err_unpin;
- ret = i915_vma_get_fence(vma);
+ ret = i915_vma_pin_fence(vma);
if (ret)
goto err_unpin;
- /* Mark as being mmapped into userspace for later revocation */
- assert_rpm_wakelock_held(dev_priv);
- if (list_empty(&obj->userfault_link))
- list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
-
/* Finally, remap it using the new GTT offset */
ret = remap_io_mapping(area,
area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
(ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT,
min_t(u64, vma->size, area->vm_end - area->vm_start),
&ggtt->mappable);
+ if (ret)
+ goto err_fence;
+ /* Mark as being mmapped into userspace for later revocation */
+ assert_rpm_wakelock_held(dev_priv);
+ if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
+ list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
+ GEM_BUG_ON(!obj->userfault_count);
+
+err_fence:
+ i915_vma_unpin_fence(vma);
err_unpin:
__i915_vma_unpin(vma);
err_unlock:
@@ -1972,6 +1987,25 @@ err:
return ret;
}
+static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
+{
+ struct i915_vma *vma;
+
+ GEM_BUG_ON(!obj->userfault_count);
+
+ obj->userfault_count = 0;
+ list_del(&obj->userfault_link);
+ drm_vma_node_unmap(&obj->base.vma_node,
+ obj->base.dev->anon_inode->i_mapping);
+
+ list_for_each_entry(vma, &obj->vma_list, obj_link) {
+ if (!i915_vma_is_ggtt(vma))
+ break;
+
+ i915_vma_unset_userfault(vma);
+ }
+}
+
/**
* i915_gem_release_mmap - remove physical page mappings
* @obj: obj in question
@@ -2002,12 +2036,10 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
lockdep_assert_held(&i915->drm.struct_mutex);
intel_runtime_pm_get(i915);
- if (list_empty(&obj->userfault_link))
+ if (!obj->userfault_count)
goto out;
- list_del_init(&obj->userfault_link);
- drm_vma_node_unmap(&obj->base.vma_node,
- obj->base.dev->anon_inode->i_mapping);
+ __i915_gem_object_release_mmap(obj);
/* Ensure that the CPU's PTE are revoked and there are not outstanding
* memory transactions from userspace before we return. The TLB
@@ -2035,11 +2067,8 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
*/
list_for_each_entry_safe(obj, on,
- &dev_priv->mm.userfault_list, userfault_link) {
- list_del_init(&obj->userfault_link);
- drm_vma_node_unmap(&obj->base.vma_node,
- obj->base.dev->anon_inode->i_mapping);
- }
+ &dev_priv->mm.userfault_list, userfault_link)
+ __i915_gem_object_release_mmap(obj);
/* The fence will be lost when the device powers down. If any were
* in use by hardware (i.e. they are pinned), we should not be powering
@@ -2062,7 +2091,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
if (!reg->vma)
continue;
- GEM_BUG_ON(!list_empty(&reg->vma->obj->userfault_link));
+ GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
reg->dirty = true;
}
}
@@ -2261,6 +2290,8 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
if (!IS_ERR(pages))
obj->ops->put_pages(obj, pages);
+ obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
+
unlock:
mutex_unlock(&obj->mm.lock);
}
@@ -2291,8 +2322,7 @@ static bool i915_sg_trim(struct sg_table *orig_st)
return true;
}
-static struct sg_table *
-i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
+static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
const unsigned long page_count = obj->base.size / PAGE_SIZE;
@@ -2304,6 +2334,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
struct page *page;
unsigned long last_pfn = 0; /* suppress gcc warning */
unsigned int max_segment = i915_sg_segment_size();
+ unsigned int sg_page_sizes;
gfp_t noreclaim;
int ret;
@@ -2316,12 +2347,12 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
st = kmalloc(sizeof(*st), GFP_KERNEL);
if (st == NULL)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
rebuild_st:
if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
kfree(st);
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
}
/* Get the list of pages out of our struct file. They'll be pinned
@@ -2335,6 +2366,7 @@ rebuild_st:
sg = st->sgl;
st->nents = 0;
+ sg_page_sizes = 0;
for (i = 0; i < page_count; i++) {
const unsigned int shrink[] = {
I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
@@ -2387,8 +2419,10 @@ rebuild_st:
if (!i ||
sg->length >= max_segment ||
page_to_pfn(page) != last_pfn + 1) {
- if (i)
+ if (i) {
+ sg_page_sizes |= sg->length;
sg = sg_next(sg);
+ }
st->nents++;
sg_set_page(sg, page, PAGE_SIZE, 0);
} else {
@@ -2399,8 +2433,10 @@ rebuild_st:
/* Check that the i965g/gm workaround works. */
WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
}
- if (sg) /* loop terminated early; short sg table */
+ if (sg) { /* loop terminated early; short sg table */
+ sg_page_sizes |= sg->length;
sg_mark_end(sg);
+ }
/* Trim unused sg entries to avoid wasting memory. */
i915_sg_trim(st);
@@ -2429,7 +2465,9 @@ rebuild_st:
if (i915_gem_object_needs_bit17_swizzle(obj))
i915_gem_object_do_bit_17_swizzle(obj, st);
- return st;
+ __i915_gem_object_set_pages(obj, st, sg_page_sizes);
+
+ return 0;
err_sg:
sg_mark_end(sg);
@@ -2450,12 +2488,17 @@ err_pages:
if (ret == -ENOSPC)
ret = -ENOMEM;
- return ERR_PTR(ret);
+ return ret;
}
void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
- struct sg_table *pages)
+ struct sg_table *pages,
+ unsigned int sg_page_sizes)
{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ unsigned long supported = INTEL_INFO(i915)->page_sizes;
+ int i;
+
lockdep_assert_held(&obj->mm.lock);
obj->mm.get_page.sg_pos = pages->sgl;
@@ -2469,23 +2512,40 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
__i915_gem_object_pin_pages(obj);
obj->mm.quirked = true;
}
+
+ GEM_BUG_ON(!sg_page_sizes);
+ obj->mm.page_sizes.phys = sg_page_sizes;
+
+ /*
+ * Calculate the supported page-sizes which fit into the given
+ * sg_page_sizes. This will give us the page-sizes which we may be able
+ * to use opportunistically when later inserting into the GTT. For
+ * example if phys=2G, then in theory we should be able to use 1G, 2M,
+ * 64K or 4K pages, although in practice this will depend on a number of
+ * other factors.
+ */
+ obj->mm.page_sizes.sg = 0;
+ for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
+ if (obj->mm.page_sizes.phys & ~0u << i)
+ obj->mm.page_sizes.sg |= BIT(i);
+ }
+
+ GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
}
static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
{
- struct sg_table *pages;
+ int err;
if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
DRM_DEBUG("Attempting to obtain a purgeable object\n");
return -EFAULT;
}
- pages = obj->ops->get_pages(obj);
- if (unlikely(IS_ERR(pages)))
- return PTR_ERR(pages);
+ err = obj->ops->get_pages(obj);
+ GEM_BUG_ON(!err && IS_ERR_OR_NULL(obj->mm.pages));
- __i915_gem_object_set_pages(obj, pages);
- return 0;
+ return err;
}
/* Ensure that the associated pages are gathered from the backing storage
@@ -2796,7 +2856,17 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
{
struct drm_i915_gem_request *request = NULL;
- /* Prevent the signaler thread from updating the request
+ /*
+ * During the reset sequence, we must prevent the engine from
+ * entering RC6. As the context state is undefined until we restart
+ * the engine, if it does enter RC6 during the reset, the state
+ * written to the powercontext is undefined and so we may lose
+ * GPU state upon resume, i.e. fail to restart after a reset.
+ */
+ intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);
+
+ /*
+ * Prevent the signaler thread from updating the request
* state (by calling dma_fence_signal) as we are processing
* the reset. The write from the GPU of the seqno is
* asynchronous and the signaler thread may see a different
@@ -2807,7 +2877,8 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
*/
kthread_park(engine->breadcrumbs.signaler);
- /* Prevent request submission to the hardware until we have
+ /*
+ * Prevent request submission to the hardware until we have
* completed the reset in i915_gem_reset_finish(). If a request
* is completed by one engine, it may then queue a request
* to a second via its engine->irq_tasklet *just* as we are
@@ -2997,6 +3068,8 @@ void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
{
tasklet_enable(&engine->execlists.irq_tasklet);
kthread_unpark(engine->breadcrumbs.signaler);
+
+ intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
}
void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
@@ -3016,49 +3089,76 @@ static void nop_submit_request(struct drm_i915_gem_request *request)
{
GEM_BUG_ON(!i915_terminally_wedged(&request->i915->gpu_error));
dma_fence_set_error(&request->fence, -EIO);
+
i915_gem_request_submit(request);
- intel_engine_init_global_seqno(request->engine, request->global_seqno);
}
-static void engine_set_wedged(struct intel_engine_cs *engine)
+static void nop_complete_submit_request(struct drm_i915_gem_request *request)
{
- /* We need to be sure that no thread is running the old callback as
- * we install the nop handler (otherwise we would submit a request
- * to hardware that will never complete). In order to prevent this
- * race, we wait until the machine is idle before making the swap
- * (using stop_machine()).
- */
- engine->submit_request = nop_submit_request;
+ unsigned long flags;
- /* Mark all executing requests as skipped */
- engine->cancel_requests(engine);
+ GEM_BUG_ON(!i915_terminally_wedged(&request->i915->gpu_error));
+ dma_fence_set_error(&request->fence, -EIO);
- /* Mark all pending requests as complete so that any concurrent
- * (lockless) lookup doesn't try and wait upon the request as we
- * reset it.
- */
- intel_engine_init_global_seqno(engine,
- intel_engine_last_submit(engine));
+ spin_lock_irqsave(&request->engine->timeline->lock, flags);
+ __i915_gem_request_submit(request);
+ intel_engine_init_global_seqno(request->engine, request->global_seqno);
+ spin_unlock_irqrestore(&request->engine->timeline->lock, flags);
}
-static int __i915_gem_set_wedged_BKL(void *data)
+void i915_gem_set_wedged(struct drm_i915_private *i915)
{
- struct drm_i915_private *i915 = data;
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ /*
+ * First, stop submission to hw, but do not yet complete requests by
+ * rolling the global seqno forward (since this would complete requests
+ * for which we haven't set the fence error to EIO yet).
+ */
for_each_engine(engine, i915, id)
- engine_set_wedged(engine);
+ engine->submit_request = nop_submit_request;
- set_bit(I915_WEDGED, &i915->gpu_error.flags);
- wake_up_all(&i915->gpu_error.reset_queue);
+ /*
+ * Make sure no one is running the old callback before we proceed with
+ * cancelling requests and resetting the completion tracking. Otherwise
+ * we might submit a request to the hardware which never completes.
+ */
+ synchronize_rcu();
- return 0;
-}
+ for_each_engine(engine, i915, id) {
+ /* Mark all executing requests as skipped */
+ engine->cancel_requests(engine);
-void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
-{
- stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL);
+ /*
+ * Only once we've force-cancelled all in-flight requests can we
+ * start to complete all requests.
+ */
+ engine->submit_request = nop_complete_submit_request;
+ }
+
+ /*
+ * Make sure no request can slip through without getting completed by
+ * either this call here to intel_engine_init_global_seqno, or the one
+ * in nop_complete_submit_request.
+ */
+ synchronize_rcu();
+
+ for_each_engine(engine, i915, id) {
+ unsigned long flags;
+
+ /* Mark all pending requests as complete so that any concurrent
+ * (lockless) lookup doesn't try and wait upon the request as we
+ * reset it.
+ */
+ spin_lock_irqsave(&engine->timeline->lock, flags);
+ intel_engine_init_global_seqno(engine,
+ intel_engine_last_submit(engine));
+ spin_unlock_irqrestore(&engine->timeline->lock, flags);
+ }
+
+ set_bit(I915_WEDGED, &i915->gpu_error.flags);
+ wake_up_all(&i915->gpu_error.reset_queue);
}
bool i915_gem_unset_wedged(struct drm_i915_private *i915)
@@ -3959,42 +4059,47 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
lockdep_assert_held(&obj->base.dev->struct_mutex);
+ if (!view && flags & PIN_MAPPABLE) {
+ /* If the required space is larger than the available
+ * aperture, we will not able to find a slot for the
+ * object and unbinding the object now will be in
+ * vain. Worse, doing so may cause us to ping-pong
+ * the object in and out of the Global GTT and
+ * waste a lot of cycles under the mutex.
+ */
+ if (obj->base.size > dev_priv->ggtt.mappable_end)
+ return ERR_PTR(-E2BIG);
+
+ /* If NONBLOCK is set the caller is optimistically
+ * trying to cache the full object within the mappable
+ * aperture, and *must* have a fallback in place for
+ * situations where we cannot bind the object. We
+ * can be a little more lax here and use the fallback
+ * more often to avoid costly migrations of ourselves
+ * and other objects within the aperture.
+ *
+ * Half-the-aperture is used as a simple heuristic.
+ * More interesting would to do search for a free
+ * block prior to making the commitment to unbind.
+ * That caters for the self-harm case, and with a
+ * little more heuristics (e.g. NOFAULT, NOEVICT)
+ * we could try to minimise harm to others.
+ */
+ if (flags & PIN_NONBLOCK &&
+ obj->base.size > dev_priv->ggtt.mappable_end / 2)
+ return ERR_PTR(-ENOSPC);
+ }
+
vma = i915_vma_instance(obj, vm, view);
if (unlikely(IS_ERR(vma)))
return vma;
if (i915_vma_misplaced(vma, size, alignment, flags)) {
- if (flags & PIN_NONBLOCK &&
- (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)))
- return ERR_PTR(-ENOSPC);
+ if (flags & PIN_NONBLOCK) {
+ if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
+ return ERR_PTR(-ENOSPC);
- if (flags & PIN_MAPPABLE) {
- /* If the required space is larger than the available
- * aperture, we will not able to find a slot for the
- * object and unbinding the object now will be in
- * vain. Worse, doing so may cause us to ping-pong
- * the object in and out of the Global GTT and
- * waste a lot of cycles under the mutex.
- */
- if (vma->fence_size > dev_priv->ggtt.mappable_end)
- return ERR_PTR(-E2BIG);
-
- /* If NONBLOCK is set the caller is optimistically
- * trying to cache the full object within the mappable
- * aperture, and *must* have a fallback in place for
- * situations where we cannot bind the object. We
- * can be a little more lax here and use the fallback
- * more often to avoid costly migrations of ourselves
- * and other objects within the aperture.
- *
- * Half-the-aperture is used as a simple heuristic.
- * More interesting would to do search for a free
- * block prior to making the commitment to unbind.
- * That caters for the self-harm case, and with a
- * little more heuristics (e.g. NOFAULT, NOEVICT)
- * we could try to minimise harm to others.
- */
- if (flags & PIN_NONBLOCK &&
+ if (flags & PIN_MAPPABLE &&
vma->fence_size > dev_priv->ggtt.mappable_end / 2)
return ERR_PTR(-ENOSPC);
}
@@ -4221,7 +4326,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
mutex_init(&obj->mm.lock);
INIT_LIST_HEAD(&obj->global_link);
- INIT_LIST_HEAD(&obj->userfault_link);
INIT_LIST_HEAD(&obj->vma_list);
INIT_LIST_HEAD(&obj->lut_list);
INIT_LIST_HEAD(&obj->batch_pool_link);
@@ -4251,6 +4355,30 @@ static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
.pwrite = i915_gem_object_pwrite_gtt,
};
+static int i915_gem_object_create_shmem(struct drm_device *dev,
+ struct drm_gem_object *obj,
+ size_t size)
+{
+ struct drm_i915_private *i915 = to_i915(dev);
+ unsigned long flags = VM_NORESERVE;
+ struct file *filp;
+
+ drm_gem_private_object_init(dev, obj, size);
+
+ if (i915->mm.gemfs)
+ filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
+ flags);
+ else
+ filp = shmem_file_setup("i915", size, flags);
+
+ if (IS_ERR(filp))
+ return PTR_ERR(filp);
+
+ obj->filp = filp;
+
+ return 0;
+}
+
struct drm_i915_gem_object *
i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
{
@@ -4275,7 +4403,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
if (obj == NULL)
return ERR_PTR(-ENOMEM);
- ret = drm_gem_object_init(&dev_priv->drm, &obj->base, size);
+ ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size);
if (ret)
goto fail;
@@ -4378,6 +4506,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
llist_for_each_entry_safe(obj, on, freed, freed) {
GEM_BUG_ON(obj->bind_count);
+ GEM_BUG_ON(obj->userfault_count);
GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
GEM_BUG_ON(!list_empty(&obj->lut_list));
@@ -4547,8 +4676,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
/* As the idle_work is rearming if it detects a race, play safe and
* repeat the flush until it is definitely idle.
*/
- while (flush_delayed_work(&dev_priv->gt.idle_work))
- ;
+ drain_delayed_work(&dev_priv->gt.idle_work);
/* Assert that we sucessfully flushed all the work and
* reset the GPU back to its idle, low power state.
@@ -4595,6 +4723,7 @@ void i915_gem_resume(struct drm_i915_private *dev_priv)
mutex_lock(&dev->struct_mutex);
i915_gem_restore_gtt_mappings(dev_priv);
+ i915_gem_restore_fences(dev_priv);
/* As we didn't flush the kernel context before suspend, we cannot
* guarantee that the context image is complete. So let's just reset
@@ -4757,6 +4886,15 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
mutex_lock(&dev_priv->drm.struct_mutex);
+ /*
+ * We need to fallback to 4K pages since gvt gtt handling doesn't
+ * support huge page entries - we will need to check either hypervisor
+ * mm can support huge guest page or just do emulation in gvt.
+ */
+ if (intel_vgpu_active(dev_priv))
+ mkwrite_device_info(dev_priv)->page_sizes =
+ I915_GTT_PAGE_SIZE_4K;
+
dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
if (!i915_modparams.enable_execlists) {
@@ -4914,6 +5052,10 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
spin_lock_init(&dev_priv->fb_tracking.lock);
+ err = i915_gemfs_init(dev_priv);
+ if (err)
+ DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err);
+
return 0;
err_priorities:
@@ -4952,6 +5094,8 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
/* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
rcu_barrier();
+
+ i915_gemfs_fini(dev_priv);
}
int i915_gem_freeze(struct drm_i915_private *dev_priv)
@@ -5341,6 +5485,7 @@ err_unlock:
#include "selftests/scatterlist.c"
#include "selftests/mock_gem_device.c"
#include "selftests/huge_gem_object.c"
+#include "selftests/huge_pages.c"
#include "selftests/i915_gem_object.c"
#include "selftests/i915_gem_coherency.c"
#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 921ee369c74d..5bf96a258509 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -416,14 +416,43 @@ out:
return ctx;
}
+static struct i915_gem_context *
+create_kernel_context(struct drm_i915_private *i915, int prio)
+{
+ struct i915_gem_context *ctx;
+
+ ctx = i915_gem_create_context(i915, NULL);
+ if (IS_ERR(ctx))
+ return ctx;
+
+ i915_gem_context_clear_bannable(ctx);
+ ctx->priority = prio;
+ ctx->ring_size = PAGE_SIZE;
+
+ GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
+
+ return ctx;
+}
+
+static void
+destroy_kernel_context(struct i915_gem_context **ctxp)
+{
+ struct i915_gem_context *ctx;
+
+ /* Keep the context ref so that we can free it immediately ourselves */
+ ctx = i915_gem_context_get(fetch_and_zero(ctxp));
+ GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
+
+ context_close(ctx);
+ i915_gem_context_free(ctx);
+}
+
int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
{
struct i915_gem_context *ctx;
+ int err;
- /* Init should only be called once per module load. Eventually the
- * restriction on the context_disabled check can be loosened. */
- if (WARN_ON(dev_priv->kernel_context))
- return 0;
+ GEM_BUG_ON(dev_priv->kernel_context);
INIT_LIST_HEAD(&dev_priv->contexts.list);
INIT_WORK(&dev_priv->contexts.free_work, contexts_free_worker);
@@ -441,28 +470,38 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
ida_init(&dev_priv->contexts.hw_ida);
- ctx = i915_gem_create_context(dev_priv, NULL);
+ /* lowest priority; idle task */
+ ctx = create_kernel_context(dev_priv, I915_PRIORITY_MIN);
if (IS_ERR(ctx)) {
- DRM_ERROR("Failed to create default global context (error %ld)\n",
- PTR_ERR(ctx));
- return PTR_ERR(ctx);
+ DRM_ERROR("Failed to create default global context\n");
+ err = PTR_ERR(ctx);
+ goto err;
}
-
- /* For easy recognisablity, we want the kernel context to be 0 and then
+ /*
+ * For easy recognisablity, we want the kernel context to be 0 and then
* all user contexts will have non-zero hw_id.
*/
GEM_BUG_ON(ctx->hw_id);
-
- i915_gem_context_clear_bannable(ctx);
- ctx->priority = I915_PRIORITY_MIN; /* lowest priority; idle task */
dev_priv->kernel_context = ctx;
- GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
+ /* highest priority; preempting task */
+ ctx = create_kernel_context(dev_priv, INT_MAX);
+ if (IS_ERR(ctx)) {
+ DRM_ERROR("Failed to create default preempt context\n");
+ err = PTR_ERR(ctx);
+ goto err_kernel_context;
+ }
+ dev_priv->preempt_context = ctx;
DRM_DEBUG_DRIVER("%s context support initialized\n",
dev_priv->engine[RCS]->context_size ? "logical" :
"fake");
return 0;
+
+err_kernel_context:
+ destroy_kernel_context(&dev_priv->kernel_context);
+err:
+ return err;
}
void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
@@ -507,15 +546,10 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
void i915_gem_contexts_fini(struct drm_i915_private *i915)
{
- struct i915_gem_context *ctx;
-
lockdep_assert_held(&i915->drm.struct_mutex);
- /* Keep the context so that we can free it immediately ourselves */
- ctx = i915_gem_context_get(fetch_and_zero(&i915->kernel_context));
- GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
- context_close(ctx);
- i915_gem_context_free(ctx);
+ destroy_kernel_context(&i915->preempt_context);
+ destroy_kernel_context(&i915->kernel_context);
/* Must free all deferred contexts (via flush_workqueue) first */
ida_destroy(&i915->contexts.hw_ida);
@@ -1036,6 +1070,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
case I915_CONTEXT_PARAM_BANNABLE:
args->value = i915_gem_context_is_bannable(ctx);
break;
+ case I915_CONTEXT_PARAM_PRIORITY:
+ args->value = ctx->priority;
+ break;
default:
ret = -EINVAL;
break;
@@ -1091,6 +1128,26 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
else
i915_gem_context_clear_bannable(ctx);
break;
+
+ case I915_CONTEXT_PARAM_PRIORITY:
+ {
+ int priority = args->value;
+
+ if (args->size)
+ ret = -EINVAL;
+ else if (!to_i915(dev)->engine[RCS]->schedule)
+ ret = -ENODEV;
+ else if (priority > I915_CONTEXT_MAX_USER_PRIORITY ||
+ priority < I915_CONTEXT_MIN_USER_PRIORITY)
+ ret = -EINVAL;
+ else if (priority > I915_CONTEXT_DEFAULT_PRIORITY &&
+ !capable(CAP_SYS_NICE))
+ ret = -EPERM;
+ else
+ ctx->priority = priority;
+ }
+ break;
+
default:
ret = -EINVAL;
break;
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 6176e589cf09..864439a214c8 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -256,11 +256,21 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
return drm_gem_dmabuf_export(dev, &exp_info);
}
-static struct sg_table *
-i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
+static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
{
- return dma_buf_map_attachment(obj->base.import_attach,
- DMA_BIDIRECTIONAL);
+ struct sg_table *pages;
+ unsigned int sg_page_sizes;
+
+ pages = dma_buf_map_attachment(obj->base.import_attach,
+ DMA_BIDIRECTIONAL);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+
+ sg_page_sizes = i915_sg_page_sizes(pages->sgl);
+
+ __i915_gem_object_set_pages(obj, pages, sg_page_sizes);
+
+ return 0;
}
static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 4df039ef2ce3..a5a5b7e6daae 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -82,7 +82,7 @@ mark_free(struct drm_mm_scan *scan,
if (i915_vma_is_pinned(vma))
return false;
- if (flags & PIN_NONFAULT && !list_empty(&vma->obj->userfault_link))
+ if (flags & PIN_NONFAULT && i915_vma_has_userfault(vma))
return false;
list_add(&vma->evict_link, unwind);
@@ -315,6 +315,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
break;
}
+ if (flags & PIN_NONFAULT && i915_vma_has_userfault(vma)) {
+ ret = -ENOSPC;
+ break;
+ }
+
/* Overlap of objects in the same batch? */
if (i915_vma_is_pinned(vma)) {
ret = -ENOSPC;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d733c4d5a500..3d7190764f10 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -367,12 +367,12 @@ eb_pin_vma(struct i915_execbuffer *eb,
return false;
if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
- if (unlikely(i915_vma_get_fence(vma))) {
+ if (unlikely(i915_vma_pin_fence(vma))) {
i915_vma_unpin(vma);
return false;
}
- if (i915_vma_pin_fence(vma))
+ if (vma->fence)
exec_flags |= __EXEC_OBJECT_HAS_FENCE;
}
@@ -385,7 +385,7 @@ static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
- i915_vma_unpin_fence(vma);
+ __i915_vma_unpin_fence(vma);
__i915_vma_unpin(vma);
}
@@ -563,13 +563,13 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
}
if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
- err = i915_vma_get_fence(vma);
+ err = i915_vma_pin_fence(vma);
if (unlikely(err)) {
i915_vma_unpin(vma);
return err;
}
- if (i915_vma_pin_fence(vma))
+ if (vma->fence)
exec_flags |= __EXEC_OBJECT_HAS_FENCE;
}
@@ -974,7 +974,9 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
return ERR_PTR(err);
vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
- PIN_MAPPABLE | PIN_NONBLOCK);
+ PIN_MAPPABLE |
+ PIN_NONBLOCK |
+ PIN_NONFAULT);
if (IS_ERR(vma)) {
memset(&cache->node, 0, sizeof(cache->node));
err = drm_mm_insert_node_in_range
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 2783d63bd1ad..012250f25255 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -240,7 +240,8 @@ static int fence_update(struct drm_i915_fence_reg *fence,
/* Ensure that all userspace CPU access is completed before
* stealing the fence.
*/
- i915_gem_release_mmap(fence->vma->obj);
+ GEM_BUG_ON(fence->vma->fence != fence);
+ i915_vma_revoke_mmap(fence->vma);
fence->vma->fence = NULL;
fence->vma = NULL;
@@ -280,8 +281,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
*
* 0 on success, negative error code on failure.
*/
-int
-i915_vma_put_fence(struct i915_vma *vma)
+int i915_vma_put_fence(struct i915_vma *vma)
{
struct drm_i915_fence_reg *fence = vma->fence;
@@ -299,6 +299,8 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
struct drm_i915_fence_reg *fence;
list_for_each_entry(fence, &dev_priv->mm.fence_list, link) {
+ GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
+
if (fence->pin_count)
continue;
@@ -313,7 +315,7 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
}
/**
- * i915_vma_get_fence - set up fencing for a vma
+ * i915_vma_pin_fence - set up fencing for a vma
* @vma: vma to map through a fence reg
*
* When mapping objects through the GTT, userspace wants to be able to write
@@ -331,10 +333,11 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
* 0 on success, negative error code on failure.
*/
int
-i915_vma_get_fence(struct i915_vma *vma)
+i915_vma_pin_fence(struct i915_vma *vma)
{
struct drm_i915_fence_reg *fence;
struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
+ int err;
/* Note that we revoke fences on runtime suspend. Therefore the user
* must keep the device awake whilst using the fence.
@@ -344,6 +347,8 @@ i915_vma_get_fence(struct i915_vma *vma)
/* Just update our place in the LRU if our fence is getting reused. */
if (vma->fence) {
fence = vma->fence;
+ GEM_BUG_ON(fence->vma != vma);
+ fence->pin_count++;
if (!fence->dirty) {
list_move_tail(&fence->link,
&fence->i915->mm.fence_list);
@@ -353,10 +358,25 @@ i915_vma_get_fence(struct i915_vma *vma)
fence = fence_find(vma->vm->i915);
if (IS_ERR(fence))
return PTR_ERR(fence);
+
+ GEM_BUG_ON(fence->pin_count);
+ fence->pin_count++;
} else
return 0;
- return fence_update(fence, set);
+ err = fence_update(fence, set);
+ if (err)
+ goto out_unpin;
+
+ GEM_BUG_ON(fence->vma != set);
+ GEM_BUG_ON(vma->fence != (set ? fence : NULL));
+
+ if (set)
+ return 0;
+
+out_unpin:
+ fence->pin_count--;
+ return err;
}
/**
@@ -429,8 +449,10 @@ void i915_gem_revoke_fences(struct drm_i915_private *dev_priv)
for (i = 0; i < dev_priv->num_fence_regs; i++) {
struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
+ GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
+
if (fence->vma)
- i915_gem_release_mmap(fence->vma->obj);
+ i915_vma_revoke_mmap(fence->vma);
}
}
@@ -450,13 +472,15 @@ void i915_gem_restore_fences(struct drm_i915_private *dev_priv)
struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
struct i915_vma *vma = reg->vma;
+ GEM_BUG_ON(vma && vma->fence != reg);
+
/*
* Commit delayed tiling changes if we have an object still
* attached to the fence, otherwise just clear the fence.
*/
if (vma && !i915_gem_object_is_tiled(vma->obj)) {
GEM_BUG_ON(!reg->dirty);
- GEM_BUG_ON(!list_empty(&vma->obj->userfault_link));
+ GEM_BUG_ON(i915_vma_has_userfault(vma));
list_move(&reg->link, &dev_priv->mm.fence_list);
vma->fence = NULL;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 4c82ceb8d318..daba55a4fce2 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -135,11 +135,12 @@ static inline void i915_ggtt_invalidate(struct drm_i915_private *i915)
int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
int enable_ppgtt)
{
- bool has_aliasing_ppgtt;
bool has_full_ppgtt;
bool has_full_48bit_ppgtt;
- has_aliasing_ppgtt = dev_priv->info.has_aliasing_ppgtt;
+ if (!dev_priv->info.has_aliasing_ppgtt)
+ return 0;
+
has_full_ppgtt = dev_priv->info.has_full_ppgtt;
has_full_48bit_ppgtt = dev_priv->info.has_full_48bit_ppgtt;
@@ -149,9 +150,6 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
has_full_48bit_ppgtt = intel_vgpu_has_full_48bit_ppgtt(dev_priv);
}
- if (!has_aliasing_ppgtt)
- return 0;
-
/*
* We don't allow disabling PPGTT for gen9+ as it's a requirement for
* execlists, the sole mechanism available to submit work.
@@ -188,7 +186,7 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
return 2;
}
- return has_aliasing_ppgtt ? 1 : 0;
+ return 1;
}
static int ppgtt_bind_vma(struct i915_vma *vma,
@@ -205,8 +203,6 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
return ret;
}
- vma->pages = vma->obj->mm.pages;
-
/* Currently applicable only to VLV */
pte_flags = 0;
if (vma->obj->gt_ro)
@@ -222,6 +218,30 @@ static void ppgtt_unbind_vma(struct i915_vma *vma)
vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
}
+static int ppgtt_set_pages(struct i915_vma *vma)
+{
+ GEM_BUG_ON(vma->pages);
+
+ vma->pages = vma->obj->mm.pages;
+
+ vma->page_sizes = vma->obj->mm.page_sizes;
+
+ return 0;
+}
+
+static void clear_pages(struct i915_vma *vma)
+{
+ GEM_BUG_ON(!vma->pages);
+
+ if (vma->pages != vma->obj->mm.pages) {
+ sg_free_table(vma->pages);
+ kfree(vma->pages);
+ }
+ vma->pages = NULL;
+
+ memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
+}
+
static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
enum i915_cache_level level)
{
@@ -497,22 +517,63 @@ static void fill_page_dma_32(struct i915_address_space *vm,
static int
setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
{
- struct page *page;
+ struct page *page = NULL;
dma_addr_t addr;
+ int order;
- page = alloc_page(gfp | __GFP_ZERO);
- if (unlikely(!page))
- return -ENOMEM;
+ /*
+ * In order to utilize 64K pages for an object with a size < 2M, we will
+ * need to support a 64K scratch page, given that every 16th entry for a
+ * page-table operating in 64K mode must point to a properly aligned 64K
+ * region, including any PTEs which happen to point to scratch.
+ *
+ * This is only relevant for the 48b PPGTT where we support
+ * huge-gtt-pages, see also i915_vma_insert().
+ *
+ * TODO: we should really consider write-protecting the scratch-page and
+ * sharing between ppgtt
+ */
+ if (i915_vm_is_48bit(vm) &&
+ HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
+ order = get_order(I915_GTT_PAGE_SIZE_64K);
+ page = alloc_pages(gfp | __GFP_ZERO | __GFP_NOWARN, order);
+ if (page) {
+ addr = dma_map_page(vm->dma, page, 0,
+ I915_GTT_PAGE_SIZE_64K,
+ PCI_DMA_BIDIRECTIONAL);
+ if (unlikely(dma_mapping_error(vm->dma, addr))) {
+ __free_pages(page, order);
+ page = NULL;
+ }
- addr = dma_map_page(vm->dma, page, 0, PAGE_SIZE,
- PCI_DMA_BIDIRECTIONAL);
- if (unlikely(dma_mapping_error(vm->dma, addr))) {
- __free_page(page);
- return -ENOMEM;
+ if (!IS_ALIGNED(addr, I915_GTT_PAGE_SIZE_64K)) {
+ dma_unmap_page(vm->dma, addr,
+ I915_GTT_PAGE_SIZE_64K,
+ PCI_DMA_BIDIRECTIONAL);
+ __free_pages(page, order);
+ page = NULL;
+ }
+ }
+ }
+
+ if (!page) {
+ order = 0;
+ page = alloc_page(gfp | __GFP_ZERO);
+ if (unlikely(!page))
+ return -ENOMEM;
+
+ addr = dma_map_page(vm->dma, page, 0, PAGE_SIZE,
+ PCI_DMA_BIDIRECTIONAL);
+ if (unlikely(dma_mapping_error(vm->dma, addr))) {
+ __free_page(page);
+ return -ENOMEM;
+ }
}
vm->scratch_page.page = page;
vm->scratch_page.daddr = addr;
+ vm->scratch_page.order = order;
+
return 0;
}
@@ -520,8 +581,9 @@ static void cleanup_scratch_page(struct i915_address_space *vm)
{
struct i915_page_dma *p = &vm->scratch_page;
- dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
- __free_page(p->page);
+ dma_unmap_page(vm->dma, p->daddr, BIT(p->order) << PAGE_SHIFT,
+ PCI_DMA_BIDIRECTIONAL);
+ __free_pages(p->page, p->order);
}
static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
@@ -989,6 +1051,105 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx,
cache_level);
+
+ vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
+}
+
+static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
+ struct i915_page_directory_pointer **pdps,
+ struct sgt_dma *iter,
+ enum i915_cache_level cache_level)
+{
+ const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
+ u64 start = vma->node.start;
+ dma_addr_t rem = iter->sg->length;
+
+ do {
+ struct gen8_insert_pte idx = gen8_insert_pte(start);
+ struct i915_page_directory_pointer *pdp = pdps[idx.pml4e];
+ struct i915_page_directory *pd = pdp->page_directory[idx.pdpe];
+ unsigned int page_size;
+ bool maybe_64K = false;
+ gen8_pte_t encode = pte_encode;
+ gen8_pte_t *vaddr;
+ u16 index, max;
+
+ if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
+ IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
+ rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) {
+ index = idx.pde;
+ max = I915_PDES;
+ page_size = I915_GTT_PAGE_SIZE_2M;
+
+ encode |= GEN8_PDE_PS_2M;
+
+ vaddr = kmap_atomic_px(pd);
+ } else {
+ struct i915_page_table *pt = pd->page_table[idx.pde];
+
+ index = idx.pte;
+ max = GEN8_PTES;
+ page_size = I915_GTT_PAGE_SIZE;
+
+ if (!index &&
+ vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
+ IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
+ (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
+ rem >= (max - index) << PAGE_SHIFT))
+ maybe_64K = true;
+
+ vaddr = kmap_atomic_px(pt);
+ }
+
+ do {
+ GEM_BUG_ON(iter->sg->length < page_size);
+ vaddr[index++] = encode | iter->dma;
+
+ start += page_size;
+ iter->dma += page_size;
+ rem -= page_size;
+ if (iter->dma >= iter->max) {
+ iter->sg = __sg_next(iter->sg);
+ if (!iter->sg)
+ break;
+
+ rem = iter->sg->length;
+ iter->dma = sg_dma_address(iter->sg);
+ iter->max = iter->dma + rem;
+
+ if (maybe_64K && index < max &&
+ !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
+ (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
+ rem >= (max - index) << PAGE_SHIFT)))
+ maybe_64K = false;
+
+ if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
+ break;
+ }
+ } while (rem >= page_size && index < max);
+
+ kunmap_atomic(vaddr);
+
+ /*
+ * Is it safe to mark the 2M block as 64K? -- Either we have
+ * filled whole page-table with 64K entries, or filled part of
+ * it and have reached the end of the sg table and we have
+ * enough padding.
+ */
+ if (maybe_64K &&
+ (index == max ||
+ (i915_vm_has_scratch_64K(vma->vm) &&
+ !iter->sg && IS_ALIGNED(vma->node.start +
+ vma->node.size,
+ I915_GTT_PAGE_SIZE_2M)))) {
+ vaddr = kmap_atomic_px(pd);
+ vaddr[idx.pde] |= GEN8_PDE_IPS_64K;
+ kunmap_atomic(vaddr);
+ page_size = I915_GTT_PAGE_SIZE_64K;
+ }
+
+ vma->page_sizes.gtt |= page_size;
+ } while (iter->sg);
}
static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
@@ -1003,11 +1164,18 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
.max = iter.dma + iter.sg->length,
};
struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps;
- struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
- while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++], &iter,
- &idx, cache_level))
- GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
+ if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
+ gen8_ppgtt_insert_huge_entries(vma, pdps, &iter, cache_level);
+ } else {
+ struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
+
+ while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++],
+ &iter, &idx, cache_level))
+ GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
+
+ vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
+ }
}
static void gen8_free_page_tables(struct i915_address_space *vm,
@@ -1452,6 +1620,8 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
ppgtt->base.cleanup = gen8_ppgtt_cleanup;
ppgtt->base.unbind_vma = ppgtt_unbind_vma;
ppgtt->base.bind_vma = ppgtt_bind_vma;
+ ppgtt->base.set_pages = ppgtt_set_pages;
+ ppgtt->base.clear_pages = clear_pages;
ppgtt->debug_dump = gen8_dump_ppgtt;
return 0;
@@ -1726,6 +1896,8 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
}
} while (1);
kunmap_atomic(vaddr);
+
+ vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
}
static int gen6_alloc_va_range(struct i915_address_space *vm,
@@ -1894,6 +2066,8 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
ppgtt->base.unbind_vma = ppgtt_unbind_vma;
ppgtt->base.bind_vma = ppgtt_bind_vma;
+ ppgtt->base.set_pages = ppgtt_set_pages;
+ ppgtt->base.clear_pages = clear_pages;
ppgtt->base.cleanup = gen6_ppgtt_cleanup;
ppgtt->debug_dump = gen6_dump_ppgtt;
@@ -1961,6 +2135,23 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv)
I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
else if (IS_GEN9_LP(dev_priv))
I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
+
+ /*
+ * To support 64K PTEs we need to first enable the use of the
+ * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
+ * mmio, otherwise the page-walker will simply ignore the IPS bit. This
+ * shouldn't be needed after GEN10.
+ *
+ * 64K pages were first introduced from BDW+, although technically they
+ * only *work* from gen9+. For pre-BDW we instead have the option for
+ * 32K pages, but we don't currently have any support for it in our
+ * driver.
+ */
+ if (HAS_PAGE_SIZES(dev_priv, I915_GTT_PAGE_SIZE_64K) &&
+ INTEL_GEN(dev_priv) <= 10)
+ I915_WRITE(GEN8_GAMW_ECO_DEV_RW_IA,
+ I915_READ(GEN8_GAMW_ECO_DEV_RW_IA) |
+ GAMW_ECO_ENABLE_64K_IPS_FIELD);
}
int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv)
@@ -2405,12 +2596,6 @@ static int ggtt_bind_vma(struct i915_vma *vma,
struct drm_i915_gem_object *obj = vma->obj;
u32 pte_flags;
- if (unlikely(!vma->pages)) {
- int ret = i915_get_ggtt_vma_pages(vma);
- if (ret)
- return ret;
- }
-
/* Currently applicable only to VLV */
pte_flags = 0;
if (obj->gt_ro)
@@ -2420,6 +2605,8 @@ static int ggtt_bind_vma(struct i915_vma *vma,
vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
intel_runtime_pm_put(i915);
+ vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
+
/*
* Without aliasing PPGTT there's no difference between
* GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
@@ -2447,12 +2634,6 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
u32 pte_flags;
int ret;
- if (unlikely(!vma->pages)) {
- ret = i915_get_ggtt_vma_pages(vma);
- if (ret)
- return ret;
- }
-
/* Currently applicable only to VLV */
pte_flags = 0;
if (vma->obj->gt_ro)
@@ -2467,7 +2648,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
vma->node.start,
vma->size);
if (ret)
- goto err_pages;
+ return ret;
}
appgtt->base.insert_entries(&appgtt->base, vma, cache_level,
@@ -2481,17 +2662,6 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
}
return 0;
-
-err_pages:
- if (!(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND))) {
- if (vma->pages != vma->obj->mm.pages) {
- GEM_BUG_ON(!vma->pages);
- sg_free_table(vma->pages);
- kfree(vma->pages);
- }
- vma->pages = NULL;
- }
- return ret;
}
static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
@@ -2529,6 +2699,21 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL);
}
+static int ggtt_set_pages(struct i915_vma *vma)
+{
+ int ret;
+
+ GEM_BUG_ON(vma->pages);
+
+ ret = i915_get_ggtt_vma_pages(vma);
+ if (ret)
+ return ret;
+
+ vma->page_sizes = vma->obj->mm.page_sizes;
+
+ return 0;
+}
+
static void i915_gtt_color_adjust(const struct drm_mm_node *node,
unsigned long color,
u64 *start,
@@ -3151,6 +3336,8 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
ggtt->base.cleanup = gen6_gmch_remove;
ggtt->base.bind_vma = ggtt_bind_vma;
ggtt->base.unbind_vma = ggtt_unbind_vma;
+ ggtt->base.set_pages = ggtt_set_pages;
+ ggtt->base.clear_pages = clear_pages;
ggtt->base.insert_page = gen8_ggtt_insert_page;
ggtt->base.clear_range = nop_clear_range;
if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
@@ -3209,6 +3396,8 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
ggtt->base.insert_entries = gen6_ggtt_insert_entries;
ggtt->base.bind_vma = ggtt_bind_vma;
ggtt->base.unbind_vma = ggtt_unbind_vma;
+ ggtt->base.set_pages = ggtt_set_pages;
+ ggtt->base.clear_pages = clear_pages;
ggtt->base.cleanup = gen6_gmch_remove;
ggtt->invalidate = gen6_ggtt_invalidate;
@@ -3254,6 +3443,8 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
ggtt->base.clear_range = i915_ggtt_clear_range;
ggtt->base.bind_vma = ggtt_bind_vma;
ggtt->base.unbind_vma = ggtt_unbind_vma;
+ ggtt->base.set_pages = ggtt_set_pages;
+ ggtt->base.clear_pages = clear_pages;
ggtt->base.cleanup = i915_gmch_remove;
ggtt->invalidate = gmch_ggtt_invalidate;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index f62fb903dc24..93211a96fdad 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -42,7 +42,13 @@
#include "i915_gem_request.h"
#include "i915_selftest.h"
-#define I915_GTT_PAGE_SIZE 4096UL
+#define I915_GTT_PAGE_SIZE_4K BIT(12)
+#define I915_GTT_PAGE_SIZE_64K BIT(16)
+#define I915_GTT_PAGE_SIZE_2M BIT(21)
+
+#define I915_GTT_PAGE_SIZE I915_GTT_PAGE_SIZE_4K
+#define I915_GTT_MAX_PAGE_SIZE I915_GTT_PAGE_SIZE_2M
+
#define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE
#define I915_FENCE_REG_NONE -1
@@ -148,6 +154,9 @@ typedef u64 gen8_ppgtt_pml4e_t;
#define GEN8_PPAT_GET_AGE(x) ((x) & (3 << 4))
#define CHV_PPAT_GET_SNOOP(x) ((x) & (1 << 6))
+#define GEN8_PDE_IPS_64K BIT(11)
+#define GEN8_PDE_PS_2M BIT(7)
+
struct sg_table;
struct intel_rotation_info {
@@ -207,6 +216,7 @@ struct i915_vma;
struct i915_page_dma {
struct page *page;
+ int order;
union {
dma_addr_t daddr;
@@ -329,6 +339,8 @@ struct i915_address_space {
int (*bind_vma)(struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags);
+ int (*set_pages)(struct i915_vma *vma);
+ void (*clear_pages)(struct i915_vma *vma);
I915_SELFTEST_DECLARE(struct fault_attr fault_attr);
};
@@ -341,6 +353,12 @@ i915_vm_is_48bit(const struct i915_address_space *vm)
return (vm->total - 1) >> 32;
}
+static inline bool
+i915_vm_has_scratch_64K(struct i915_address_space *vm)
+{
+ return vm->scratch_page.order == get_order(I915_GTT_PAGE_SIZE_64K);
+}
+
/* The Graphics Translation Table is the way in which GEN hardware translates a
* Graphics Virtual Address into a Physical Address. In addition to the normal
* collateral associated with any va->pa translations GEN hardware also has a
diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
index c1f64ddaf8aa..ee83ec838ee7 100644
--- a/drivers/gpu/drm/i915/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/i915_gem_internal.c
@@ -44,12 +44,12 @@ static void internal_free_pages(struct sg_table *st)
kfree(st);
}
-static struct sg_table *
-i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
+static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct sg_table *st;
struct scatterlist *sg;
+ unsigned int sg_page_sizes;
unsigned int npages;
int max_order;
gfp_t gfp;
@@ -78,16 +78,17 @@ i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
create_st:
st = kmalloc(sizeof(*st), GFP_KERNEL);
if (!st)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
npages = obj->base.size / PAGE_SIZE;
if (sg_alloc_table(st, npages, GFP_KERNEL)) {
kfree(st);
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
}
sg = st->sgl;
st->nents = 0;
+ sg_page_sizes = 0;
do {
int order = min(fls(npages) - 1, max_order);
@@ -105,6 +106,7 @@ create_st:
} while (1);
sg_set_page(sg, page, PAGE_SIZE << order, 0);
+ sg_page_sizes |= PAGE_SIZE << order;
st->nents++;
npages -= 1 << order;
@@ -132,13 +134,17 @@ create_st:
* object are only valid whilst active and pinned.
*/
obj->mm.madv = I915_MADV_DONTNEED;
- return st;
+
+ __i915_gem_object_set_pages(obj, st, sg_page_sizes);
+
+ return 0;
err:
sg_set_page(sg, NULL, 0, 0);
sg_mark_end(sg);
internal_free_pages(st);
- return ERR_PTR(-ENOMEM);
+
+ return -ENOMEM;
}
static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index c30d8f808185..d67f1cbe842d 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -69,7 +69,7 @@ struct drm_i915_gem_object_ops {
* being released or under memory pressure (where we attempt to
* reap pages for the shrinker).
*/
- struct sg_table *(*get_pages)(struct drm_i915_gem_object *);
+ int (*get_pages)(struct drm_i915_gem_object *);
void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
int (*pwrite)(struct drm_i915_gem_object *,
@@ -123,6 +123,7 @@ struct drm_i915_gem_object {
/**
* Whether the object is currently in the GGTT mmap.
*/
+ unsigned int userfault_count;
struct list_head userfault_link;
struct list_head batch_pool_link;
@@ -169,6 +170,35 @@ struct drm_i915_gem_object {
struct sg_table *pages;
void *mapping;
+ /* TODO: whack some of this into the error state */
+ struct i915_page_sizes {
+ /**
+ * The sg mask of the pages sg_table. i.e the mask of
+ * of the lengths for each sg entry.
+ */
+ unsigned int phys;
+
+ /**
+ * The gtt page sizes we are allowed to use given the
+ * sg mask and the supported page sizes. This will
+ * express the smallest unit we can use for the whole
+ * object, as well as the larger sizes we may be able
+ * to use opportunistically.
+ */
+ unsigned int sg;
+
+ /**
+ * The actual gtt page size usage. Since we can have
+ * multiple vma associated with this object we need to
+ * prevent any trampling of state, hence a copy of this
+ * struct also lives in each vma, therefore the gtt
+ * value here should only be read/write through the vma.
+ */
+ unsigned int gtt;
+ } page_sizes;
+
+ I915_SELFTEST_DECLARE(unsigned int page_mask);
+
struct i915_gem_object_page_iter {
struct scatterlist *sg_pos;
unsigned int sg_idx; /* in pages, but 32bit eek! */
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 4eb1a76731b2..d140fcf5c6a3 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -186,7 +186,7 @@ i915_priotree_init(struct i915_priotree *pt)
INIT_LIST_HEAD(&pt->signalers_list);
INIT_LIST_HEAD(&pt->waiters_list);
INIT_LIST_HEAD(&pt->link);
- pt->priority = INT_MIN;
+ pt->priority = I915_PRIORITY_INVALID;
}
static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
@@ -416,7 +416,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
spin_lock_irq(&request->lock);
if (request->waitboost)
- atomic_dec(&request->i915->rps.num_waiters);
+ atomic_dec(&request->i915->gt_pm.rps.num_waiters);
dma_fence_signal_locked(&request->fence);
spin_unlock_irq(&request->lock);
@@ -556,7 +556,16 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
switch (state) {
case FENCE_COMPLETE:
trace_i915_gem_request_submit(request);
+ /*
+ * We need to serialize use of the submit_request() callback with its
+ * hotplugging performed during an emergency i915_gem_set_wedged().
+ * We use the RCU mechanism to mark the critical section in order to
+ * force i915_gem_set_wedged() to wait until the submit_request() is
+ * completed before proceeding.
+ */
+ rcu_read_lock();
request->engine->submit_request(request);
+ rcu_read_unlock();
break;
case FENCE_FREE:
@@ -587,6 +596,13 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
lockdep_assert_held(&dev_priv->drm.struct_mutex);
+ /*
+ * Preempt contexts are reserved for exclusive use to inject a
+ * preemption context switch. They are never to be used for any trivial
+ * request!
+ */
+ GEM_BUG_ON(ctx == dev_priv->preempt_context);
+
/* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
* EIO if the GPU is already wedged.
*/
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 96eb52471dad..26249f39de67 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -30,6 +30,8 @@
#include "i915_gem.h"
#include "i915_sw_fence.h"
+#include <uapi/drm/i915_drm.h>
+
struct drm_file;
struct drm_i915_gem_object;
struct drm_i915_gem_request;
@@ -69,9 +71,14 @@ struct i915_priotree {
struct list_head waiters_list; /* those after us, they depend upon us */
struct list_head link;
int priority;
-#define I915_PRIORITY_MAX 1024
-#define I915_PRIORITY_NORMAL 0
-#define I915_PRIORITY_MIN (-I915_PRIORITY_MAX)
+};
+
+enum {
+ I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1,
+ I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY,
+ I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1,
+
+ I915_PRIORITY_INVALID = INT_MIN
};
struct i915_gem_capture_list {
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 507c9f0d8df1..54fd4cfa9d07 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -539,12 +539,18 @@ i915_pages_create_for_stolen(struct drm_device *dev,
return st;
}
-static struct sg_table *
-i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
+static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
{
- return i915_pages_create_for_stolen(obj->base.dev,
- obj->stolen->start,
- obj->stolen->size);
+ struct sg_table *pages =
+ i915_pages_create_for_stolen(obj->base.dev,
+ obj->stolen->start,
+ obj->stolen->size);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+
+ __i915_gem_object_set_pages(obj, pages, obj->stolen->size);
+
+ return 0;
}
static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 2d4996de7331..4d712a4db63b 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -164,7 +164,6 @@ static struct i915_mmu_notifier *
i915_mmu_notifier_create(struct mm_struct *mm)
{
struct i915_mmu_notifier *mn;
- int ret;
mn = kmalloc(sizeof(*mn), GFP_KERNEL);
if (mn == NULL)
@@ -179,14 +178,6 @@ i915_mmu_notifier_create(struct mm_struct *mm)
return ERR_PTR(-ENOMEM);
}
- /* Protected by mmap_sem (write-lock) */
- ret = __mmu_notifier_register(&mn->mn, mm);
- if (ret) {
- destroy_workqueue(mn->wq);
- kfree(mn);
- return ERR_PTR(ret);
- }
-
return mn;
}
@@ -210,23 +201,40 @@ i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
static struct i915_mmu_notifier *
i915_mmu_notifier_find(struct i915_mm_struct *mm)
{
- struct i915_mmu_notifier *mn = mm->mn;
+ struct i915_mmu_notifier *mn;
+ int err = 0;
mn = mm->mn;
if (mn)
return mn;
+ mn = i915_mmu_notifier_create(mm->mm);
+ if (IS_ERR(mn))
+ err = PTR_ERR(mn);
+
down_write(&mm->mm->mmap_sem);
mutex_lock(&mm->i915->mm_lock);
- if ((mn = mm->mn) == NULL) {
- mn = i915_mmu_notifier_create(mm->mm);
- if (!IS_ERR(mn))
- mm->mn = mn;
+ if (mm->mn == NULL && !err) {
+ /* Protected by mmap_sem (write-lock) */
+ err = __mmu_notifier_register(&mn->mn, mm->mm);
+ if (!err) {
+ /* Protected by mm_lock */
+ mm->mn = fetch_and_zero(&mn);
+ }
+ } else {
+ /* someone else raced and successfully installed the mmu
+ * notifier, we can cancel our own errors */
+ err = 0;
}
mutex_unlock(&mm->i915->mm_lock);
up_write(&mm->mm->mmap_sem);
- return mn;
+ if (mn) {
+ destroy_workqueue(mn->wq);
+ kfree(mn);
+ }
+
+ return err ? ERR_PTR(err) : mm->mn;
}
static int
@@ -405,6 +413,7 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
{
unsigned int max_segment = i915_sg_segment_size();
struct sg_table *st;
+ unsigned int sg_page_sizes;
int ret;
st = kmalloc(sizeof(*st), GFP_KERNEL);
@@ -434,6 +443,10 @@ alloc_table:
return ERR_PTR(ret);
}
+ sg_page_sizes = i915_sg_page_sizes(st->sgl);
+
+ __i915_gem_object_set_pages(obj, st, sg_page_sizes);
+
return st;
}
@@ -521,7 +534,6 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
pages = __i915_gem_userptr_alloc_pages(obj, pvec,
npages);
if (!IS_ERR(pages)) {
- __i915_gem_object_set_pages(obj, pages);
pinned = 0;
pages = NULL;
}
@@ -582,8 +594,7 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
return ERR_PTR(-EAGAIN);
}
-static struct sg_table *
-i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
{
const int num_pages = obj->base.size >> PAGE_SHIFT;
struct mm_struct *mm = obj->userptr.mm->mm;
@@ -612,9 +623,9 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
if (obj->userptr.work) {
/* active flag should still be held for the pending work */
if (IS_ERR(obj->userptr.work))
- return ERR_CAST(obj->userptr.work);
+ return PTR_ERR(obj->userptr.work);
else
- return ERR_PTR(-EAGAIN);
+ return -EAGAIN;
}
pvec = NULL;
@@ -650,7 +661,7 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
release_pages(pvec, pinned, 0);
kvfree(pvec);
- return pages;
+ return PTR_ERR_OR_ZERO(pages);
}
static void
diff --git a/drivers/gpu/drm/i915/i915_gemfs.c b/drivers/gpu/drm/i915/i915_gemfs.c
new file mode 100644
index 000000000000..e2993857df37
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gemfs.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/pagemap.h>
+
+#include "i915_drv.h"
+#include "i915_gemfs.h"
+
+int i915_gemfs_init(struct drm_i915_private *i915)
+{
+ struct file_system_type *type;
+ struct vfsmount *gemfs;
+
+ type = get_fs_type("tmpfs");
+ if (!type)
+ return -ENODEV;
+
+ gemfs = kern_mount(type);
+ if (IS_ERR(gemfs))
+ return PTR_ERR(gemfs);
+
+ /*
+ * Enable huge-pages for objects that are at least HPAGE_PMD_SIZE, most
+ * likely 2M. Note that within_size may overallocate huge-pages, if say
+ * we allocate an object of size 2M + 4K, we may get 2M + 2M, but under
+ * memory pressure shmem should split any huge-pages which can be
+ * shrunk.
+ */
+
+ if (has_transparent_hugepage()) {
+ struct super_block *sb = gemfs->mnt_sb;
+ char options[] = "huge=within_size";
+ int flags = 0;
+ int err;
+
+ err = sb->s_op->remount_fs(sb, &flags, options);
+ if (err) {
+ kern_unmount(gemfs);
+ return err;
+ }
+ }
+
+ i915->mm.gemfs = gemfs;
+
+ return 0;
+}
+
+void i915_gemfs_fini(struct drm_i915_private *i915)
+{
+ kern_unmount(i915->mm.gemfs);
+}
diff --git a/drivers/gpu/drm/i915/i915_gemfs.h b/drivers/gpu/drm/i915/i915_gemfs.h
new file mode 100644
index 000000000000..cca8bdc5b93e
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gemfs.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __I915_GEMFS_H__
+#define __I915_GEMFS_H__
+
+struct drm_i915_private;
+
+int i915_gemfs_init(struct drm_i915_private *i915);
+
+void i915_gemfs_fini(struct drm_i915_private *i915);
+
+#endif
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index c14552ab270b..653fb69e7ecb 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -377,9 +377,9 @@ static void error_print_request(struct drm_i915_error_state_buf *m,
if (!erq->seqno)
return;
- err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, emitted %dms ago, head %08x, tail %08x\n",
+ err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms ago, head %08x, tail %08x\n",
prefix, erq->pid, erq->ban_score,
- erq->context, erq->seqno,
+ erq->context, erq->seqno, erq->priority,
jiffies_to_msecs(jiffies - erq->jiffies),
erq->head, erq->tail);
}
@@ -388,9 +388,9 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
const char *header,
const struct drm_i915_error_context *ctx)
{
- err_printf(m, "%s%s[%d] user_handle %d hw_id %d, ban score %d guilty %d active %d\n",
+ err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d guilty %d active %d\n",
header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id,
- ctx->ban_score, ctx->guilty, ctx->active);
+ ctx->priority, ctx->ban_score, ctx->guilty, ctx->active);
}
static void error_print_engine(struct drm_i915_error_state_buf *m,
@@ -1271,6 +1271,7 @@ static void record_request(struct drm_i915_gem_request *request,
struct drm_i915_error_request *erq)
{
erq->context = request->ctx->hw_id;
+ erq->priority = request->priotree.priority;
erq->ban_score = atomic_read(&request->ctx->ban_score);
erq->seqno = request->global_seqno;
erq->jiffies = request->emitted_jiffies;
@@ -1364,6 +1365,7 @@ static void record_context(struct drm_i915_error_context *e,
e->handle = ctx->user_handle;
e->hw_id = ctx->hw_id;
+ e->priority = ctx->priority;
e->ban_score = atomic_read(&ctx->ban_score);
e->guilty = atomic_read(&ctx->guilty_count);
e->active = atomic_read(&ctx->active_count);
@@ -1672,8 +1674,8 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
struct i915_gpu_state *error)
{
error->awake = dev_priv->gt.awake;
- error->wakelock = atomic_read(&dev_priv->pm.wakeref_count);
- error->suspended = dev_priv->pm.suspended;
+ error->wakelock = atomic_read(&dev_priv->runtime_pm.wakeref_count);
+ error->suspended = dev_priv->runtime_pm.suspended;
error->iommu = -1;
#ifdef CONFIG_INTEL_IOMMU
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 04f1281d81a5..a2e8114b739d 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -21,12 +21,13 @@
* IN THE SOFTWARE.
*
*/
-#include <linux/circ_buf.h>
-#include "i915_drv.h"
-#include "intel_uc.h"
+#include <linux/circ_buf.h>
#include <trace/events/dma_fence.h>
+#include "i915_guc_submission.h"
+#include "i915_drv.h"
+
/**
* DOC: GuC-based command submission
*
@@ -337,7 +338,7 @@ static void guc_stage_desc_init(struct intel_guc *guc,
for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
struct intel_context *ce = &ctx->engine[engine->id];
- uint32_t guc_engine_id = engine->guc_id;
+ u32 guc_engine_id = engine->guc_id;
struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id];
/* TODO: We have a design issue to be solved here. Only when we
@@ -387,13 +388,13 @@ static void guc_stage_desc_init(struct intel_guc *guc,
gfx_addr = guc_ggtt_offset(client->vma);
desc->db_trigger_phy = sg_dma_address(client->vma->pages->sgl) +
client->doorbell_offset;
- desc->db_trigger_cpu = (uintptr_t)__get_doorbell(client);
+ desc->db_trigger_cpu = ptr_to_u64(__get_doorbell(client));
desc->db_trigger_uk = gfx_addr + client->doorbell_offset;
desc->process_desc = gfx_addr + client->proc_desc_offset;
desc->wq_addr = gfx_addr + GUC_DB_SIZE;
desc->wq_size = GUC_WQ_SIZE;
- desc->desc_private = (uintptr_t)client;
+ desc->desc_private = ptr_to_u64(client);
}
static void guc_stage_desc_fini(struct intel_guc *guc,
@@ -499,7 +500,7 @@ static void i915_guc_submit(struct intel_engine_cs *engine)
const unsigned int engine_id = engine->id;
unsigned int n;
- for (n = 0; n < ARRAY_SIZE(execlists->port); n++) {
+ for (n = 0; n < execlists_num_ports(execlists); n++) {
struct drm_i915_gem_request *rq;
unsigned int count;
@@ -643,48 +644,6 @@ static void i915_guc_irq_handler(unsigned long data)
* path of i915_guc_submit() above.
*/
-/**
- * intel_guc_allocate_vma() - Allocate a GGTT VMA for GuC usage
- * @guc: the guc
- * @size: size of area to allocate (both virtual space and memory)
- *
- * This is a wrapper to create an object for use with the GuC. In order to
- * use it inside the GuC, an object needs to be pinned lifetime, so we allocate
- * both some backing storage and a range inside the Global GTT. We must pin
- * it in the GGTT somewhere other than than [0, GUC_WOPCM_TOP) because that
- * range is reserved inside GuC.
- *
- * Return: A i915_vma if successful, otherwise an ERR_PTR.
- */
-struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
-{
- struct drm_i915_private *dev_priv = guc_to_i915(guc);
- struct drm_i915_gem_object *obj;
- struct i915_vma *vma;
- int ret;
-
- obj = i915_gem_object_create(dev_priv, size);
- if (IS_ERR(obj))
- return ERR_CAST(obj);
-
- vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL);
- if (IS_ERR(vma))
- goto err;
-
- ret = i915_vma_pin(vma, 0, PAGE_SIZE,
- PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
- if (ret) {
- vma = ERR_PTR(ret);
- goto err;
- }
-
- return vma;
-
-err:
- i915_gem_object_put(obj);
- return vma;
-}
-
/* Check that a doorbell register is in the expected state */
static bool doorbell_ok(struct intel_guc *guc, u16 db_id)
{
@@ -796,8 +755,8 @@ static int guc_init_doorbell_hw(struct intel_guc *guc)
*/
static struct i915_guc_client *
guc_client_alloc(struct drm_i915_private *dev_priv,
- uint32_t engines,
- uint32_t priority,
+ u32 engines,
+ u32 priority,
struct i915_gem_context *ctx)
{
struct i915_guc_client *client;
@@ -1069,6 +1028,7 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
static void guc_interrupts_capture(struct drm_i915_private *dev_priv)
{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
struct intel_engine_cs *engine;
enum intel_engine_id id;
int irqs;
@@ -1105,12 +1065,13 @@ static void guc_interrupts_capture(struct drm_i915_private *dev_priv)
* Here we CLEAR REDIRECT_TO_GUC bit in pm_intrmsk_mbz, which will
* result in the register bit being left SET!
*/
- dev_priv->rps.pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK;
- dev_priv->rps.pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
+ rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK;
+ rps->pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
}
static void guc_interrupts_release(struct drm_i915_private *dev_priv)
{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
struct intel_engine_cs *engine;
enum intel_engine_id id;
int irqs;
@@ -1129,8 +1090,8 @@ static void guc_interrupts_release(struct drm_i915_private *dev_priv)
I915_WRITE(GUC_VCS2_VCS1_IER, 0);
I915_WRITE(GUC_WD_VECS_IER, 0);
- dev_priv->rps.pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
- dev_priv->rps.pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK;
+ rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
+ rps->pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK;
}
int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
@@ -1212,55 +1173,3 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv)
guc_client_free(guc->execbuf_client);
guc->execbuf_client = NULL;
}
-
-/**
- * intel_guc_suspend() - notify GuC entering suspend state
- * @dev_priv: i915 device private
- */
-int intel_guc_suspend(struct drm_i915_private *dev_priv)
-{
- struct intel_guc *guc = &dev_priv->guc;
- struct i915_gem_context *ctx;
- u32 data[3];
-
- if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
- return 0;
-
- gen9_disable_guc_interrupts(dev_priv);
-
- ctx = dev_priv->kernel_context;
-
- data[0] = INTEL_GUC_ACTION_ENTER_S_STATE;
- /* any value greater than GUC_POWER_D0 */
- data[1] = GUC_POWER_D1;
- /* first page is shared data with GuC */
- data[2] = guc_ggtt_offset(ctx->engine[RCS].state) + LRC_GUCSHR_PN * PAGE_SIZE;
-
- return intel_guc_send(guc, data, ARRAY_SIZE(data));
-}
-
-/**
- * intel_guc_resume() - notify GuC resuming from suspend state
- * @dev_priv: i915 device private
- */
-int intel_guc_resume(struct drm_i915_private *dev_priv)
-{
- struct intel_guc *guc = &dev_priv->guc;
- struct i915_gem_context *ctx;
- u32 data[3];
-
- if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
- return 0;
-
- if (i915_modparams.guc_log_level >= 0)
- gen9_enable_guc_interrupts(dev_priv);
-
- ctx = dev_priv->kernel_context;
-
- data[0] = INTEL_GUC_ACTION_EXIT_S_STATE;
- data[1] = GUC_POWER_D0;
- /* first page is shared data with GuC */
- data[2] = guc_ggtt_offset(ctx->engine[RCS].state) + LRC_GUCSHR_PN * PAGE_SIZE;
-
- return intel_guc_send(guc, data, ARRAY_SIZE(data));
-}
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.h b/drivers/gpu/drm/i915/i915_guc_submission.h
new file mode 100644
index 000000000000..cb4353b59059
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_guc_submission.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright © 2014-2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _I915_GUC_SUBMISSION_H_
+#define _I915_GUC_SUBMISSION_H_
+
+#include <linux/spinlock.h>
+
+#include "i915_gem.h"
+
+struct drm_i915_private;
+
+/*
+ * This structure primarily describes the GEM object shared with the GuC.
+ * The specs sometimes refer to this object as a "GuC context", but we use
+ * the term "client" to avoid confusion with hardware contexts. This
+ * GEM object is held for the entire lifetime of our interaction with
+ * the GuC, being allocated before the GuC is loaded with its firmware.
+ * Because there's no way to update the address used by the GuC after
+ * initialisation, the shared object must stay pinned into the GGTT as
+ * long as the GuC is in use. We also keep the first page (only) mapped
+ * into kernel address space, as it includes shared data that must be
+ * updated on every request submission.
+ *
+ * The single GEM object described here is actually made up of several
+ * separate areas, as far as the GuC is concerned. The first page (kept
+ * kmap'd) includes the "process descriptor" which holds sequence data for
+ * the doorbell, and one cacheline which actually *is* the doorbell; a
+ * write to this will "ring the doorbell" (i.e. send an interrupt to the
+ * GuC). The subsequent pages of the client object constitute the work
+ * queue (a circular array of work items), again described in the process
+ * descriptor. Work queue pages are mapped momentarily as required.
+ */
+struct i915_guc_client {
+ struct i915_vma *vma;
+ void *vaddr;
+ struct i915_gem_context *owner;
+ struct intel_guc *guc;
+
+ /* bitmap of (host) engine ids */
+ u32 engines;
+ u32 priority;
+ u32 stage_id;
+ u32 proc_desc_offset;
+
+ u16 doorbell_id;
+ unsigned long doorbell_offset;
+
+ spinlock_t wq_lock;
+ /* Per-engine counts of GuC submissions */
+ u64 submissions[I915_NUM_ENGINES];
+};
+
+int i915_guc_submission_init(struct drm_i915_private *dev_priv);
+int i915_guc_submission_enable(struct drm_i915_private *dev_priv);
+void i915_guc_submission_disable(struct drm_i915_private *dev_priv);
+void i915_guc_submission_fini(struct drm_i915_private *dev_priv);
+
+#endif
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index efd7827ff181..b1296a55c1e4 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -404,19 +404,21 @@ void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv)
{
spin_lock_irq(&dev_priv->irq_lock);
gen6_reset_pm_iir(dev_priv, dev_priv->pm_rps_events);
- dev_priv->rps.pm_iir = 0;
+ dev_priv->gt_pm.rps.pm_iir = 0;
spin_unlock_irq(&dev_priv->irq_lock);
}
void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv)
{
- if (READ_ONCE(dev_priv->rps.interrupts_enabled))
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+ if (READ_ONCE(rps->interrupts_enabled))
return;
spin_lock_irq(&dev_priv->irq_lock);
- WARN_ON_ONCE(dev_priv->rps.pm_iir);
+ WARN_ON_ONCE(rps->pm_iir);
WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events);
- dev_priv->rps.interrupts_enabled = true;
+ rps->interrupts_enabled = true;
gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
spin_unlock_irq(&dev_priv->irq_lock);
@@ -424,11 +426,13 @@ void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv)
void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv)
{
- if (!READ_ONCE(dev_priv->rps.interrupts_enabled))
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+ if (!READ_ONCE(rps->interrupts_enabled))
return;
spin_lock_irq(&dev_priv->irq_lock);
- dev_priv->rps.interrupts_enabled = false;
+ rps->interrupts_enabled = false;
I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u));
@@ -442,7 +446,7 @@ void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv)
* we will reset the GPU to minimum frequencies, so the current
* state of the worker can be discarded.
*/
- cancel_work_sync(&dev_priv->rps.work);
+ cancel_work_sync(&rps->work);
gen6_reset_rps_interrupts(dev_priv);
}
@@ -1119,12 +1123,13 @@ static void vlv_c0_read(struct drm_i915_private *dev_priv,
void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
{
- memset(&dev_priv->rps.ei, 0, sizeof(dev_priv->rps.ei));
+ memset(&dev_priv->gt_pm.rps.ei, 0, sizeof(dev_priv->gt_pm.rps.ei));
}
static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
{
- const struct intel_rps_ei *prev = &dev_priv->rps.ei;
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
+ const struct intel_rps_ei *prev = &rps->ei;
struct intel_rps_ei now;
u32 events = 0;
@@ -1151,28 +1156,29 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
c0 = max(render, media);
c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
- if (c0 > time * dev_priv->rps.up_threshold)
+ if (c0 > time * rps->up_threshold)
events = GEN6_PM_RP_UP_THRESHOLD;
- else if (c0 < time * dev_priv->rps.down_threshold)
+ else if (c0 < time * rps->down_threshold)
events = GEN6_PM_RP_DOWN_THRESHOLD;
}
- dev_priv->rps.ei = now;
+ rps->ei = now;
return events;
}
static void gen6_pm_rps_work(struct work_struct *work)
{
struct drm_i915_private *dev_priv =
- container_of(work, struct drm_i915_private, rps.work);
+ container_of(work, struct drm_i915_private, gt_pm.rps.work);
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
bool client_boost = false;
int new_delay, adj, min, max;
u32 pm_iir = 0;
spin_lock_irq(&dev_priv->irq_lock);
- if (dev_priv->rps.interrupts_enabled) {
- pm_iir = fetch_and_zero(&dev_priv->rps.pm_iir);
- client_boost = atomic_read(&dev_priv->rps.num_waiters);
+ if (rps->interrupts_enabled) {
+ pm_iir = fetch_and_zero(&rps->pm_iir);
+ client_boost = atomic_read(&rps->num_waiters);
}
spin_unlock_irq(&dev_priv->irq_lock);
@@ -1181,18 +1187,18 @@ static void gen6_pm_rps_work(struct work_struct *work)
if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost)
goto out;
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
- adj = dev_priv->rps.last_adj;
- new_delay = dev_priv->rps.cur_freq;
- min = dev_priv->rps.min_freq_softlimit;
- max = dev_priv->rps.max_freq_softlimit;
+ adj = rps->last_adj;
+ new_delay = rps->cur_freq;
+ min = rps->min_freq_softlimit;
+ max = rps->max_freq_softlimit;
if (client_boost)
- max = dev_priv->rps.max_freq;
- if (client_boost && new_delay < dev_priv->rps.boost_freq) {
- new_delay = dev_priv->rps.boost_freq;
+ max = rps->max_freq;
+ if (client_boost && new_delay < rps->boost_freq) {
+ new_delay = rps->boost_freq;
adj = 0;
} else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
if (adj > 0)
@@ -1200,15 +1206,15 @@ static void gen6_pm_rps_work(struct work_struct *work)
else /* CHV needs even encode values */
adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1;
- if (new_delay >= dev_priv->rps.max_freq_softlimit)
+ if (new_delay >= rps->max_freq_softlimit)
adj = 0;
} else if (client_boost) {
adj = 0;
} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
- if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq)
- new_delay = dev_priv->rps.efficient_freq;
- else if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit)
- new_delay = dev_priv->rps.min_freq_softlimit;
+ if (rps->cur_freq > rps->efficient_freq)
+ new_delay = rps->efficient_freq;
+ else if (rps->cur_freq > rps->min_freq_softlimit)
+ new_delay = rps->min_freq_softlimit;
adj = 0;
} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
if (adj < 0)
@@ -1216,13 +1222,13 @@ static void gen6_pm_rps_work(struct work_struct *work)
else /* CHV needs even encode values */
adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1;
- if (new_delay <= dev_priv->rps.min_freq_softlimit)
+ if (new_delay <= rps->min_freq_softlimit)
adj = 0;
} else { /* unknown event */
adj = 0;
}
- dev_priv->rps.last_adj = adj;
+ rps->last_adj = adj;
/* sysfs frequency interfaces may have snuck in while servicing the
* interrupt
@@ -1232,15 +1238,15 @@ static void gen6_pm_rps_work(struct work_struct *work)
if (intel_set_rps(dev_priv, new_delay)) {
DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
- dev_priv->rps.last_adj = 0;
+ rps->last_adj = 0;
}
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
out:
/* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */
spin_lock_irq(&dev_priv->irq_lock);
- if (dev_priv->rps.interrupts_enabled)
+ if (rps->interrupts_enabled)
gen6_unmask_pm_irq(dev_priv, dev_priv->pm_rps_events);
spin_unlock_irq(&dev_priv->irq_lock);
}
@@ -1382,10 +1388,8 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
bool tasklet = false;
if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
- if (port_count(&execlists->port[0])) {
- __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
- tasklet = true;
- }
+ __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+ tasklet = true;
}
if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) {
@@ -1723,12 +1727,14 @@ static void i9xx_pipe_crc_irq_handler(struct drm_i915_private *dev_priv,
* the work queue. */
static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
if (pm_iir & dev_priv->pm_rps_events) {
spin_lock(&dev_priv->irq_lock);
gen6_mask_pm_irq(dev_priv, pm_iir & dev_priv->pm_rps_events);
- if (dev_priv->rps.interrupts_enabled) {
- dev_priv->rps.pm_iir |= pm_iir & dev_priv->pm_rps_events;
- schedule_work(&dev_priv->rps.work);
+ if (rps->interrupts_enabled) {
+ rps->pm_iir |= pm_iir & dev_priv->pm_rps_events;
+ schedule_work(&rps->work);
}
spin_unlock(&dev_priv->irq_lock);
}
@@ -2254,18 +2260,14 @@ static void ivb_err_int_handler(struct drm_i915_private *dev_priv)
static void cpt_serr_int_handler(struct drm_i915_private *dev_priv)
{
u32 serr_int = I915_READ(SERR_INT);
+ enum pipe pipe;
if (serr_int & SERR_INT_POISON)
DRM_ERROR("PCH poison interrupt\n");
- if (serr_int & SERR_INT_TRANS_A_FIFO_UNDERRUN)
- intel_pch_fifo_underrun_irq_handler(dev_priv, PIPE_A);
-
- if (serr_int & SERR_INT_TRANS_B_FIFO_UNDERRUN)
- intel_pch_fifo_underrun_irq_handler(dev_priv, PIPE_B);
-
- if (serr_int & SERR_INT_TRANS_C_FIFO_UNDERRUN)
- intel_pch_fifo_underrun_irq_handler(dev_priv, PIPE_C);
+ for_each_pipe(dev_priv, pipe)
+ if (serr_int & SERR_INT_TRANS_FIFO_UNDERRUN(pipe))
+ intel_pch_fifo_underrun_irq_handler(dev_priv, pipe);
I915_WRITE(SERR_INT, serr_int);
}
@@ -3163,10 +3165,17 @@ void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv,
enum pipe pipe;
spin_lock_irq(&dev_priv->irq_lock);
+
+ if (!intel_irqs_enabled(dev_priv)) {
+ spin_unlock_irq(&dev_priv->irq_lock);
+ return;
+ }
+
for_each_pipe_masked(dev_priv, pipe, pipe_mask)
GEN8_IRQ_INIT_NDX(DE_PIPE, pipe,
dev_priv->de_irq_mask[pipe],
~dev_priv->de_irq_mask[pipe] | extra_ier);
+
spin_unlock_irq(&dev_priv->irq_lock);
}
@@ -3176,8 +3185,15 @@ void gen8_irq_power_well_pre_disable(struct drm_i915_private *dev_priv,
enum pipe pipe;
spin_lock_irq(&dev_priv->irq_lock);
+
+ if (!intel_irqs_enabled(dev_priv)) {
+ spin_unlock_irq(&dev_priv->irq_lock);
+ return;
+ }
+
for_each_pipe_masked(dev_priv, pipe, pipe_mask)
GEN8_IRQ_RESET_NDX(DE_PIPE, pipe);
+
spin_unlock_irq(&dev_priv->irq_lock);
/* make sure we're done processing display irqs */
@@ -3598,16 +3614,15 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv)
else if (IS_BROADWELL(dev_priv))
de_port_enables |= GEN8_PORT_DP_A_HOTPLUG;
- dev_priv->de_irq_mask[PIPE_A] = ~de_pipe_masked;
- dev_priv->de_irq_mask[PIPE_B] = ~de_pipe_masked;
- dev_priv->de_irq_mask[PIPE_C] = ~de_pipe_masked;
+ for_each_pipe(dev_priv, pipe) {
+ dev_priv->de_irq_mask[pipe] = ~de_pipe_masked;
- for_each_pipe(dev_priv, pipe)
if (intel_display_power_is_enabled(dev_priv,
POWER_DOMAIN_PIPE(pipe)))
GEN8_IRQ_INIT_NDX(DE_PIPE, pipe,
dev_priv->de_irq_mask[pipe],
de_pipe_enables);
+ }
GEN3_IRQ_INIT(GEN8_DE_PORT_, ~de_port_masked, de_port_enables);
GEN3_IRQ_INIT(GEN8_DE_MISC_, ~de_misc_masked, de_misc_masked);
@@ -4000,11 +4015,12 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
void intel_irq_init(struct drm_i915_private *dev_priv)
{
struct drm_device *dev = &dev_priv->drm;
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
int i;
intel_hpd_init_work(dev_priv);
- INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work);
+ INIT_WORK(&rps->work, gen6_pm_rps_work);
INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
for (i = 0; i < MAX_L3_SLICES; ++i)
@@ -4020,7 +4036,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
else
dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
- dev_priv->rps.pm_intrmsk_mbz = 0;
+ rps->pm_intrmsk_mbz = 0;
/*
* SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer
@@ -4029,10 +4045,10 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
* TODO: verify if this can be reproduced on VLV,CHV.
*/
if (INTEL_GEN(dev_priv) <= 7)
- dev_priv->rps.pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;
+ rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;
if (INTEL_GEN(dev_priv) >= 8)
- dev_priv->rps.pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
+ rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
if (IS_GEN2(dev_priv)) {
/* Gen2 doesn't have a hardware frame counter */
@@ -4166,7 +4182,7 @@ int intel_irq_install(struct drm_i915_private *dev_priv)
* interrupts as enabled _before_ actually enabling them to avoid
* special cases in our ordering checks.
*/
- dev_priv->pm.irqs_enabled = true;
+ dev_priv->runtime_pm.irqs_enabled = true;
return drm_irq_install(&dev_priv->drm, dev_priv->drm.pdev->irq);
}
@@ -4182,7 +4198,7 @@ void intel_irq_uninstall(struct drm_i915_private *dev_priv)
{
drm_irq_uninstall(&dev_priv->drm);
intel_hpd_cancel_work(dev_priv);
- dev_priv->pm.irqs_enabled = false;
+ dev_priv->runtime_pm.irqs_enabled = false;
}
/**
@@ -4195,7 +4211,7 @@ void intel_irq_uninstall(struct drm_i915_private *dev_priv)
void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv)
{
dev_priv->drm.driver->irq_uninstall(&dev_priv->drm);
- dev_priv->pm.irqs_enabled = false;
+ dev_priv->runtime_pm.irqs_enabled = false;
synchronize_irq(dev_priv->drm.irq);
}
@@ -4208,7 +4224,7 @@ void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv)
*/
void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv)
{
- dev_priv->pm.irqs_enabled = true;
+ dev_priv->runtime_pm.irqs_enabled = true;
dev_priv->drm.driver->irq_preinstall(&dev_priv->drm);
dev_priv->drm.driver->irq_postinstall(&dev_priv->drm);
}
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 9dff323a83d3..b4faeb6aa2bd 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -146,9 +146,6 @@ i915_param_named(disable_display, bool, 0400,
i915_param_named_unsafe(enable_cmd_parser, bool, 0400,
"Enable command parsing (true=enabled [default], false=disabled)");
-i915_param_named_unsafe(use_mmio_flip, int, 0600,
- "use MMIO flips (-1=never, 0=driver discretion [default], 1=always)");
-
i915_param_named(mmio_debug, int, 0600,
"Enable the MMIO debug code for the first N failures (default: off). "
"This may negatively affect performance.");
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 4f3f8d650194..c7292268ed43 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -49,7 +49,6 @@
param(int, guc_log_level, -1) \
param(char *, guc_firmware_path, NULL) \
param(char *, huc_firmware_path, NULL) \
- param(int, use_mmio_flip, 0) \
param(int, mmio_debug, 0) \
param(int, edp_vswing, 0) \
param(int, reset, 2) \
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index da60866b6628..bf467f30c99b 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -54,8 +54,14 @@
.color = { .degamma_lut_size = 512, .gamma_lut_size = 512 }
#define CHV_COLORS \
.color = { .degamma_lut_size = 65, .gamma_lut_size = 257 }
+#define GLK_COLORS \
+ .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 }
/* Keep in gen based order, and chronological order within a gen */
+
+#define GEN_DEFAULT_PAGE_SIZES \
+ .page_sizes = I915_GTT_PAGE_SIZE_4K
+
#define GEN2_FEATURES \
.gen = 2, .num_pipes = 1, \
.has_overlay = 1, .overlay_needs_physical = 1, \
@@ -65,6 +71,7 @@
.ring_mask = RENDER_RING, \
.has_snoop = true, \
GEN_DEFAULT_PIPEOFFSETS, \
+ GEN_DEFAULT_PAGE_SIZES, \
CURSOR_OFFSETS
static const struct intel_device_info intel_i830_info __initconst = {
@@ -98,6 +105,7 @@ static const struct intel_device_info intel_i865g_info __initconst = {
.ring_mask = RENDER_RING, \
.has_snoop = true, \
GEN_DEFAULT_PIPEOFFSETS, \
+ GEN_DEFAULT_PAGE_SIZES, \
CURSOR_OFFSETS
static const struct intel_device_info intel_i915g_info __initconst = {
@@ -161,6 +169,7 @@ static const struct intel_device_info intel_pineview_info __initconst = {
.ring_mask = RENDER_RING, \
.has_snoop = true, \
GEN_DEFAULT_PIPEOFFSETS, \
+ GEN_DEFAULT_PAGE_SIZES, \
CURSOR_OFFSETS
static const struct intel_device_info intel_i965g_info __initconst = {
@@ -203,6 +212,7 @@ static const struct intel_device_info intel_gm45_info __initconst = {
.ring_mask = RENDER_RING | BSD_RING, \
.has_snoop = true, \
GEN_DEFAULT_PIPEOFFSETS, \
+ GEN_DEFAULT_PAGE_SIZES, \
CURSOR_OFFSETS
static const struct intel_device_info intel_ironlake_d_info __initconst = {
@@ -226,6 +236,7 @@ static const struct intel_device_info intel_ironlake_m_info __initconst = {
.has_rc6p = 1, \
.has_aliasing_ppgtt = 1, \
GEN_DEFAULT_PIPEOFFSETS, \
+ GEN_DEFAULT_PAGE_SIZES, \
CURSOR_OFFSETS
#define SNB_D_PLATFORM \
@@ -269,6 +280,7 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info __initconst =
.has_aliasing_ppgtt = 1, \
.has_full_ppgtt = 1, \
GEN_DEFAULT_PIPEOFFSETS, \
+ GEN_DEFAULT_PAGE_SIZES, \
IVB_CURSOR_OFFSETS
#define IVB_D_PLATFORM \
@@ -325,11 +337,12 @@ static const struct intel_device_info intel_valleyview_info __initconst = {
.has_snoop = true,
.ring_mask = RENDER_RING | BSD_RING | BLT_RING,
.display_mmio_offset = VLV_DISPLAY_BASE,
+ GEN_DEFAULT_PAGE_SIZES,
GEN_DEFAULT_PIPEOFFSETS,
CURSOR_OFFSETS
};
-#define HSW_FEATURES \
+#define G75_FEATURES \
GEN7_FEATURES, \
.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, \
.has_ddi = 1, \
@@ -341,7 +354,7 @@ static const struct intel_device_info intel_valleyview_info __initconst = {
.has_runtime_pm = 1
#define HSW_PLATFORM \
- HSW_FEATURES, \
+ G75_FEATURES, \
.platform = INTEL_HASWELL, \
.has_l3_dpf = 1
@@ -360,16 +373,18 @@ static const struct intel_device_info intel_haswell_gt3_info __initconst = {
.gt = 3,
};
-#define BDW_FEATURES \
- HSW_FEATURES, \
+#define GEN8_FEATURES \
+ G75_FEATURES, \
BDW_COLORS, \
+ .page_sizes = I915_GTT_PAGE_SIZE_4K | \
+ I915_GTT_PAGE_SIZE_2M, \
.has_logical_ring_contexts = 1, \
.has_full_48bit_ppgtt = 1, \
.has_64bit_reloc = 1, \
.has_reset_engine = 1
#define BDW_PLATFORM \
- BDW_FEATURES, \
+ GEN8_FEATURES, \
.gen = 8, \
.platform = INTEL_BROADWELL
@@ -415,19 +430,31 @@ static const struct intel_device_info intel_cherryview_info __initconst = {
.has_reset_engine = 1,
.has_snoop = true,
.display_mmio_offset = VLV_DISPLAY_BASE,
+ GEN_DEFAULT_PAGE_SIZES,
GEN_CHV_PIPEOFFSETS,
CURSOR_OFFSETS,
CHV_COLORS,
};
-#define SKL_PLATFORM \
- BDW_FEATURES, \
- .gen = 9, \
- .platform = INTEL_SKYLAKE, \
+#define GEN9_DEFAULT_PAGE_SIZES \
+ .page_sizes = I915_GTT_PAGE_SIZE_4K | \
+ I915_GTT_PAGE_SIZE_64K | \
+ I915_GTT_PAGE_SIZE_2M
+
+#define GEN9_FEATURES \
+ GEN8_FEATURES, \
+ GEN9_DEFAULT_PAGE_SIZES, \
+ .has_logical_ring_preemption = 1, \
.has_csr = 1, \
.has_guc = 1, \
+ .has_ipc = 1, \
.ddb_size = 896
+#define SKL_PLATFORM \
+ GEN9_FEATURES, \
+ .gen = 9, \
+ .platform = INTEL_SKYLAKE
+
static const struct intel_device_info intel_skylake_gt1_info __initconst = {
SKL_PLATFORM,
.gt = 1,
@@ -463,6 +490,7 @@ static const struct intel_device_info intel_skylake_gt4_info __initconst = {
.has_ddi = 1, \
.has_fpga_dbg = 1, \
.has_fbc = 1, \
+ .has_psr = 1, \
.has_runtime_pm = 1, \
.has_pooled_eu = 0, \
.has_csr = 1, \
@@ -470,6 +498,7 @@ static const struct intel_device_info intel_skylake_gt4_info __initconst = {
.has_rc6 = 1, \
.has_dp_mst = 1, \
.has_logical_ring_contexts = 1, \
+ .has_logical_ring_preemption = 1, \
.has_guc = 1, \
.has_aliasing_ppgtt = 1, \
.has_full_ppgtt = 1, \
@@ -477,6 +506,7 @@ static const struct intel_device_info intel_skylake_gt4_info __initconst = {
.has_reset_engine = 1, \
.has_snoop = true, \
.has_ipc = 1, \
+ GEN9_DEFAULT_PAGE_SIZES, \
GEN_DEFAULT_PIPEOFFSETS, \
IVB_CURSOR_OFFSETS, \
BDW_COLORS
@@ -491,17 +521,13 @@ static const struct intel_device_info intel_geminilake_info __initconst = {
GEN9_LP_FEATURES,
.platform = INTEL_GEMINILAKE,
.ddb_size = 1024,
- .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 }
+ GLK_COLORS,
};
#define KBL_PLATFORM \
- BDW_FEATURES, \
+ GEN9_FEATURES, \
.gen = 9, \
- .platform = INTEL_KABYLAKE, \
- .has_csr = 1, \
- .has_guc = 1, \
- .has_ipc = 1, \
- .ddb_size = 896
+ .platform = INTEL_KABYLAKE
static const struct intel_device_info intel_kabylake_gt1_info __initconst = {
KBL_PLATFORM,
@@ -520,13 +546,9 @@ static const struct intel_device_info intel_kabylake_gt3_info __initconst = {
};
#define CFL_PLATFORM \
- BDW_FEATURES, \
+ GEN9_FEATURES, \
.gen = 9, \
- .platform = INTEL_COFFEELAKE, \
- .has_csr = 1, \
- .has_guc = 1, \
- .has_ipc = 1, \
- .ddb_size = 896
+ .platform = INTEL_COFFEELAKE
static const struct intel_device_info intel_coffeelake_gt1_info __initconst = {
CFL_PLATFORM,
@@ -544,16 +566,17 @@ static const struct intel_device_info intel_coffeelake_gt3_info __initconst = {
.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
};
+#define GEN10_FEATURES \
+ GEN9_FEATURES, \
+ .ddb_size = 1024, \
+ GLK_COLORS
+
static const struct intel_device_info intel_cannonlake_gt2_info __initconst = {
- BDW_FEATURES,
+ GEN10_FEATURES,
.is_alpha_support = 1,
.platform = INTEL_CANNONLAKE,
.gen = 10,
.gt = 2,
- .ddb_size = 1024,
- .has_csr = 1,
- .has_ipc = 1,
- .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 }
};
/*
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index ee0d4f14ac98..d2d0a83c09b6 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2371,6 +2371,9 @@ enum i915_power_well_id {
#define GEN9_GAMT_ECO_REG_RW_IA _MMIO(0x4ab0)
#define GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS (1<<18)
+#define GEN8_GAMW_ECO_DEV_RW_IA _MMIO(0x4080)
+#define GAMW_ECO_ENABLE_64K_IPS_FIELD 0xF
+
#define GAMT_CHKN_BIT_REG _MMIO(0x4ab8)
#define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1<<28)
#define GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT (1<<24)
@@ -3819,6 +3822,16 @@ enum {
#define PWM2_GATING_DIS (1 << 14)
#define PWM1_GATING_DIS (1 << 13)
+#define _CLKGATE_DIS_PSL_A 0x46520
+#define _CLKGATE_DIS_PSL_B 0x46524
+#define _CLKGATE_DIS_PSL_C 0x46528
+#define DPF_GATING_DIS (1 << 10)
+#define DPF_RAM_GATING_DIS (1 << 9)
+#define DPFR_GATING_DIS (1 << 8)
+
+#define CLKGATE_DIS_PSL(pipe) \
+ _MMIO_PIPE(pipe, _CLKGATE_DIS_PSL_A, _CLKGATE_DIS_PSL_B)
+
/*
* GEN10 clock gating regs
*/
@@ -5671,8 +5684,7 @@ enum {
#define CBR_PWM_CLOCK_MUX_SELECT (1<<30)
#define CBR4_VLV _MMIO(VLV_DISPLAY_BASE + 0x70450)
-#define CBR_DPLLBMD_PIPE_C (1<<29)
-#define CBR_DPLLBMD_PIPE_B (1<<18)
+#define CBR_DPLLBMD_PIPE(pipe) (1<<(7+(pipe)*11)) /* pipes B and C */
/* FIFO watermark sizes etc */
#define G4X_FIFO_LINE_SIZE 64
@@ -6993,6 +7005,12 @@ enum {
#define GEN9_CS_DEBUG_MODE1 _MMIO(0x20ec)
#define GEN9_CTX_PREEMPT_REG _MMIO(0x2248)
#define GEN8_CS_CHICKEN1 _MMIO(0x2580)
+#define GEN9_PREEMPT_3D_OBJECT_LEVEL (1<<0)
+#define GEN9_PREEMPT_GPGPU_LEVEL(hi, lo) (((hi) << 2) | ((lo) << 1))
+#define GEN9_PREEMPT_GPGPU_MID_THREAD_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(0, 0)
+#define GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(0, 1)
+#define GEN9_PREEMPT_GPGPU_COMMAND_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(1, 0)
+#define GEN9_PREEMPT_GPGPU_LEVEL_MASK GEN9_PREEMPT_GPGPU_LEVEL(1, 1)
/* GEN7 chicken */
#define GEN7_COMMON_SLICE_CHICKEN1 _MMIO(0x7010)
@@ -7164,9 +7182,6 @@ enum {
#define SERR_INT _MMIO(0xc4040)
#define SERR_INT_POISON (1<<31)
-#define SERR_INT_TRANS_C_FIFO_UNDERRUN (1<<6)
-#define SERR_INT_TRANS_B_FIFO_UNDERRUN (1<<3)
-#define SERR_INT_TRANS_A_FIFO_UNDERRUN (1<<0)
#define SERR_INT_TRANS_FIFO_UNDERRUN(pipe) (1<<((pipe)*3))
/* digital port hotplug */
diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
index 5c86925a0294..8f3aa4dc0c98 100644
--- a/drivers/gpu/drm/i915/i915_suspend.c
+++ b/drivers/gpu/drm/i915/i915_suspend.c
@@ -108,8 +108,6 @@ int i915_restore_state(struct drm_i915_private *dev_priv)
mutex_lock(&dev_priv->drm.struct_mutex);
- i915_gem_restore_fences(dev_priv);
-
if (IS_GEN4(dev_priv))
pci_write_config_word(pdev, GCDGMBUS,
dev_priv->regfile.saveGCDGMBUS);
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index d61c8727f756..791759f632e1 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -49,7 +49,7 @@ static u32 calc_residency(struct drm_i915_private *dev_priv,
static ssize_t
show_rc6_mask(struct device *kdev, struct device_attribute *attr, char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%x\n", intel_enable_rc6());
+ return snprintf(buf, PAGE_SIZE, "%x\n", intel_rc6_enabled());
}
static ssize_t
@@ -246,7 +246,7 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev,
intel_runtime_pm_get(dev_priv);
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
u32 freq;
freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
@@ -261,7 +261,7 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev,
ret = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
ret = intel_gpu_freq(dev_priv, ret);
}
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
intel_runtime_pm_put(dev_priv);
@@ -275,7 +275,7 @@ static ssize_t gt_cur_freq_mhz_show(struct device *kdev,
return snprintf(buf, PAGE_SIZE, "%d\n",
intel_gpu_freq(dev_priv,
- dev_priv->rps.cur_freq));
+ dev_priv->gt_pm.rps.cur_freq));
}
static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf)
@@ -284,7 +284,7 @@ static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribu
return snprintf(buf, PAGE_SIZE, "%d\n",
intel_gpu_freq(dev_priv,
- dev_priv->rps.boost_freq));
+ dev_priv->gt_pm.rps.boost_freq));
}
static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
@@ -292,6 +292,7 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
const char *buf, size_t count)
{
struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
u32 val;
ssize_t ret;
@@ -301,12 +302,12 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
/* Validate against (static) hardware limits */
val = intel_freq_opcode(dev_priv, val);
- if (val < dev_priv->rps.min_freq || val > dev_priv->rps.max_freq)
+ if (val < rps->min_freq || val > rps->max_freq)
return -EINVAL;
- mutex_lock(&dev_priv->rps.hw_lock);
- dev_priv->rps.boost_freq = val;
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
+ rps->boost_freq = val;
+ mutex_unlock(&dev_priv->pcu_lock);
return count;
}
@@ -318,7 +319,7 @@ static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev,
return snprintf(buf, PAGE_SIZE, "%d\n",
intel_gpu_freq(dev_priv,
- dev_priv->rps.efficient_freq));
+ dev_priv->gt_pm.rps.efficient_freq));
}
static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf)
@@ -327,7 +328,7 @@ static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute
return snprintf(buf, PAGE_SIZE, "%d\n",
intel_gpu_freq(dev_priv,
- dev_priv->rps.max_freq_softlimit));
+ dev_priv->gt_pm.rps.max_freq_softlimit));
}
static ssize_t gt_max_freq_mhz_store(struct device *kdev,
@@ -335,6 +336,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
const char *buf, size_t count)
{
struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
u32 val;
ssize_t ret;
@@ -344,34 +346,34 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
intel_runtime_pm_get(dev_priv);
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
val = intel_freq_opcode(dev_priv, val);
- if (val < dev_priv->rps.min_freq ||
- val > dev_priv->rps.max_freq ||
- val < dev_priv->rps.min_freq_softlimit) {
- mutex_unlock(&dev_priv->rps.hw_lock);
+ if (val < rps->min_freq ||
+ val > rps->max_freq ||
+ val < rps->min_freq_softlimit) {
+ mutex_unlock(&dev_priv->pcu_lock);
intel_runtime_pm_put(dev_priv);
return -EINVAL;
}
- if (val > dev_priv->rps.rp0_freq)
+ if (val > rps->rp0_freq)
DRM_DEBUG("User requested overclocking to %d\n",
intel_gpu_freq(dev_priv, val));
- dev_priv->rps.max_freq_softlimit = val;
+ rps->max_freq_softlimit = val;
- val = clamp_t(int, dev_priv->rps.cur_freq,
- dev_priv->rps.min_freq_softlimit,
- dev_priv->rps.max_freq_softlimit);
+ val = clamp_t(int, rps->cur_freq,
+ rps->min_freq_softlimit,
+ rps->max_freq_softlimit);
/* We still need *_set_rps to process the new max_delay and
* update the interrupt limits and PMINTRMSK even though
* frequency request may be unchanged. */
ret = intel_set_rps(dev_priv, val);
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
intel_runtime_pm_put(dev_priv);
@@ -384,7 +386,7 @@ static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute
return snprintf(buf, PAGE_SIZE, "%d\n",
intel_gpu_freq(dev_priv,
- dev_priv->rps.min_freq_softlimit));
+ dev_priv->gt_pm.rps.min_freq_softlimit));
}
static ssize_t gt_min_freq_mhz_store(struct device *kdev,
@@ -392,6 +394,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
const char *buf, size_t count)
{
struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
u32 val;
ssize_t ret;
@@ -401,30 +404,30 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
intel_runtime_pm_get(dev_priv);
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
val = intel_freq_opcode(dev_priv, val);
- if (val < dev_priv->rps.min_freq ||
- val > dev_priv->rps.max_freq ||
- val > dev_priv->rps.max_freq_softlimit) {
- mutex_unlock(&dev_priv->rps.hw_lock);
+ if (val < rps->min_freq ||
+ val > rps->max_freq ||
+ val > rps->max_freq_softlimit) {
+ mutex_unlock(&dev_priv->pcu_lock);
intel_runtime_pm_put(dev_priv);
return -EINVAL;
}
- dev_priv->rps.min_freq_softlimit = val;
+ rps->min_freq_softlimit = val;
- val = clamp_t(int, dev_priv->rps.cur_freq,
- dev_priv->rps.min_freq_softlimit,
- dev_priv->rps.max_freq_softlimit);
+ val = clamp_t(int, rps->cur_freq,
+ rps->min_freq_softlimit,
+ rps->max_freq_softlimit);
/* We still need *_set_rps to process the new min_delay and
* update the interrupt limits and PMINTRMSK even though
* frequency request may be unchanged. */
ret = intel_set_rps(dev_priv, val);
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
intel_runtime_pm_put(dev_priv);
@@ -448,14 +451,15 @@ static DEVICE_ATTR(gt_RPn_freq_mhz, S_IRUGO, gt_rp_mhz_show, NULL);
static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf)
{
struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
u32 val;
if (attr == &dev_attr_gt_RP0_freq_mhz)
- val = intel_gpu_freq(dev_priv, dev_priv->rps.rp0_freq);
+ val = intel_gpu_freq(dev_priv, rps->rp0_freq);
else if (attr == &dev_attr_gt_RP1_freq_mhz)
- val = intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq);
+ val = intel_gpu_freq(dev_priv, rps->rp1_freq);
else if (attr == &dev_attr_gt_RPn_freq_mhz)
- val = intel_gpu_freq(dev_priv, dev_priv->rps.min_freq);
+ val = intel_gpu_freq(dev_priv, rps->min_freq);
else
BUG();
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 92f4c5bb7aa7..9cab91ddeb79 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -345,7 +345,7 @@ TRACE_EVENT(i915_gem_object_create,
TP_STRUCT__entry(
__field(struct drm_i915_gem_object *, obj)
- __field(u32, size)
+ __field(u64, size)
),
TP_fast_assign(
@@ -353,7 +353,7 @@ TRACE_EVENT(i915_gem_object_create,
__entry->size = obj->base.size;
),
- TP_printk("obj=%p, size=%u", __entry->obj, __entry->size)
+ TP_printk("obj=%p, size=0x%llx", __entry->obj, __entry->size)
);
TRACE_EVENT(i915_gem_shrink,
@@ -384,7 +384,7 @@ TRACE_EVENT(i915_vma_bind,
__field(struct drm_i915_gem_object *, obj)
__field(struct i915_address_space *, vm)
__field(u64, offset)
- __field(u32, size)
+ __field(u64, size)
__field(unsigned, flags)
),
@@ -396,7 +396,7 @@ TRACE_EVENT(i915_vma_bind,
__entry->flags = flags;
),
- TP_printk("obj=%p, offset=%016llx size=%x%s vm=%p",
+ TP_printk("obj=%p, offset=0x%016llx size=0x%llx%s vm=%p",
__entry->obj, __entry->offset, __entry->size,
__entry->flags & PIN_MAPPABLE ? ", mappable" : "",
__entry->vm)
@@ -410,7 +410,7 @@ TRACE_EVENT(i915_vma_unbind,
__field(struct drm_i915_gem_object *, obj)
__field(struct i915_address_space *, vm)
__field(u64, offset)
- __field(u32, size)
+ __field(u64, size)
),
TP_fast_assign(
@@ -420,18 +420,18 @@ TRACE_EVENT(i915_vma_unbind,
__entry->size = vma->node.size;
),
- TP_printk("obj=%p, offset=%016llx size=%x vm=%p",
+ TP_printk("obj=%p, offset=0x%016llx size=0x%llx vm=%p",
__entry->obj, __entry->offset, __entry->size, __entry->vm)
);
TRACE_EVENT(i915_gem_object_pwrite,
- TP_PROTO(struct drm_i915_gem_object *obj, u32 offset, u32 len),
+ TP_PROTO(struct drm_i915_gem_object *obj, u64 offset, u64 len),
TP_ARGS(obj, offset, len),
TP_STRUCT__entry(
__field(struct drm_i915_gem_object *, obj)
- __field(u32, offset)
- __field(u32, len)
+ __field(u64, offset)
+ __field(u64, len)
),
TP_fast_assign(
@@ -440,18 +440,18 @@ TRACE_EVENT(i915_gem_object_pwrite,
__entry->len = len;
),
- TP_printk("obj=%p, offset=%u, len=%u",
+ TP_printk("obj=%p, offset=0x%llx, len=0x%llx",
__entry->obj, __entry->offset, __entry->len)
);
TRACE_EVENT(i915_gem_object_pread,
- TP_PROTO(struct drm_i915_gem_object *obj, u32 offset, u32 len),
+ TP_PROTO(struct drm_i915_gem_object *obj, u64 offset, u64 len),
TP_ARGS(obj, offset, len),
TP_STRUCT__entry(
__field(struct drm_i915_gem_object *, obj)
- __field(u32, offset)
- __field(u32, len)
+ __field(u64, offset)
+ __field(u64, len)
),
TP_fast_assign(
@@ -460,17 +460,17 @@ TRACE_EVENT(i915_gem_object_pread,
__entry->len = len;
),
- TP_printk("obj=%p, offset=%u, len=%u",
+ TP_printk("obj=%p, offset=0x%llx, len=0x%llx",
__entry->obj, __entry->offset, __entry->len)
);
TRACE_EVENT(i915_gem_object_fault,
- TP_PROTO(struct drm_i915_gem_object *obj, u32 index, bool gtt, bool write),
+ TP_PROTO(struct drm_i915_gem_object *obj, u64 index, bool gtt, bool write),
TP_ARGS(obj, index, gtt, write),
TP_STRUCT__entry(
__field(struct drm_i915_gem_object *, obj)
- __field(u32, index)
+ __field(u64, index)
__field(bool, gtt)
__field(bool, write)
),
@@ -482,7 +482,7 @@ TRACE_EVENT(i915_gem_object_fault,
__entry->write = write;
),
- TP_printk("obj=%p, %s index=%u %s",
+ TP_printk("obj=%p, %s index=%llu %s",
__entry->obj,
__entry->gtt ? "GTT" : "CPU",
__entry->index,
@@ -515,14 +515,14 @@ DEFINE_EVENT(i915_gem_object, i915_gem_object_destroy,
);
TRACE_EVENT(i915_gem_evict,
- TP_PROTO(struct i915_address_space *vm, u32 size, u32 align, unsigned int flags),
+ TP_PROTO(struct i915_address_space *vm, u64 size, u64 align, unsigned int flags),
TP_ARGS(vm, size, align, flags),
TP_STRUCT__entry(
__field(u32, dev)
__field(struct i915_address_space *, vm)
- __field(u32, size)
- __field(u32, align)
+ __field(u64, size)
+ __field(u64, align)
__field(unsigned int, flags)
),
@@ -534,43 +534,11 @@ TRACE_EVENT(i915_gem_evict,
__entry->flags = flags;
),
- TP_printk("dev=%d, vm=%p, size=%d, align=%d %s",
+ TP_printk("dev=%d, vm=%p, size=0x%llx, align=0x%llx %s",
__entry->dev, __entry->vm, __entry->size, __entry->align,
__entry->flags & PIN_MAPPABLE ? ", mappable" : "")
);
-TRACE_EVENT(i915_gem_evict_everything,
- TP_PROTO(struct drm_device *dev),
- TP_ARGS(dev),
-
- TP_STRUCT__entry(
- __field(u32, dev)
- ),
-
- TP_fast_assign(
- __entry->dev = dev->primary->index;
- ),
-
- TP_printk("dev=%d", __entry->dev)
-);
-
-TRACE_EVENT(i915_gem_evict_vm,
- TP_PROTO(struct i915_address_space *vm),
- TP_ARGS(vm),
-
- TP_STRUCT__entry(
- __field(u32, dev)
- __field(struct i915_address_space *, vm)
- ),
-
- TP_fast_assign(
- __entry->dev = vm->i915->drm.primary->index;
- __entry->vm = vm;
- ),
-
- TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm)
-);
-
TRACE_EVENT(i915_gem_evict_node,
TP_PROTO(struct i915_address_space *vm, struct drm_mm_node *node, unsigned int flags),
TP_ARGS(vm, node, flags),
@@ -593,12 +561,29 @@ TRACE_EVENT(i915_gem_evict_node,
__entry->flags = flags;
),
- TP_printk("dev=%d, vm=%p, start=%llx size=%llx, color=%lx, flags=%x",
+ TP_printk("dev=%d, vm=%p, start=0x%llx size=0x%llx, color=0x%lx, flags=%x",
__entry->dev, __entry->vm,
__entry->start, __entry->size,
__entry->color, __entry->flags)
);
+TRACE_EVENT(i915_gem_evict_vm,
+ TP_PROTO(struct i915_address_space *vm),
+ TP_ARGS(vm),
+
+ TP_STRUCT__entry(
+ __field(u32, dev)
+ __field(struct i915_address_space *, vm)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = vm->i915->drm.primary->index;
+ __entry->vm = vm;
+ ),
+
+ TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm)
+);
+
TRACE_EVENT(i915_gem_ring_sync_to,
TP_PROTO(struct drm_i915_gem_request *to,
struct drm_i915_gem_request *from),
@@ -649,29 +634,6 @@ TRACE_EVENT(i915_gem_request_queue,
__entry->flags)
);
-TRACE_EVENT(i915_gem_ring_flush,
- TP_PROTO(struct drm_i915_gem_request *req, u32 invalidate, u32 flush),
- TP_ARGS(req, invalidate, flush),
-
- TP_STRUCT__entry(
- __field(u32, dev)
- __field(u32, ring)
- __field(u32, invalidate)
- __field(u32, flush)
- ),
-
- TP_fast_assign(
- __entry->dev = req->i915->drm.primary->index;
- __entry->ring = req->engine->id;
- __entry->invalidate = invalidate;
- __entry->flush = flush;
- ),
-
- TP_printk("dev=%u, ring=%x, invalidate=%04x, flush=%04x",
- __entry->dev, __entry->ring,
- __entry->invalidate, __entry->flush)
-);
-
DECLARE_EVENT_CLASS(i915_gem_request,
TP_PROTO(struct drm_i915_gem_request *req),
TP_ARGS(req),
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 12fc250b47b9..af3d7cc53fa1 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -99,6 +99,11 @@
__T; \
})
+static inline u64 ptr_to_u64(const void *ptr)
+{
+ return (uintptr_t)ptr;
+}
+
#define u64_to_ptr(T, x) ({ \
typecheck(u64, x); \
(T *)(uintptr_t)(x); \
@@ -119,4 +124,17 @@ static inline void __list_del_many(struct list_head *head,
WRITE_ONCE(head->next, first);
}
+/*
+ * Wait until the work is finally complete, even if it tries to postpone
+ * by requeueing itself. Note, that if the worker never cancels itself,
+ * we will spin forever.
+ */
+static inline void drain_delayed_work(struct delayed_work *dw)
+{
+ do {
+ while (flush_delayed_work(dw))
+ ;
+ } while (delayed_work_pending(dw));
+}
+
#endif /* !__I915_UTILS_H */
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 02d1a5eacb00..4dce2e0197d9 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -266,6 +266,8 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
if (bind_flags == 0)
return 0;
+ GEM_BUG_ON(!vma->pages);
+
trace_i915_vma_bind(vma, bind_flags);
ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
if (ret)
@@ -278,13 +280,16 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
{
void __iomem *ptr;
+ int err;
/* Access through the GTT requires the device to be awake. */
assert_rpm_wakelock_held(vma->vm->i915);
lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
- if (WARN_ON(!i915_vma_is_map_and_fenceable(vma)))
- return IO_ERR_PTR(-ENODEV);
+ if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
+ err = -ENODEV;
+ goto err;
+ }
GEM_BUG_ON(!i915_vma_is_ggtt(vma));
GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0);
@@ -294,14 +299,36 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable,
vma->node.start,
vma->node.size);
- if (ptr == NULL)
- return IO_ERR_PTR(-ENOMEM);
+ if (ptr == NULL) {
+ err = -ENOMEM;
+ goto err;
+ }
vma->iomap = ptr;
}
__i915_vma_pin(vma);
+
+ err = i915_vma_pin_fence(vma);
+ if (err)
+ goto err_unpin;
+
return ptr;
+
+err_unpin:
+ __i915_vma_unpin(vma);
+err:
+ return IO_ERR_PTR(err);
+}
+
+void i915_vma_unpin_iomap(struct i915_vma *vma)
+{
+ lockdep_assert_held(&vma->obj->base.dev->struct_mutex);
+
+ GEM_BUG_ON(vma->iomap == NULL);
+
+ i915_vma_unpin_fence(vma);
+ i915_vma_unpin(vma);
}
void i915_vma_unpin_and_release(struct i915_vma **p_vma)
@@ -471,25 +498,64 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
if (ret)
return ret;
+ GEM_BUG_ON(vma->pages);
+
+ ret = vma->vm->set_pages(vma);
+ if (ret)
+ goto err_unpin;
+
if (flags & PIN_OFFSET_FIXED) {
u64 offset = flags & PIN_OFFSET_MASK;
if (!IS_ALIGNED(offset, alignment) ||
range_overflows(offset, size, end)) {
ret = -EINVAL;
- goto err_unpin;
+ goto err_clear;
}
ret = i915_gem_gtt_reserve(vma->vm, &vma->node,
size, offset, obj->cache_level,
flags);
if (ret)
- goto err_unpin;
+ goto err_clear;
} else {
+ /*
+ * We only support huge gtt pages through the 48b PPGTT,
+ * however we also don't want to force any alignment for
+ * objects which need to be tightly packed into the low 32bits.
+ *
+ * Note that we assume that GGTT are limited to 4GiB for the
+ * forseeable future. See also i915_ggtt_offset().
+ */
+ if (upper_32_bits(end - 1) &&
+ vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
+ /*
+ * We can't mix 64K and 4K PTEs in the same page-table
+ * (2M block), and so to avoid the ugliness and
+ * complexity of coloring we opt for just aligning 64K
+ * objects to 2M.
+ */
+ u64 page_alignment =
+ rounddown_pow_of_two(vma->page_sizes.sg |
+ I915_GTT_PAGE_SIZE_2M);
+
+ /*
+ * Check we don't expand for the limited Global GTT
+ * (mappable aperture is even more precious!). This
+ * also checks that we exclude the aliasing-ppgtt.
+ */
+ GEM_BUG_ON(i915_vma_is_ggtt(vma));
+
+ alignment = max(alignment, page_alignment);
+
+ if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
+ size = round_up(size, I915_GTT_PAGE_SIZE_2M);
+ }
+
ret = i915_gem_gtt_insert(vma->vm, &vma->node,
size, alignment, obj->cache_level,
start, end, flags);
if (ret)
- goto err_unpin;
+ goto err_clear;
GEM_BUG_ON(vma->node.start < start);
GEM_BUG_ON(vma->node.start + vma->node.size > end);
@@ -504,6 +570,8 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
return 0;
+err_clear:
+ vma->vm->clear_pages(vma);
err_unpin:
i915_gem_object_unpin_pages(obj);
return ret;
@@ -517,6 +585,8 @@ i915_vma_remove(struct i915_vma *vma)
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
+ vma->vm->clear_pages(vma);
+
drm_mm_remove_node(&vma->node);
list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
@@ -569,8 +639,8 @@ int __i915_vma_do_pin(struct i915_vma *vma,
err_remove:
if ((bound & I915_VMA_BIND_MASK) == 0) {
- GEM_BUG_ON(vma->pages);
i915_vma_remove(vma);
+ GEM_BUG_ON(vma->pages);
}
err_unpin:
__i915_vma_unpin(vma);
@@ -620,6 +690,30 @@ static void __i915_vma_iounmap(struct i915_vma *vma)
vma->iomap = NULL;
}
+void i915_vma_revoke_mmap(struct i915_vma *vma)
+{
+ struct drm_vma_offset_node *node = &vma->obj->base.vma_node;
+ u64 vma_offset;
+
+ lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+
+ if (!i915_vma_has_userfault(vma))
+ return;
+
+ GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
+ GEM_BUG_ON(!vma->obj->userfault_count);
+
+ vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT;
+ unmap_mapping_range(vma->vm->i915->drm.anon_inode->i_mapping,
+ drm_vma_node_offset_addr(node) + vma_offset,
+ vma->size,
+ 1);
+
+ i915_vma_unset_userfault(vma);
+ if (!--vma->obj->userfault_count)
+ list_del(&vma->obj->userfault_link);
+}
+
int i915_vma_unbind(struct i915_vma *vma)
{
struct drm_i915_gem_object *obj = vma->obj;
@@ -683,11 +777,13 @@ int i915_vma_unbind(struct i915_vma *vma)
return ret;
/* Force a pagefault for domain tracking on next user access */
- i915_gem_release_mmap(obj);
+ i915_vma_revoke_mmap(vma);
__i915_vma_iounmap(vma);
vma->flags &= ~I915_VMA_CAN_FENCE;
}
+ GEM_BUG_ON(vma->fence);
+ GEM_BUG_ON(i915_vma_has_userfault(vma));
if (likely(!vma->vm->closed)) {
trace_i915_vma_unbind(vma);
@@ -695,13 +791,6 @@ int i915_vma_unbind(struct i915_vma *vma)
}
vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
- if (vma->pages != obj->mm.pages) {
- GEM_BUG_ON(!vma->pages);
- sg_free_table(vma->pages);
- kfree(vma->pages);
- }
- vma->pages = NULL;
-
i915_vma_remove(vma);
destroy:
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index e811067c7724..1e2bc9b3c3ac 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -55,6 +55,7 @@ struct i915_vma {
void __iomem *iomap;
u64 size;
u64 display_alignment;
+ struct i915_page_sizes page_sizes;
u32 fence_size;
u32 fence_alignment;
@@ -65,7 +66,7 @@ struct i915_vma {
* that exist in the ctx->handle_vmas LUT for this vma.
*/
unsigned int open_count;
- unsigned int flags;
+ unsigned long flags;
/**
* How many users have pinned this object in GTT space. The following
* users can each hold at most one reference: pwrite/pread, execbuffer
@@ -87,6 +88,8 @@ struct i915_vma {
#define I915_VMA_GGTT BIT(8)
#define I915_VMA_CAN_FENCE BIT(9)
#define I915_VMA_CLOSED BIT(10)
+#define I915_VMA_USERFAULT_BIT 11
+#define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT)
unsigned int active;
struct i915_gem_active last_read[I915_NUM_ENGINES];
@@ -145,6 +148,22 @@ static inline bool i915_vma_is_closed(const struct i915_vma *vma)
return vma->flags & I915_VMA_CLOSED;
}
+static inline bool i915_vma_set_userfault(struct i915_vma *vma)
+{
+ GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
+ return __test_and_set_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
+}
+
+static inline void i915_vma_unset_userfault(struct i915_vma *vma)
+{
+ return __clear_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
+}
+
+static inline bool i915_vma_has_userfault(const struct i915_vma *vma)
+{
+ return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
+}
+
static inline unsigned int i915_vma_get_active(const struct i915_vma *vma)
{
return vma->active;
@@ -243,6 +262,7 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level);
bool i915_vma_misplaced(const struct i915_vma *vma,
u64 size, u64 alignment, u64 flags);
void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
+void i915_vma_revoke_mmap(struct i915_vma *vma);
int __must_check i915_vma_unbind(struct i915_vma *vma);
void i915_vma_unlink_ctx(struct i915_vma *vma);
void i915_vma_close(struct i915_vma *vma);
@@ -321,12 +341,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
* Callers must hold the struct_mutex. This function is only valid to be
* called on a VMA previously iomapped by the caller with i915_vma_pin_iomap().
*/
-static inline void i915_vma_unpin_iomap(struct i915_vma *vma)
-{
- lockdep_assert_held(&vma->obj->base.dev->struct_mutex);
- GEM_BUG_ON(vma->iomap == NULL);
- i915_vma_unpin(vma);
-}
+void i915_vma_unpin_iomap(struct i915_vma *vma);
static inline struct page *i915_vma_first_page(struct i915_vma *vma)
{
@@ -349,15 +364,13 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma)
*
* True if the vma has a fence, false otherwise.
*/
-static inline bool
-i915_vma_pin_fence(struct i915_vma *vma)
+int i915_vma_pin_fence(struct i915_vma *vma);
+int __must_check i915_vma_put_fence(struct i915_vma *vma);
+
+static inline void __i915_vma_unpin_fence(struct i915_vma *vma)
{
- lockdep_assert_held(&vma->obj->base.dev->struct_mutex);
- if (vma->fence) {
- vma->fence->pin_count++;
- return true;
- } else
- return false;
+ GEM_BUG_ON(vma->fence->pin_count <= 0);
+ vma->fence->pin_count--;
}
/**
@@ -372,10 +385,8 @@ static inline void
i915_vma_unpin_fence(struct i915_vma *vma)
{
lockdep_assert_held(&vma->obj->base.dev->struct_mutex);
- if (vma->fence) {
- GEM_BUG_ON(vma->fence->pin_count <= 0);
- vma->fence->pin_count--;
- }
+ if (vma->fence)
+ __i915_vma_unpin_fence(vma);
}
#endif
diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c
index 27743be5b768..0ddba16fde1b 100644
--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/intel_audio.c
@@ -754,7 +754,7 @@ static struct intel_encoder *get_saved_enc(struct drm_i915_private *dev_priv,
{
struct intel_encoder *encoder;
- if (WARN_ON(pipe >= I915_MAX_PIPES))
+ if (WARN_ON(pipe >= INTEL_INFO(dev_priv)->num_pipes))
return NULL;
/* MST */
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index b881ce8596ee..9d5b42953436 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -431,27 +431,6 @@ parse_general_features(struct drm_i915_private *dev_priv,
dev_priv->vbt.fdi_rx_polarity_inverted);
}
-static void
-parse_general_definitions(struct drm_i915_private *dev_priv,
- const struct bdb_header *bdb)
-{
- const struct bdb_general_definitions *general;
-
- general = find_section(bdb, BDB_GENERAL_DEFINITIONS);
- if (general) {
- u16 block_size = get_blocksize(general);
- if (block_size >= sizeof(*general)) {
- int bus_pin = general->crt_ddc_gmbus_pin;
- DRM_DEBUG_KMS("crt_ddc_bus_pin: %d\n", bus_pin);
- if (intel_gmbus_is_valid_pin(dev_priv, bus_pin))
- dev_priv->vbt.crt_ddc_pin = bus_pin;
- } else {
- DRM_DEBUG_KMS("BDB_GD too small (%d). Invalid.\n",
- block_size);
- }
- }
-}
-
static const struct child_device_config *
child_device_ptr(const struct bdb_general_definitions *defs, int i)
{
@@ -459,41 +438,24 @@ child_device_ptr(const struct bdb_general_definitions *defs, int i)
}
static void
-parse_sdvo_device_mapping(struct drm_i915_private *dev_priv,
- const struct bdb_header *bdb)
+parse_sdvo_device_mapping(struct drm_i915_private *dev_priv, u8 bdb_version)
{
struct sdvo_device_mapping *mapping;
- const struct bdb_general_definitions *defs;
const struct child_device_config *child;
- int i, child_device_num, count;
- u16 block_size;
-
- defs = find_section(bdb, BDB_GENERAL_DEFINITIONS);
- if (!defs) {
- DRM_DEBUG_KMS("No general definition block is found, unable to construct sdvo mapping.\n");
- return;
- }
+ int i, count = 0;
/*
- * Only parse SDVO mappings when the general definitions block child
- * device size matches that of the *legacy* child device config
- * struct. Thus, SDVO mapping will be skipped for newer VBT.
+ * Only parse SDVO mappings on gens that could have SDVO. This isn't
+ * accurate and doesn't have to be, as long as it's not too strict.
*/
- if (defs->child_dev_size != LEGACY_CHILD_DEVICE_CONFIG_SIZE) {
- DRM_DEBUG_KMS("Unsupported child device size for SDVO mapping.\n");
+ if (!IS_GEN(dev_priv, 3, 7)) {
+ DRM_DEBUG_KMS("Skipping SDVO device mapping\n");
return;
}
- /* get the block size of general definitions */
- block_size = get_blocksize(defs);
- /* get the number of child device */
- child_device_num = (block_size - sizeof(*defs)) / defs->child_dev_size;
- count = 0;
- for (i = 0; i < child_device_num; i++) {
- child = child_device_ptr(defs, i);
- if (!child->device_type) {
- /* skip the device block if device type is invalid */
- continue;
- }
+
+ for (i = 0, count = 0; i < dev_priv->vbt.child_dev_num; i++) {
+ child = dev_priv->vbt.child_dev + i;
+
if (child->slave_addr != SLAVE_ADDR1 &&
child->slave_addr != SLAVE_ADDR2) {
/*
@@ -544,7 +506,6 @@ parse_sdvo_device_mapping(struct drm_i915_private *dev_priv,
/* No SDVO device info is found */
DRM_DEBUG_KMS("No SDVO device info is found in VBT\n");
}
- return;
}
static void
@@ -1111,7 +1072,7 @@ static void sanitize_aux_ch(struct drm_i915_private *dev_priv,
}
static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port,
- const struct bdb_header *bdb)
+ u8 bdb_version)
{
struct child_device_config *it, *child = NULL;
struct ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port];
@@ -1215,7 +1176,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port,
sanitize_aux_ch(dev_priv, port);
}
- if (bdb->version >= 158) {
+ if (bdb_version >= 158) {
/* The VBT HDMI level shift values match the table we have. */
hdmi_level_shift = child->hdmi_level_shifter_value;
DRM_DEBUG_KMS("VBT HDMI level shift for port %c: %d\n",
@@ -1225,7 +1186,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port,
}
/* Parse the I_boost config for SKL and above */
- if (bdb->version >= 196 && child->iboost) {
+ if (bdb_version >= 196 && child->iboost) {
info->dp_boost_level = translate_iboost(child->dp_iboost_level);
DRM_DEBUG_KMS("VBT (e)DP boost level for port %c: %d\n",
port_name(port), info->dp_boost_level);
@@ -1235,40 +1196,52 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port,
}
}
-static void parse_ddi_ports(struct drm_i915_private *dev_priv,
- const struct bdb_header *bdb)
+static void parse_ddi_ports(struct drm_i915_private *dev_priv, u8 bdb_version)
{
enum port port;
- if (!HAS_DDI(dev_priv))
+ if (!HAS_DDI(dev_priv) && !IS_CHERRYVIEW(dev_priv))
return;
if (!dev_priv->vbt.child_dev_num)
return;
- if (bdb->version < 155)
+ if (bdb_version < 155)
return;
for (port = PORT_A; port < I915_MAX_PORTS; port++)
- parse_ddi_port(dev_priv, port, bdb);
+ parse_ddi_port(dev_priv, port, bdb_version);
}
static void
-parse_device_mapping(struct drm_i915_private *dev_priv,
- const struct bdb_header *bdb)
+parse_general_definitions(struct drm_i915_private *dev_priv,
+ const struct bdb_header *bdb)
{
const struct bdb_general_definitions *defs;
const struct child_device_config *child;
- struct child_device_config *child_dev_ptr;
int i, child_device_num, count;
u8 expected_size;
u16 block_size;
+ int bus_pin;
defs = find_section(bdb, BDB_GENERAL_DEFINITIONS);
if (!defs) {
DRM_DEBUG_KMS("No general definition block is found, no devices defined.\n");
return;
}
+
+ block_size = get_blocksize(defs);
+ if (block_size < sizeof(*defs)) {
+ DRM_DEBUG_KMS("General definitions block too small (%u)\n",
+ block_size);
+ return;
+ }
+
+ bus_pin = defs->crt_ddc_gmbus_pin;
+ DRM_DEBUG_KMS("crt_ddc_bus_pin: %d\n", bus_pin);
+ if (intel_gmbus_is_valid_pin(dev_priv, bus_pin))
+ dev_priv->vbt.crt_ddc_pin = bus_pin;
+
if (bdb->version < 106) {
expected_size = 22;
} else if (bdb->version < 111) {
@@ -1298,18 +1271,14 @@ parse_device_mapping(struct drm_i915_private *dev_priv,
return;
}
- /* get the block size of general definitions */
- block_size = get_blocksize(defs);
/* get the number of child device */
child_device_num = (block_size - sizeof(*defs)) / defs->child_dev_size;
count = 0;
/* get the number of child device that is present */
for (i = 0; i < child_device_num; i++) {
child = child_device_ptr(defs, i);
- if (!child->device_type) {
- /* skip the device block if device type is invalid */
+ if (!child->device_type)
continue;
- }
count++;
}
if (!count) {
@@ -1326,36 +1295,18 @@ parse_device_mapping(struct drm_i915_private *dev_priv,
count = 0;
for (i = 0; i < child_device_num; i++) {
child = child_device_ptr(defs, i);
- if (!child->device_type) {
- /* skip the device block if device type is invalid */
+ if (!child->device_type)
continue;
- }
-
- child_dev_ptr = dev_priv->vbt.child_dev + count;
- count++;
/*
* Copy as much as we know (sizeof) and is available
* (child_dev_size) of the child device. Accessing the data must
* depend on VBT version.
*/
- memcpy(child_dev_ptr, child,
+ memcpy(dev_priv->vbt.child_dev + count, child,
min_t(size_t, defs->child_dev_size, sizeof(*child)));
-
- /*
- * copied full block, now init values when they are not
- * available in current version
- */
- if (bdb->version < 196) {
- /* Set default values for bits added from v196 */
- child_dev_ptr->iboost = 0;
- child_dev_ptr->hpd_invert = 0;
- }
-
- if (bdb->version < 192)
- child_dev_ptr->lspcon = 0;
+ count++;
}
- return;
}
/* Common defaults which may be overridden by VBT. */
@@ -1536,14 +1487,15 @@ void intel_bios_init(struct drm_i915_private *dev_priv)
parse_lfp_panel_data(dev_priv, bdb);
parse_lfp_backlight(dev_priv, bdb);
parse_sdvo_panel_data(dev_priv, bdb);
- parse_sdvo_device_mapping(dev_priv, bdb);
- parse_device_mapping(dev_priv, bdb);
parse_driver_features(dev_priv, bdb);
parse_edp(dev_priv, bdb);
parse_psr(dev_priv, bdb);
parse_mipi_config(dev_priv, bdb);
parse_mipi_sequence(dev_priv, bdb);
- parse_ddi_ports(dev_priv, bdb);
+
+ /* Further processing on pre-parsed data */
+ parse_sdvo_device_mapping(dev_priv, bdb->version);
+ parse_ddi_ports(dev_priv, bdb->version);
out:
if (!vbt) {
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index 87fc42b19336..b2a6d62b71c0 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -503,7 +503,7 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
else
cmd = 0;
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
val &= ~DSPFREQGUAR_MASK;
val |= (cmd << DSPFREQGUAR_SHIFT);
@@ -513,7 +513,7 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
50)) {
DRM_ERROR("timed out waiting for CDclk change\n");
}
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
mutex_lock(&dev_priv->sb_lock);
@@ -590,7 +590,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv,
*/
cmd = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1;
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
val &= ~DSPFREQGUAR_MASK_CHV;
val |= (cmd << DSPFREQGUAR_SHIFT_CHV);
@@ -600,7 +600,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv,
50)) {
DRM_ERROR("timed out waiting for CDclk change\n");
}
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
intel_update_cdclk(dev_priv);
@@ -656,10 +656,10 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv,
"trying to change cdclk frequency with cdclk not enabled\n"))
return;
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
ret = sandybridge_pcode_write(dev_priv,
BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ, 0x0);
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
if (ret) {
DRM_ERROR("failed to inform pcode about cdclk change\n");
return;
@@ -712,9 +712,9 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv,
LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1))
DRM_ERROR("Switching back to LCPLL failed\n");
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, data);
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1);
@@ -928,12 +928,12 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv,
WARN_ON((cdclk == 24000) != (vco == 0));
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL,
SKL_CDCLK_PREPARE_FOR_CHANGE,
SKL_CDCLK_READY_FOR_CHANGE,
SKL_CDCLK_READY_FOR_CHANGE, 3);
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
if (ret) {
DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n",
ret);
@@ -975,9 +975,9 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv,
POSTING_READ(CDCLK_CTL);
/* inform PCU of the change */
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack);
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
intel_update_cdclk(dev_priv);
}
@@ -1268,10 +1268,10 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv,
}
/* Inform power controller of upcoming frequency change */
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ,
0x80000000);
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
if (ret) {
DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n",
@@ -1300,10 +1300,10 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv,
val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE;
I915_WRITE(CDCLK_CTL, val);
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ,
DIV_ROUND_UP(cdclk, 25000));
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
if (ret) {
DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n",
@@ -1518,12 +1518,12 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv,
u32 val, divider, pcu_ack;
int ret;
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL,
SKL_CDCLK_PREPARE_FOR_CHANGE,
SKL_CDCLK_READY_FOR_CHANGE,
SKL_CDCLK_READY_FOR_CHANGE, 3);
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
if (ret) {
DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n",
ret);
@@ -1575,9 +1575,9 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv,
I915_WRITE(CDCLK_CTL, val);
/* inform PCU of the change */
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack);
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
intel_update_cdclk(dev_priv);
}
diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c
index ff9ecd211abb..b8315bca852b 100644
--- a/drivers/gpu/drm/i915/intel_color.c
+++ b/drivers/gpu/drm/i915/intel_color.c
@@ -74,7 +74,7 @@
#define I9XX_CSC_COEFF_1_0 \
((7 << 12) | I9XX_CSC_COEFF_FP(CTM_COEFF_1_0, 8))
-static bool crtc_state_is_legacy(struct drm_crtc_state *state)
+static bool crtc_state_is_legacy_gamma(struct drm_crtc_state *state)
{
return !state->degamma_lut &&
!state->ctm &&
@@ -288,7 +288,7 @@ static void cherryview_load_csc_matrix(struct drm_crtc_state *state)
}
mode = (state->ctm ? CGM_PIPE_MODE_CSC : 0);
- if (!crtc_state_is_legacy(state)) {
+ if (!crtc_state_is_legacy_gamma(state)) {
mode |= (state->degamma_lut ? CGM_PIPE_MODE_DEGAMMA : 0) |
(state->gamma_lut ? CGM_PIPE_MODE_GAMMA : 0);
}
@@ -469,7 +469,7 @@ static void broadwell_load_luts(struct drm_crtc_state *state)
struct intel_crtc_state *intel_state = to_intel_crtc_state(state);
enum pipe pipe = to_intel_crtc(state->crtc)->pipe;
- if (crtc_state_is_legacy(state)) {
+ if (crtc_state_is_legacy_gamma(state)) {
haswell_load_luts(state);
return;
}
@@ -529,7 +529,7 @@ static void glk_load_luts(struct drm_crtc_state *state)
glk_load_degamma_lut(state);
- if (crtc_state_is_legacy(state)) {
+ if (crtc_state_is_legacy_gamma(state)) {
haswell_load_luts(state);
return;
}
@@ -551,7 +551,7 @@ static void cherryview_load_luts(struct drm_crtc_state *state)
uint32_t i, lut_size;
uint32_t word0, word1;
- if (crtc_state_is_legacy(state)) {
+ if (crtc_state_is_legacy_gamma(state)) {
/* Turn off degamma/gamma on CGM block. */
I915_WRITE(CGM_PIPE_MODE(pipe),
(state->ctm ? CGM_PIPE_MODE_CSC : 0));
@@ -632,12 +632,10 @@ int intel_color_check(struct drm_crtc *crtc,
return 0;
/*
- * We also allow no degamma lut and a gamma lut at the legacy
+ * We also allow no degamma lut/ctm and a gamma lut at the legacy
* size (256 entries).
*/
- if (!crtc_state->degamma_lut &&
- crtc_state->gamma_lut &&
- crtc_state->gamma_lut->length == LEGACY_LUT_LENGTH)
+ if (crtc_state_is_legacy_gamma(crtc_state))
return 0;
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 954070255b4d..668e8c3e791d 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -213,6 +213,19 @@ static void pch_post_disable_crt(struct intel_encoder *encoder,
intel_disable_crt(encoder, old_crtc_state, old_conn_state);
}
+static void hsw_disable_crt(struct intel_encoder *encoder,
+ const struct intel_crtc_state *old_crtc_state,
+ const struct drm_connector_state *old_conn_state)
+{
+ struct drm_crtc *crtc = old_crtc_state->base.crtc;
+ struct drm_i915_private *dev_priv = to_i915(crtc->dev);
+ struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+
+ WARN_ON(!intel_crtc->config->has_pch_encoder);
+
+ intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false);
+}
+
static void hsw_post_disable_crt(struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
@@ -225,6 +238,58 @@ static void hsw_post_disable_crt(struct intel_encoder *encoder,
lpt_disable_iclkip(dev_priv);
intel_ddi_fdi_post_disable(encoder, old_crtc_state, old_conn_state);
+
+ WARN_ON(!old_crtc_state->has_pch_encoder);
+
+ intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true);
+}
+
+static void hsw_pre_pll_enable_crt(struct intel_encoder *encoder,
+ const struct intel_crtc_state *pipe_config,
+ const struct drm_connector_state *conn_state)
+{
+ struct drm_crtc *crtc = pipe_config->base.crtc;
+ struct drm_i915_private *dev_priv = to_i915(crtc->dev);
+ struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+
+ WARN_ON(!intel_crtc->config->has_pch_encoder);
+
+ intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false);
+}
+
+static void hsw_pre_enable_crt(struct intel_encoder *encoder,
+ const struct intel_crtc_state *pipe_config,
+ const struct drm_connector_state *conn_state)
+{
+ struct drm_crtc *crtc = pipe_config->base.crtc;
+ struct drm_i915_private *dev_priv = to_i915(crtc->dev);
+ struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+ int pipe = intel_crtc->pipe;
+
+ WARN_ON(!intel_crtc->config->has_pch_encoder);
+
+ intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
+
+ dev_priv->display.fdi_link_train(intel_crtc, pipe_config);
+}
+
+static void hsw_enable_crt(struct intel_encoder *encoder,
+ const struct intel_crtc_state *pipe_config,
+ const struct drm_connector_state *conn_state)
+{
+ struct drm_crtc *crtc = pipe_config->base.crtc;
+ struct drm_i915_private *dev_priv = to_i915(crtc->dev);
+ struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+ int pipe = intel_crtc->pipe;
+
+ WARN_ON(!intel_crtc->config->has_pch_encoder);
+
+ intel_crt_set_dpms(encoder, pipe_config, DRM_MODE_DPMS_ON);
+
+ intel_wait_for_vblank(dev_priv, pipe);
+ intel_wait_for_vblank(dev_priv, pipe);
+ intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
+ intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true);
}
static void intel_enable_crt(struct intel_encoder *encoder,
@@ -890,26 +955,31 @@ void intel_crt_init(struct drm_i915_private *dev_priv)
crt->base.power_domain = POWER_DOMAIN_PORT_CRT;
- crt->base.compute_config = intel_crt_compute_config;
- if (HAS_PCH_SPLIT(dev_priv)) {
- crt->base.disable = pch_disable_crt;
- crt->base.post_disable = pch_post_disable_crt;
- } else {
- crt->base.disable = intel_disable_crt;
- }
- crt->base.enable = intel_enable_crt;
if (I915_HAS_HOTPLUG(dev_priv) &&
!dmi_check_system(intel_spurious_crt_detect))
crt->base.hpd_pin = HPD_CRT;
+
+ crt->base.compute_config = intel_crt_compute_config;
if (HAS_DDI(dev_priv)) {
crt->base.port = PORT_E;
crt->base.get_config = hsw_crt_get_config;
crt->base.get_hw_state = intel_ddi_get_hw_state;
+ crt->base.pre_pll_enable = hsw_pre_pll_enable_crt;
+ crt->base.pre_enable = hsw_pre_enable_crt;
+ crt->base.enable = hsw_enable_crt;
+ crt->base.disable = hsw_disable_crt;
crt->base.post_disable = hsw_post_disable_crt;
} else {
+ if (HAS_PCH_SPLIT(dev_priv)) {
+ crt->base.disable = pch_disable_crt;
+ crt->base.post_disable = pch_post_disable_crt;
+ } else {
+ crt->base.disable = intel_disable_crt;
+ }
crt->base.port = PORT_NONE;
crt->base.get_config = intel_crt_get_config;
crt->base.get_hw_state = intel_crt_get_hw_state;
+ crt->base.enable = intel_enable_crt;
}
intel_connector->get_hw_state = intel_connector_get_hw_state;
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index cdfb624eb82d..ea5d5c9645a4 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -216,7 +216,7 @@ static void gen9_set_dc_state_debugmask(struct drm_i915_private *dev_priv)
mask = DC_STATE_DEBUG_MASK_MEMORY_UP;
- if (IS_BROXTON(dev_priv))
+ if (IS_GEN9_LP(dev_priv))
mask |= DC_STATE_DEBUG_MASK_CORES;
/* The below bit doesn't need to be cleared ever afterwards */
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 93cbbcbbc193..b307b6fe1ce3 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -602,8 +602,10 @@ cnl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries)
} else if (voltage == VOLTAGE_INFO_1_05V) {
*n_entries = ARRAY_SIZE(cnl_ddi_translations_hdmi_1_05V);
return cnl_ddi_translations_hdmi_1_05V;
- } else
+ } else {
+ *n_entries = 1; /* shut up gcc */
MISSING_CASE(voltage);
+ }
return NULL;
}
@@ -621,8 +623,10 @@ cnl_get_buf_trans_dp(struct drm_i915_private *dev_priv, int *n_entries)
} else if (voltage == VOLTAGE_INFO_1_05V) {
*n_entries = ARRAY_SIZE(cnl_ddi_translations_dp_1_05V);
return cnl_ddi_translations_dp_1_05V;
- } else
+ } else {
+ *n_entries = 1; /* shut up gcc */
MISSING_CASE(voltage);
+ }
return NULL;
}
@@ -641,8 +645,10 @@ cnl_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries)
} else if (voltage == VOLTAGE_INFO_1_05V) {
*n_entries = ARRAY_SIZE(cnl_ddi_translations_edp_1_05V);
return cnl_ddi_translations_edp_1_05V;
- } else
+ } else {
+ *n_entries = 1; /* shut up gcc */
MISSING_CASE(voltage);
+ }
return NULL;
} else {
return cnl_get_buf_trans_dp(dev_priv, n_entries);
@@ -1214,6 +1220,9 @@ static int cnl_calc_wrpll_link(struct drm_i915_private *dev_priv,
dco_freq += (((cfgcr0 & DPLL_CFGCR0_DCO_FRACTION_MASK) >>
DPLL_CFGCR0_DCO_FRACTION_SHIFT) * ref_clock) / 0x8000;
+ if (WARN_ON(p0 == 0 || p1 == 0 || p2 == 0))
+ return 0;
+
return dco_freq / (p0 * p1 * p2 * 5);
}
@@ -1713,7 +1722,8 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder,
out:
if (ret && IS_GEN9_LP(dev_priv)) {
tmp = I915_READ(BXT_PHY_CTL(port));
- if ((tmp & (BXT_PHY_LANE_POWERDOWN_ACK |
+ if ((tmp & (BXT_PHY_CMNLANE_POWERDOWN_ACK |
+ BXT_PHY_LANE_POWERDOWN_ACK |
BXT_PHY_LANE_ENABLED)) != BXT_PHY_LANE_ENABLED)
DRM_ERROR("Port %c enabled but PHY powered down? "
"(PHY_CTL %08x)\n", port_name(port), tmp);
@@ -2161,7 +2171,8 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder,
intel_prepare_dp_ddi_buffers(encoder);
intel_ddi_init_dp_buf_reg(encoder);
- intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
+ if (!link_mst)
+ intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
intel_dp_start_link_train(intel_dp);
if (port != PORT_A || INTEL_GEN(dev_priv) >= 9)
intel_dp_stop_link_train(intel_dp);
@@ -2205,8 +2216,16 @@ static void intel_ddi_pre_enable(struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
+ struct drm_crtc *crtc = pipe_config->base.crtc;
+ struct drm_i915_private *dev_priv = to_i915(crtc->dev);
+ struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+ int pipe = intel_crtc->pipe;
int type = encoder->type;
+ WARN_ON(intel_crtc->config->has_pch_encoder);
+
+ intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
+
if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) {
intel_ddi_pre_enable_dp(encoder,
pipe_config->port_clock,
@@ -2235,12 +2254,21 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder,
uint32_t val;
bool wait = false;
- /* old_crtc_state and old_conn_state are NULL when called from DP_MST */
-
if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) {
+ /*
+ * old_crtc_state and old_conn_state are NULL when called from
+ * DP_MST. The main connector associated with this port is never
+ * bound to a crtc for MST.
+ */
+ bool is_mst = !old_crtc_state;
struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
- intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF);
+ /*
+ * Power down sink before disabling the port, otherwise we end
+ * up getting interrupts from the sink on detecting link loss.
+ */
+ if (!is_mst)
+ intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF);
}
val = I915_READ(DDI_BUF_CTL(port));
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 615c58e48613..7e91dc9a0fcf 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1539,7 +1539,7 @@ static void chv_enable_pll(struct intel_crtc *crtc,
* DPLLCMD is AWOL. Use chicken bits to propagate
* the value from DPLLBMD to either pipe B or C.
*/
- I915_WRITE(CBR4_VLV, pipe == PIPE_B ? CBR_DPLLBMD_PIPE_B : CBR_DPLLBMD_PIPE_C);
+ I915_WRITE(CBR4_VLV, CBR_DPLLBMD_PIPE(pipe));
I915_WRITE(DPLL_MD(PIPE_B), pipe_config->dpll_hw_state.dpll_md);
I915_WRITE(CBR4_VLV, 0);
dev_priv->chv_dpll_md[pipe] = pipe_config->dpll_hw_state.dpll_md;
@@ -1568,11 +1568,12 @@ static int intel_num_dvo_pipes(struct drm_i915_private *dev_priv)
return count;
}
-static void i9xx_enable_pll(struct intel_crtc *crtc)
+static void i9xx_enable_pll(struct intel_crtc *crtc,
+ const struct intel_crtc_state *crtc_state)
{
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
i915_reg_t reg = DPLL(crtc->pipe);
- u32 dpll = crtc->config->dpll_hw_state.dpll;
+ u32 dpll = crtc_state->dpll_hw_state.dpll;
int i;
assert_pipe_disabled(dev_priv, crtc->pipe);
@@ -1609,7 +1610,7 @@ static void i9xx_enable_pll(struct intel_crtc *crtc)
if (INTEL_GEN(dev_priv) >= 4) {
I915_WRITE(DPLL_MD(crtc->pipe),
- crtc->config->dpll_hw_state.dpll_md);
+ crtc_state->dpll_hw_state.dpll_md);
} else {
/* The pixel multiplier can only be updated once the
* DPLL is enabled and the clocks are stable.
@@ -1627,15 +1628,6 @@ static void i9xx_enable_pll(struct intel_crtc *crtc)
}
}
-/**
- * i9xx_disable_pll - disable a PLL
- * @dev_priv: i915 private structure
- * @pipe: pipe PLL to disable
- *
- * Disable the PLL for @pipe, making sure the pipe is off first.
- *
- * Note! This is for pre-ILK only.
- */
static void i9xx_disable_pll(struct intel_crtc *crtc)
{
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
@@ -2219,8 +2211,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
* something and try to run the system in a "less than optimal"
* mode that matches the user configuration.
*/
- if (i915_vma_get_fence(vma) == 0)
- i915_vma_pin_fence(vma);
+ i915_vma_pin_fence(vma);
}
i915_vma_get(vma);
@@ -4955,10 +4946,10 @@ void hsw_enable_ips(struct intel_crtc *crtc)
assert_plane_enabled(dev_priv, crtc->plane);
if (IS_BROADWELL(dev_priv)) {
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL,
IPS_ENABLE | IPS_PCODE_CONTROL));
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
/* Quoting Art Runyan: "its not safe to expect any particular
* value in IPS_CTL bit 31 after enabling IPS through the
* mailbox." Moreover, the mailbox may return a bogus state,
@@ -4988,9 +4979,9 @@ void hsw_disable_ips(struct intel_crtc *crtc)
assert_plane_enabled(dev_priv, crtc->plane);
if (IS_BROADWELL(dev_priv)) {
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0));
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
/* wait for pcode to finish disabling IPS, which may take up to 42ms */
if (intel_wait_for_register(dev_priv,
IPS_CTL, IPS_ENABLE, 0,
@@ -5459,6 +5450,20 @@ static bool hsw_crtc_supports_ips(struct intel_crtc *crtc)
return HAS_IPS(to_i915(crtc->base.dev)) && crtc->pipe == PIPE_A;
}
+static void glk_pipe_scaler_clock_gating_wa(struct drm_i915_private *dev_priv,
+ enum pipe pipe, bool apply)
+{
+ u32 val = I915_READ(CLKGATE_DIS_PSL(pipe));
+ u32 mask = DPF_GATING_DIS | DPF_RAM_GATING_DIS | DPFR_GATING_DIS;
+
+ if (apply)
+ val |= mask;
+ else
+ val &= ~mask;
+
+ I915_WRITE(CLKGATE_DIS_PSL(pipe), val);
+}
+
static void haswell_crtc_enable(struct intel_crtc_state *pipe_config,
struct drm_atomic_state *old_state)
{
@@ -5469,13 +5474,11 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config,
enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder;
struct intel_atomic_state *old_intel_state =
to_intel_atomic_state(old_state);
+ bool psl_clkgate_wa;
if (WARN_ON(intel_crtc->active))
return;
- if (intel_crtc->config->has_pch_encoder)
- intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false);
-
intel_encoders_pre_pll_enable(crtc, pipe_config, old_state);
if (intel_crtc->config->shared_dpll)
@@ -5509,19 +5512,17 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config,
intel_crtc->active = true;
- if (intel_crtc->config->has_pch_encoder)
- intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
- else
- intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
-
intel_encoders_pre_enable(crtc, pipe_config, old_state);
- if (intel_crtc->config->has_pch_encoder)
- dev_priv->display.fdi_link_train(intel_crtc, pipe_config);
-
if (!transcoder_is_dsi(cpu_transcoder))
intel_ddi_enable_pipe_clock(pipe_config);
+ /* Display WA #1180: WaDisableScalarClockGating: glk, cnl */
+ psl_clkgate_wa = (IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) &&
+ intel_crtc->config->pch_pfit.enabled;
+ if (psl_clkgate_wa)
+ glk_pipe_scaler_clock_gating_wa(dev_priv, pipe, true);
+
if (INTEL_GEN(dev_priv) >= 9)
skylake_pfit_enable(intel_crtc);
else
@@ -5555,11 +5556,9 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config,
intel_encoders_enable(crtc, pipe_config, old_state);
- if (intel_crtc->config->has_pch_encoder) {
+ if (psl_clkgate_wa) {
intel_wait_for_vblank(dev_priv, pipe);
- intel_wait_for_vblank(dev_priv, pipe);
- intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
- intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true);
+ glk_pipe_scaler_clock_gating_wa(dev_priv, pipe, false);
}
/* If we change the relative order between pipe/planes enabling, we need
@@ -5655,9 +5654,6 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state,
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder;
- if (intel_crtc->config->has_pch_encoder)
- intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false);
-
intel_encoders_disable(crtc, old_crtc_state, old_state);
drm_crtc_vblank_off(crtc);
@@ -5682,9 +5678,6 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state,
intel_ddi_disable_pipe_clock(intel_crtc->config);
intel_encoders_post_disable(crtc, old_crtc_state, old_state);
-
- if (old_crtc_state->has_pch_encoder)
- intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true);
}
static void i9xx_pfit_enable(struct intel_crtc *crtc)
@@ -5894,7 +5887,7 @@ static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config,
intel_encoders_pre_enable(crtc, pipe_config, old_state);
- i9xx_enable_pll(intel_crtc);
+ i9xx_enable_pll(intel_crtc, pipe_config);
i9xx_pfit_enable(intel_crtc);
@@ -8846,11 +8839,11 @@ static uint32_t hsw_read_dcomp(struct drm_i915_private *dev_priv)
static void hsw_write_dcomp(struct drm_i915_private *dev_priv, uint32_t val)
{
if (IS_HASWELL(dev_priv)) {
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
if (sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_D_COMP,
val))
DRM_DEBUG_KMS("Failed to write to D_COMP\n");
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
} else {
I915_WRITE(D_COMP_BDW, val);
POSTING_READ(D_COMP_BDW);
@@ -10245,58 +10238,44 @@ static void ironlake_pch_clock_get(struct intel_crtc *crtc,
&pipe_config->fdi_m_n);
}
-/** Returns the currently programmed mode of the given pipe. */
-struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev,
- struct drm_crtc *crtc)
+/* Returns the currently programmed mode of the given encoder. */
+struct drm_display_mode *
+intel_encoder_current_mode(struct intel_encoder *encoder)
{
- struct drm_i915_private *dev_priv = to_i915(dev);
- struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
- enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder;
+ struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+ struct intel_crtc_state *crtc_state;
struct drm_display_mode *mode;
- struct intel_crtc_state *pipe_config;
- int htot = I915_READ(HTOTAL(cpu_transcoder));
- int hsync = I915_READ(HSYNC(cpu_transcoder));
- int vtot = I915_READ(VTOTAL(cpu_transcoder));
- int vsync = I915_READ(VSYNC(cpu_transcoder));
- enum pipe pipe = intel_crtc->pipe;
+ struct intel_crtc *crtc;
+ enum pipe pipe;
+
+ if (!encoder->get_hw_state(encoder, &pipe))
+ return NULL;
+
+ crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
mode = kzalloc(sizeof(*mode), GFP_KERNEL);
if (!mode)
return NULL;
- pipe_config = kzalloc(sizeof(*pipe_config), GFP_KERNEL);
- if (!pipe_config) {
+ crtc_state = kzalloc(sizeof(*crtc_state), GFP_KERNEL);
+ if (!crtc_state) {
kfree(mode);
return NULL;
}
- /*
- * Construct a pipe_config sufficient for getting the clock info
- * back out of crtc_clock_get.
- *
- * Note, if LVDS ever uses a non-1 pixel multiplier, we'll need
- * to use a real value here instead.
- */
- pipe_config->cpu_transcoder = (enum transcoder) pipe;
- pipe_config->pixel_multiplier = 1;
- pipe_config->dpll_hw_state.dpll = I915_READ(DPLL(pipe));
- pipe_config->dpll_hw_state.fp0 = I915_READ(FP0(pipe));
- pipe_config->dpll_hw_state.fp1 = I915_READ(FP1(pipe));
- i9xx_crtc_clock_get(intel_crtc, pipe_config);
-
- mode->clock = pipe_config->port_clock / pipe_config->pixel_multiplier;
- mode->hdisplay = (htot & 0xffff) + 1;
- mode->htotal = ((htot & 0xffff0000) >> 16) + 1;
- mode->hsync_start = (hsync & 0xffff) + 1;
- mode->hsync_end = ((hsync & 0xffff0000) >> 16) + 1;
- mode->vdisplay = (vtot & 0xffff) + 1;
- mode->vtotal = ((vtot & 0xffff0000) >> 16) + 1;
- mode->vsync_start = (vsync & 0xffff) + 1;
- mode->vsync_end = ((vsync & 0xffff0000) >> 16) + 1;
+ crtc_state->base.crtc = &crtc->base;
- drm_mode_set_name(mode);
+ if (!dev_priv->display.get_pipe_config(crtc, crtc_state)) {
+ kfree(crtc_state);
+ kfree(mode);
+ return NULL;
+ }
- kfree(pipe_config);
+ encoder->get_config(encoder, crtc_state);
+
+ intel_mode_from_pipe_config(mode, crtc_state);
+
+ kfree(crtc_state);
return mode;
}
@@ -11336,6 +11315,18 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv,
PIPE_CONF_CHECK_X(dpll_hw_state.ctrl1);
PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr1);
PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr2);
+ PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr0);
+ PIPE_CONF_CHECK_X(dpll_hw_state.ebb0);
+ PIPE_CONF_CHECK_X(dpll_hw_state.ebb4);
+ PIPE_CONF_CHECK_X(dpll_hw_state.pll0);
+ PIPE_CONF_CHECK_X(dpll_hw_state.pll1);
+ PIPE_CONF_CHECK_X(dpll_hw_state.pll2);
+ PIPE_CONF_CHECK_X(dpll_hw_state.pll3);
+ PIPE_CONF_CHECK_X(dpll_hw_state.pll6);
+ PIPE_CONF_CHECK_X(dpll_hw_state.pll8);
+ PIPE_CONF_CHECK_X(dpll_hw_state.pll9);
+ PIPE_CONF_CHECK_X(dpll_hw_state.pll10);
+ PIPE_CONF_CHECK_X(dpll_hw_state.pcsdw12);
PIPE_CONF_CHECK_X(dsi_pll.ctrl);
PIPE_CONF_CHECK_X(dsi_pll.div);
@@ -12220,7 +12211,10 @@ static void skl_update_crtcs(struct drm_atomic_state *state)
if (updated & cmask || !cstate->base.active)
continue;
- if (skl_ddb_allocation_overlaps(entries, &cstate->wm.skl.ddb, i))
+ if (skl_ddb_allocation_overlaps(dev_priv,
+ entries,
+ &cstate->wm.skl.ddb,
+ i))
continue;
updated |= cmask;
@@ -12515,21 +12509,10 @@ static int intel_atomic_commit(struct drm_device *dev,
struct drm_i915_private *dev_priv = to_i915(dev);
int ret = 0;
- ret = drm_atomic_helper_setup_commit(state, nonblock);
- if (ret)
- return ret;
-
drm_atomic_state_get(state);
i915_sw_fence_init(&intel_state->commit_ready,
intel_atomic_commit_ready);
- ret = intel_atomic_prepare_commit(dev, state);
- if (ret) {
- DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret);
- i915_sw_fence_commit(&intel_state->commit_ready);
- return ret;
- }
-
/*
* The intel_legacy_cursor_update() fast path takes care
* of avoiding the vblank waits for simple cursor
@@ -12538,19 +12521,37 @@ static int intel_atomic_commit(struct drm_device *dev,
* updates happen during the correct frames. Gen9+ have
* double buffered watermarks and so shouldn't need this.
*
- * Do this after drm_atomic_helper_setup_commit() and
- * intel_atomic_prepare_commit() because we still want
- * to skip the flip and fb cleanup waits. Although that
- * does risk yanking the mapping from under the display
- * engine.
+ * Unset state->legacy_cursor_update before the call to
+ * drm_atomic_helper_setup_commit() because otherwise
+ * drm_atomic_helper_wait_for_flip_done() is a noop and
+ * we get FIFO underruns because we didn't wait
+ * for vblank.
*
* FIXME doing watermarks and fb cleanup from a vblank worker
* (assuming we had any) would solve these problems.
*/
- if (INTEL_GEN(dev_priv) < 9)
- state->legacy_cursor_update = false;
+ if (INTEL_GEN(dev_priv) < 9 && state->legacy_cursor_update) {
+ struct intel_crtc_state *new_crtc_state;
+ struct intel_crtc *crtc;
+ int i;
+
+ for_each_new_intel_crtc_in_state(intel_state, crtc, new_crtc_state, i)
+ if (new_crtc_state->wm.need_postvbl_update ||
+ new_crtc_state->update_wm_post)
+ state->legacy_cursor_update = false;
+ }
+
+ ret = intel_atomic_prepare_commit(dev, state);
+ if (ret) {
+ DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret);
+ i915_sw_fence_commit(&intel_state->commit_ready);
+ return ret;
+ }
+
+ ret = drm_atomic_helper_setup_commit(state, nonblock);
+ if (!ret)
+ ret = drm_atomic_helper_swap_state(state, true);
- ret = drm_atomic_helper_swap_state(state, true);
if (ret) {
i915_sw_fence_commit(&intel_state->commit_ready);
@@ -14748,10 +14749,10 @@ static struct intel_connector *intel_encoder_find_connector(struct intel_encoder
}
static bool has_pch_trancoder(struct drm_i915_private *dev_priv,
- enum transcoder pch_transcoder)
+ enum pipe pch_transcoder)
{
return HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) ||
- (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A);
+ (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == PIPE_A);
}
static void intel_sanitize_crtc(struct intel_crtc *crtc,
@@ -14834,7 +14835,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc,
* PCH transcoders B and C would prevent enabling the south
* error interrupt (see cpt_can_enable_serr_int()).
*/
- if (has_pch_trancoder(dev_priv, (enum transcoder)crtc->pipe))
+ if (has_pch_trancoder(dev_priv, crtc->pipe))
crtc->pch_fifo_underrun_disabled = true;
}
}
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 90e756c76f10..b0f446b68f42 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -137,32 +137,20 @@ static void vlv_steal_power_sequencer(struct drm_device *dev,
enum pipe pipe);
static void intel_dp_unset_edid(struct intel_dp *intel_dp);
-static int intel_dp_num_rates(u8 link_bw_code)
-{
- switch (link_bw_code) {
- default:
- WARN(1, "invalid max DP link bw val %x, using 1.62Gbps\n",
- link_bw_code);
- case DP_LINK_BW_1_62:
- return 1;
- case DP_LINK_BW_2_7:
- return 2;
- case DP_LINK_BW_5_4:
- return 3;
- }
-}
-
/* update sink rates from dpcd */
static void intel_dp_set_sink_rates(struct intel_dp *intel_dp)
{
- int i, num_rates;
+ int i, max_rate;
- num_rates = intel_dp_num_rates(intel_dp->dpcd[DP_MAX_LINK_RATE]);
+ max_rate = drm_dp_bw_code_to_link_rate(intel_dp->dpcd[DP_MAX_LINK_RATE]);
- for (i = 0; i < num_rates; i++)
+ for (i = 0; i < ARRAY_SIZE(default_rates); i++) {
+ if (default_rates[i] > max_rate)
+ break;
intel_dp->sink_rates[i] = default_rates[i];
+ }
- intel_dp->num_sink_rates = num_rates;
+ intel_dp->num_sink_rates = i;
}
/* Theoretical max between source and sink */
@@ -254,15 +242,15 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp)
} else if (IS_GEN9_BC(dev_priv)) {
source_rates = skl_rates;
size = ARRAY_SIZE(skl_rates);
- } else {
+ } else if ((IS_HASWELL(dev_priv) && !IS_HSW_ULX(dev_priv)) ||
+ IS_BROADWELL(dev_priv)) {
source_rates = default_rates;
size = ARRAY_SIZE(default_rates);
+ } else {
+ source_rates = default_rates;
+ size = ARRAY_SIZE(default_rates) - 1;
}
- /* This depends on the fact that 5.4 is last value in the array */
- if (!intel_dp_source_supports_hbr2(intel_dp))
- size--;
-
intel_dp->source_rates = source_rates;
intel_dp->num_source_rates = size;
}
@@ -1482,14 +1470,9 @@ intel_dp_aux_init(struct intel_dp *intel_dp)
bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp)
{
- struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
- struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
+ int max_rate = intel_dp->source_rates[intel_dp->num_source_rates - 1];
- if ((IS_HASWELL(dev_priv) && !IS_HSW_ULX(dev_priv)) ||
- IS_BROADWELL(dev_priv) || (INTEL_GEN(dev_priv) >= 9))
- return true;
- else
- return false;
+ return max_rate >= 540000;
}
static void
@@ -2308,8 +2291,8 @@ static void edp_panel_off(struct intel_dp *intel_dp)
I915_WRITE(pp_ctrl_reg, pp);
POSTING_READ(pp_ctrl_reg);
- intel_dp->panel_power_off_time = ktime_get_boottime();
wait_panel_off(intel_dp);
+ intel_dp->panel_power_off_time = ktime_get_boottime();
/* We got a reference when we enabled the VDD. */
intel_display_power_put(dev_priv, intel_dp->aux_power_domain);
@@ -5286,7 +5269,7 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev,
* seems sufficient to avoid this problem.
*/
if (dev_priv->quirks & QUIRK_INCREASE_T12_DELAY) {
- vbt.t11_t12 = max_t(u16, vbt.t11_t12, 900 * 10);
+ vbt.t11_t12 = max_t(u16, vbt.t11_t12, 1300 * 10);
DRM_DEBUG_KMS("Increasing T12 panel delay as per the quirk to %d\n",
vbt.t11_t12);
}
diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c
index 9a396f483f8b..3c131e2544cf 100644
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/intel_dp_mst.c
@@ -162,14 +162,19 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder,
drm_dp_mst_deallocate_vcpi(&intel_dp->mst_mgr, connector->port);
+ /*
+ * Power down mst path before disabling the port, otherwise we end
+ * up getting interrupts from the sink upon detecting link loss.
+ */
+ drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port,
+ false);
+
intel_dp->active_mst_links--;
intel_mst->connector = NULL;
if (intel_dp->active_mst_links == 0) {
intel_dig_port->base.post_disable(&intel_dig_port->base,
NULL, NULL);
-
- intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF);
}
DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links);
}
@@ -196,6 +201,7 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder,
DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links);
+ drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, true);
if (intel_dp->active_mst_links == 0)
intel_dig_port->base.pre_enable(&intel_dig_port->base,
pipe_config, NULL);
diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c
index 09b670929786..de38d014ed39 100644
--- a/drivers/gpu/drm/i915/intel_dpio_phy.c
+++ b/drivers/gpu/drm/i915/intel_dpio_phy.c
@@ -208,12 +208,6 @@ static const struct bxt_ddi_phy_info glk_ddi_phy_info[] = {
},
};
-static u32 bxt_phy_port_mask(const struct bxt_ddi_phy_info *phy_info)
-{
- return (phy_info->dual_channel * BIT(phy_info->channel[DPIO_CH1].port)) |
- BIT(phy_info->channel[DPIO_CH0].port);
-}
-
static const struct bxt_ddi_phy_info *
bxt_get_phy_list(struct drm_i915_private *dev_priv, int *count)
{
@@ -313,7 +307,6 @@ bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv,
enum dpio_phy phy)
{
const struct bxt_ddi_phy_info *phy_info;
- enum port port;
phy_info = bxt_get_phy_info(dev_priv, phy);
@@ -335,19 +328,6 @@ bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv,
return false;
}
- for_each_port_masked(port, bxt_phy_port_mask(phy_info)) {
- u32 tmp = I915_READ(BXT_PHY_CTL(port));
-
- if (tmp & BXT_PHY_CMNLANE_POWERDOWN_ACK) {
- DRM_DEBUG_DRIVER("DDI PHY %d powered, but common lane "
- "for port %c powered down "
- "(PHY_CTL %08x)\n",
- phy, port_name(port), tmp);
-
- return false;
- }
- }
-
return true;
}
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 0cab667fff57..b87946dcc53f 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1243,7 +1243,7 @@ void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv);
static inline u32 gen6_sanitize_rps_pm_mask(const struct drm_i915_private *i915,
u32 mask)
{
- return mask & ~i915->rps.pm_intrmsk_mbz;
+ return mask & ~i915->gt_pm.rps.pm_intrmsk_mbz;
}
void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv);
@@ -1254,7 +1254,7 @@ static inline bool intel_irqs_enabled(struct drm_i915_private *dev_priv)
* We only use drm_irq_uninstall() at unload and VT switch, so
* this is the only thing we need to check.
*/
- return dev_priv->pm.irqs_enabled;
+ return dev_priv->runtime_pm.irqs_enabled;
}
int intel_get_crtc_scanline(struct intel_crtc *crtc);
@@ -1363,8 +1363,9 @@ struct intel_connector *intel_connector_alloc(void);
bool intel_connector_get_hw_state(struct intel_connector *connector);
void intel_connector_attach_encoder(struct intel_connector *connector,
struct intel_encoder *encoder);
-struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev,
- struct drm_crtc *crtc);
+struct drm_display_mode *
+intel_encoder_current_mode(struct intel_encoder *encoder);
+
enum pipe intel_get_pipe_from_connector(struct intel_connector *connector);
int intel_get_pipe_from_crtc_id(struct drm_device *dev, void *data,
struct drm_file *file_priv);
@@ -1790,7 +1791,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
static inline void
assert_rpm_device_not_suspended(struct drm_i915_private *dev_priv)
{
- WARN_ONCE(dev_priv->pm.suspended,
+ WARN_ONCE(dev_priv->runtime_pm.suspended,
"Device suspended during HW access\n");
}
@@ -1798,7 +1799,7 @@ static inline void
assert_rpm_wakelock_held(struct drm_i915_private *dev_priv)
{
assert_rpm_device_not_suspended(dev_priv);
- WARN_ONCE(!atomic_read(&dev_priv->pm.wakeref_count),
+ WARN_ONCE(!atomic_read(&dev_priv->runtime_pm.wakeref_count),
"RPM wakelock ref not held during HW access");
}
@@ -1823,7 +1824,7 @@ assert_rpm_wakelock_held(struct drm_i915_private *dev_priv)
static inline void
disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
{
- atomic_inc(&dev_priv->pm.wakeref_count);
+ atomic_inc(&dev_priv->runtime_pm.wakeref_count);
}
/**
@@ -1840,7 +1841,7 @@ disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
static inline void
enable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
{
- atomic_dec(&dev_priv->pm.wakeref_count);
+ atomic_dec(&dev_priv->runtime_pm.wakeref_count);
}
void intel_runtime_pm_get(struct drm_i915_private *dev_priv);
@@ -1893,7 +1894,8 @@ int intel_enable_sagv(struct drm_i915_private *dev_priv);
int intel_disable_sagv(struct drm_i915_private *dev_priv);
bool skl_wm_level_equals(const struct skl_wm_level *l1,
const struct skl_wm_level *l2);
-bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries,
+bool skl_ddb_allocation_overlaps(struct drm_i915_private *dev_priv,
+ const struct skl_ddb_entry **entries,
const struct skl_ddb_entry *ddb,
int ignore);
bool ilk_disable_lp_wm(struct drm_device *dev);
@@ -1902,7 +1904,7 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
struct intel_crtc_state *cstate);
void intel_init_ipc(struct drm_i915_private *dev_priv);
void intel_enable_ipc(struct drm_i915_private *dev_priv);
-static inline int intel_enable_rc6(void)
+static inline int intel_rc6_enabled(void)
{
return i915_modparams.enable_rc6;
}
diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index 20a7b004ffd7..66bbedc5fa01 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -790,14 +790,19 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
- struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+ struct drm_crtc *crtc = pipe_config->base.crtc;
+ struct drm_i915_private *dev_priv = to_i915(crtc->dev);
+ struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+ int pipe = intel_crtc->pipe;
enum port port;
u32 val;
bool glk_cold_boot = false;
DRM_DEBUG_KMS("\n");
+ intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
+
/*
* The BIOS may leave the PLL in a wonky state where it doesn't
* lock. It needs to be fully powered down to fix it.
diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c
index 5c562e1f56ae..53c9b763f4ce 100644
--- a/drivers/gpu/drm/i915/intel_dvo.c
+++ b/drivers/gpu/drm/i915/intel_dvo.c
@@ -379,32 +379,15 @@ static const struct drm_encoder_funcs intel_dvo_enc_funcs = {
* chip being on DVOB/C and having multiple pipes.
*/
static struct drm_display_mode *
-intel_dvo_get_current_mode(struct drm_connector *connector)
+intel_dvo_get_current_mode(struct intel_encoder *encoder)
{
- struct drm_device *dev = connector->dev;
- struct drm_i915_private *dev_priv = to_i915(dev);
- struct intel_dvo *intel_dvo = intel_attached_dvo(connector);
- uint32_t dvo_val = I915_READ(intel_dvo->dev.dvo_reg);
- struct drm_display_mode *mode = NULL;
+ struct drm_display_mode *mode;
- /* If the DVO port is active, that'll be the LVDS, so we can pull out
- * its timings to get how the BIOS set up the panel.
- */
- if (dvo_val & DVO_ENABLE) {
- struct intel_crtc *crtc;
- int pipe = (dvo_val & DVO_PIPE_B_SELECT) ? 1 : 0;
-
- crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
- if (crtc) {
- mode = intel_crtc_mode_get(dev, &crtc->base);
- if (mode) {
- mode->type |= DRM_MODE_TYPE_PREFERRED;
- if (dvo_val & DVO_HSYNC_ACTIVE_HIGH)
- mode->flags |= DRM_MODE_FLAG_PHSYNC;
- if (dvo_val & DVO_VSYNC_ACTIVE_HIGH)
- mode->flags |= DRM_MODE_FLAG_PVSYNC;
- }
- }
+ mode = intel_encoder_current_mode(encoder);
+ if (mode) {
+ DRM_DEBUG_KMS("using current (BIOS) mode: ");
+ drm_mode_debug_printmodeline(mode);
+ mode->type |= DRM_MODE_TYPE_PREFERRED;
}
return mode;
@@ -551,7 +534,7 @@ void intel_dvo_init(struct drm_i915_private *dev_priv)
* mode being output through DVO.
*/
intel_panel_init(&intel_connector->panel,
- intel_dvo_get_current_mode(connector),
+ intel_dvo_get_current_mode(intel_encoder),
NULL, NULL);
intel_dvo->panel_wants_dither = true;
}
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index a28e2a864cf1..a59b2a30ff5a 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -22,6 +22,8 @@
*
*/
+#include <drm/drm_print.h>
+
#include "i915_drv.h"
#include "intel_ringbuffer.h"
#include "intel_lrc.h"
@@ -39,7 +41,7 @@
#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
-#define GEN10_LR_CONTEXT_RENDER_SIZE (19 * PAGE_SIZE)
+#define GEN10_LR_CONTEXT_RENDER_SIZE (18 * PAGE_SIZE)
#define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE)
@@ -613,9 +615,22 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
if (IS_ERR(ring))
return PTR_ERR(ring);
+ /*
+ * Similarly the preempt context must always be available so that
+ * we can interrupt the engine at any time.
+ */
+ if (INTEL_INFO(engine->i915)->has_logical_ring_preemption) {
+ ring = engine->context_pin(engine,
+ engine->i915->preempt_context);
+ if (IS_ERR(ring)) {
+ ret = PTR_ERR(ring);
+ goto err_unpin_kernel;
+ }
+ }
+
ret = intel_engine_init_breadcrumbs(engine);
if (ret)
- goto err_unpin;
+ goto err_unpin_preempt;
ret = i915_gem_render_state_init(engine);
if (ret)
@@ -634,7 +649,10 @@ err_rs_fini:
i915_gem_render_state_fini(engine);
err_breadcrumbs:
intel_engine_fini_breadcrumbs(engine);
-err_unpin:
+err_unpin_preempt:
+ if (INTEL_INFO(engine->i915)->has_logical_ring_preemption)
+ engine->context_unpin(engine, engine->i915->preempt_context);
+err_unpin_kernel:
engine->context_unpin(engine, engine->i915->kernel_context);
return ret;
}
@@ -660,6 +678,8 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
intel_engine_cleanup_cmd_parser(engine);
i915_gem_batch_pool_fini(&engine->batch_pool);
+ if (INTEL_INFO(engine->i915)->has_logical_ring_preemption)
+ engine->context_unpin(engine, engine->i915->preempt_context);
engine->context_unpin(engine, engine->i915->kernel_context);
}
@@ -830,11 +850,6 @@ static int wa_add(struct drm_i915_private *dev_priv,
#define WA_SET_FIELD_MASKED(addr, mask, value) \
WA_REG(addr, mask, _MASKED_FIELD(mask, value))
-#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask))
-#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask))
-
-#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val)
-
static int wa_ring_whitelist_reg(struct intel_engine_cs *engine,
i915_reg_t reg)
{
@@ -845,8 +860,8 @@ static int wa_ring_whitelist_reg(struct intel_engine_cs *engine,
if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS))
return -EINVAL;
- WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index),
- i915_mmio_reg_offset(reg));
+ I915_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index),
+ i915_mmio_reg_offset(reg));
wa->hw_whitelist_count[engine->id]++;
return 0;
@@ -980,7 +995,11 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine)
GEN9_PBE_COMPRESSED_HASH_SELECTION);
WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
- WA_SET_BIT(MMCD_MISC_CTRL, MMCD_PCLA | MMCD_HOTSPOT_EN);
+
+ I915_WRITE(MMCD_MISC_CTRL,
+ I915_READ(MMCD_MISC_CTRL) |
+ MMCD_PCLA |
+ MMCD_HOTSPOT_EN);
}
/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
@@ -1071,13 +1090,33 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine)
I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
GEN8_LQSC_FLUSH_COHERENT_LINES));
+ /*
+ * Supporting preemption with fine-granularity requires changes in the
+ * batch buffer programming. Since we can't break old userspace, we
+ * need to set our default preemption level to safe value. Userspace is
+ * still able to use more fine-grained preemption levels, since in
+ * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
+ * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
+ * not real HW workarounds, but merely a way to start using preemption
+ * while maintaining old contract with userspace.
+ */
+
+ /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
+ WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
+
+ /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */
+ WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+ GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
+
/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
if (ret)
return ret;
- /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl */
- ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
+ /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
+ I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
+ _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
+ ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
if (ret)
return ret;
@@ -1139,14 +1178,6 @@ static int skl_init_workarounds(struct intel_engine_cs *engine)
if (ret)
return ret;
- /*
- * Actual WA is to disable percontext preemption granularity control
- * until D0 which is the default case so this is equivalent to
- * !WaDisablePerCtxtPreemptionGranularityControl:skl
- */
- I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
- _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
-
/* WaEnableGapsTsvCreditFix:skl */
I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
GEN9_GAPS_TSV_CREDIT_DISABLE));
@@ -1278,7 +1309,16 @@ static int cnl_init_workarounds(struct intel_engine_cs *engine)
/* FtrEnableFastAnisoL1BankingFix: cnl */
WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
+ /* WaDisable3DMidCmdPreemption:cnl */
+ WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
+
+ /* WaDisableGPGPUMidCmdPreemption:cnl */
+ WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+ GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
+
/* WaEnablePreemptionGranularityControlByUMD:cnl */
+ I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
+ _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
if (ret)
return ret;
@@ -1578,6 +1618,164 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
}
}
+static void print_request(struct drm_printer *m,
+ struct drm_i915_gem_request *rq,
+ const char *prefix)
+{
+ drm_printf(m, "%s%x [%x:%x] prio=%d @ %dms: %s\n", prefix,
+ rq->global_seqno, rq->ctx->hw_id, rq->fence.seqno,
+ rq->priotree.priority,
+ jiffies_to_msecs(jiffies - rq->emitted_jiffies),
+ rq->timeline->common->name);
+}
+
+void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m)
+{
+ struct intel_breadcrumbs *b = &engine->breadcrumbs;
+ struct i915_gpu_error *error = &engine->i915->gpu_error;
+ struct drm_i915_private *dev_priv = engine->i915;
+ struct drm_i915_gem_request *rq;
+ struct rb_node *rb;
+ u64 addr;
+
+ drm_printf(m, "%s\n", engine->name);
+ drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n",
+ intel_engine_get_seqno(engine),
+ intel_engine_last_submit(engine),
+ engine->hangcheck.seqno,
+ jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp),
+ engine->timeline->inflight_seqnos);
+ drm_printf(m, "\tReset count: %d\n",
+ i915_reset_engine_count(error, engine));
+
+ rcu_read_lock();
+
+ drm_printf(m, "\tRequests:\n");
+
+ rq = list_first_entry(&engine->timeline->requests,
+ struct drm_i915_gem_request, link);
+ if (&rq->link != &engine->timeline->requests)
+ print_request(m, rq, "\t\tfirst ");
+
+ rq = list_last_entry(&engine->timeline->requests,
+ struct drm_i915_gem_request, link);
+ if (&rq->link != &engine->timeline->requests)
+ print_request(m, rq, "\t\tlast ");
+
+ rq = i915_gem_find_active_request(engine);
+ if (rq) {
+ print_request(m, rq, "\t\tactive ");
+ drm_printf(m,
+ "\t\t[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]\n",
+ rq->head, rq->postfix, rq->tail,
+ rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
+ rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
+ }
+
+ drm_printf(m, "\tRING_START: 0x%08x [0x%08x]\n",
+ I915_READ(RING_START(engine->mmio_base)),
+ rq ? i915_ggtt_offset(rq->ring->vma) : 0);
+ drm_printf(m, "\tRING_HEAD: 0x%08x [0x%08x]\n",
+ I915_READ(RING_HEAD(engine->mmio_base)) & HEAD_ADDR,
+ rq ? rq->ring->head : 0);
+ drm_printf(m, "\tRING_TAIL: 0x%08x [0x%08x]\n",
+ I915_READ(RING_TAIL(engine->mmio_base)) & TAIL_ADDR,
+ rq ? rq->ring->tail : 0);
+ drm_printf(m, "\tRING_CTL: 0x%08x [%s]\n",
+ I915_READ(RING_CTL(engine->mmio_base)),
+ I915_READ(RING_CTL(engine->mmio_base)) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? "waiting" : "");
+
+ rcu_read_unlock();
+
+ addr = intel_engine_get_active_head(engine);
+ drm_printf(m, "\tACTHD: 0x%08x_%08x\n",
+ upper_32_bits(addr), lower_32_bits(addr));
+ addr = intel_engine_get_last_batch_head(engine);
+ drm_printf(m, "\tBBADDR: 0x%08x_%08x\n",
+ upper_32_bits(addr), lower_32_bits(addr));
+
+ if (i915_modparams.enable_execlists) {
+ const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ u32 ptr, read, write;
+ unsigned int idx;
+
+ drm_printf(m, "\tExeclist status: 0x%08x %08x\n",
+ I915_READ(RING_EXECLIST_STATUS_LO(engine)),
+ I915_READ(RING_EXECLIST_STATUS_HI(engine)));
+
+ ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
+ read = GEN8_CSB_READ_PTR(ptr);
+ write = GEN8_CSB_WRITE_PTR(ptr);
+ drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s\n",
+ read, execlists->csb_head,
+ write,
+ intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)),
+ yesno(test_bit(ENGINE_IRQ_EXECLIST,
+ &engine->irq_posted)));
+ if (read >= GEN8_CSB_ENTRIES)
+ read = 0;
+ if (write >= GEN8_CSB_ENTRIES)
+ write = 0;
+ if (read > write)
+ write += GEN8_CSB_ENTRIES;
+ while (read < write) {
+ idx = ++read % GEN8_CSB_ENTRIES;
+ drm_printf(m, "\tExeclist CSB[%d]: 0x%08x [0x%08x in hwsp], context: %d [%d in hwsp]\n",
+ idx,
+ I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)),
+ hws[idx * 2],
+ I915_READ(RING_CONTEXT_STATUS_BUF_HI(engine, idx)),
+ hws[idx * 2 + 1]);
+ }
+
+ rcu_read_lock();
+ for (idx = 0; idx < execlists_num_ports(execlists); idx++) {
+ unsigned int count;
+
+ rq = port_unpack(&execlists->port[idx], &count);
+ if (rq) {
+ drm_printf(m, "\t\tELSP[%d] count=%d, ",
+ idx, count);
+ print_request(m, rq, "rq: ");
+ } else {
+ drm_printf(m, "\t\tELSP[%d] idle\n",
+ idx);
+ }
+ }
+ rcu_read_unlock();
+
+ spin_lock_irq(&engine->timeline->lock);
+ for (rb = execlists->first; rb; rb = rb_next(rb)) {
+ struct i915_priolist *p =
+ rb_entry(rb, typeof(*p), node);
+
+ list_for_each_entry(rq, &p->requests,
+ priotree.link)
+ print_request(m, rq, "\t\tQ ");
+ }
+ spin_unlock_irq(&engine->timeline->lock);
+ } else if (INTEL_GEN(dev_priv) > 6) {
+ drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
+ I915_READ(RING_PP_DIR_BASE(engine)));
+ drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n",
+ I915_READ(RING_PP_DIR_BASE_READ(engine)));
+ drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n",
+ I915_READ(RING_PP_DIR_DCLV(engine)));
+ }
+
+ spin_lock_irq(&b->rb_lock);
+ for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
+ struct intel_wait *w = rb_entry(rb, typeof(*w), node);
+
+ drm_printf(m, "\t%s [%d] waiting for %x\n",
+ w->tsk->comm, w->tsk->pid, w->seqno);
+ }
+ spin_unlock_irq(&b->rb_lock);
+
+ drm_printf(m, "\n");
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_engine.c"
#endif
diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c
new file mode 100644
index 000000000000..9e18c4fb9909
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_guc.c
@@ -0,0 +1,265 @@
+/*
+ * Copyright © 2014-2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "intel_guc.h"
+#include "i915_drv.h"
+
+static void gen8_guc_raise_irq(struct intel_guc *guc)
+{
+ struct drm_i915_private *dev_priv = guc_to_i915(guc);
+
+ I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER);
+}
+
+static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i)
+{
+ GEM_BUG_ON(!guc->send_regs.base);
+ GEM_BUG_ON(!guc->send_regs.count);
+ GEM_BUG_ON(i >= guc->send_regs.count);
+
+ return _MMIO(guc->send_regs.base + 4 * i);
+}
+
+void intel_guc_init_send_regs(struct intel_guc *guc)
+{
+ struct drm_i915_private *dev_priv = guc_to_i915(guc);
+ enum forcewake_domains fw_domains = 0;
+ unsigned int i;
+
+ guc->send_regs.base = i915_mmio_reg_offset(SOFT_SCRATCH(0));
+ guc->send_regs.count = SOFT_SCRATCH_COUNT - 1;
+
+ for (i = 0; i < guc->send_regs.count; i++) {
+ fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
+ guc_send_reg(guc, i),
+ FW_REG_READ | FW_REG_WRITE);
+ }
+ guc->send_regs.fw_domains = fw_domains;
+}
+
+void intel_guc_init_early(struct intel_guc *guc)
+{
+ intel_guc_ct_init_early(&guc->ct);
+
+ mutex_init(&guc->send_mutex);
+ guc->send = intel_guc_send_nop;
+ guc->notify = gen8_guc_raise_irq;
+}
+
+int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len)
+{
+ WARN(1, "Unexpected send: action=%#x\n", *action);
+ return -ENODEV;
+}
+
+/*
+ * This function implements the MMIO based host to GuC interface.
+ */
+int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len)
+{
+ struct drm_i915_private *dev_priv = guc_to_i915(guc);
+ u32 status;
+ int i;
+ int ret;
+
+ GEM_BUG_ON(!len);
+ GEM_BUG_ON(len > guc->send_regs.count);
+
+ /* If CT is available, we expect to use MMIO only during init/fini */
+ GEM_BUG_ON(HAS_GUC_CT(dev_priv) &&
+ *action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER &&
+ *action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER);
+
+ mutex_lock(&guc->send_mutex);
+ intel_uncore_forcewake_get(dev_priv, guc->send_regs.fw_domains);
+
+ for (i = 0; i < len; i++)
+ I915_WRITE(guc_send_reg(guc, i), action[i]);
+
+ POSTING_READ(guc_send_reg(guc, i - 1));
+
+ intel_guc_notify(guc);
+
+ /*
+ * No GuC command should ever take longer than 10ms.
+ * Fast commands should still complete in 10us.
+ */
+ ret = __intel_wait_for_register_fw(dev_priv,
+ guc_send_reg(guc, 0),
+ INTEL_GUC_RECV_MASK,
+ INTEL_GUC_RECV_MASK,
+ 10, 10, &status);
+ if (status != INTEL_GUC_STATUS_SUCCESS) {
+ /*
+ * Either the GuC explicitly returned an error (which
+ * we convert to -EIO here) or no response at all was
+ * received within the timeout limit (-ETIMEDOUT)
+ */
+ if (ret != -ETIMEDOUT)
+ ret = -EIO;
+
+ DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;"
+ " ret=%d status=0x%08X response=0x%08X\n",
+ action[0], ret, status, I915_READ(SOFT_SCRATCH(15)));
+ }
+
+ intel_uncore_forcewake_put(dev_priv, guc->send_regs.fw_domains);
+ mutex_unlock(&guc->send_mutex);
+
+ return ret;
+}
+
+int intel_guc_sample_forcewake(struct intel_guc *guc)
+{
+ struct drm_i915_private *dev_priv = guc_to_i915(guc);
+ u32 action[2];
+
+ action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE;
+ /* WaRsDisableCoarsePowerGating:skl,bxt */
+ if (!intel_rc6_enabled() ||
+ NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
+ action[1] = 0;
+ else
+ /* bit 0 and 1 are for Render and Media domain separately */
+ action[1] = GUC_FORCEWAKE_RENDER | GUC_FORCEWAKE_MEDIA;
+
+ return intel_guc_send(guc, action, ARRAY_SIZE(action));
+}
+
+/**
+ * intel_guc_auth_huc() - Send action to GuC to authenticate HuC ucode
+ * @guc: intel_guc structure
+ * @rsa_offset: rsa offset w.r.t ggtt base of huc vma
+ *
+ * Triggers a HuC firmware authentication request to the GuC via intel_guc_send
+ * INTEL_GUC_ACTION_AUTHENTICATE_HUC interface. This function is invoked by
+ * intel_huc_auth().
+ *
+ * Return: non-zero code on error
+ */
+int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset)
+{
+ u32 action[] = {
+ INTEL_GUC_ACTION_AUTHENTICATE_HUC,
+ rsa_offset
+ };
+
+ return intel_guc_send(guc, action, ARRAY_SIZE(action));
+}
+
+/**
+ * intel_guc_suspend() - notify GuC entering suspend state
+ * @dev_priv: i915 device private
+ */
+int intel_guc_suspend(struct drm_i915_private *dev_priv)
+{
+ struct intel_guc *guc = &dev_priv->guc;
+ struct i915_gem_context *ctx;
+ u32 data[3];
+
+ if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
+ return 0;
+
+ gen9_disable_guc_interrupts(dev_priv);
+
+ ctx = dev_priv->kernel_context;
+
+ data[0] = INTEL_GUC_ACTION_ENTER_S_STATE;
+ /* any value greater than GUC_POWER_D0 */
+ data[1] = GUC_POWER_D1;
+ /* first page is shared data with GuC */
+ data[2] = guc_ggtt_offset(ctx->engine[RCS].state) +
+ LRC_GUCSHR_PN * PAGE_SIZE;
+
+ return intel_guc_send(guc, data, ARRAY_SIZE(data));
+}
+
+/**
+ * intel_guc_resume() - notify GuC resuming from suspend state
+ * @dev_priv: i915 device private
+ */
+int intel_guc_resume(struct drm_i915_private *dev_priv)
+{
+ struct intel_guc *guc = &dev_priv->guc;
+ struct i915_gem_context *ctx;
+ u32 data[3];
+
+ if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
+ return 0;
+
+ if (i915_modparams.guc_log_level >= 0)
+ gen9_enable_guc_interrupts(dev_priv);
+
+ ctx = dev_priv->kernel_context;
+
+ data[0] = INTEL_GUC_ACTION_EXIT_S_STATE;
+ data[1] = GUC_POWER_D0;
+ /* first page is shared data with GuC */
+ data[2] = guc_ggtt_offset(ctx->engine[RCS].state) +
+ LRC_GUCSHR_PN * PAGE_SIZE;
+
+ return intel_guc_send(guc, data, ARRAY_SIZE(data));
+}
+
+/**
+ * intel_guc_allocate_vma() - Allocate a GGTT VMA for GuC usage
+ * @guc: the guc
+ * @size: size of area to allocate (both virtual space and memory)
+ *
+ * This is a wrapper to create an object for use with the GuC. In order to
+ * use it inside the GuC, an object needs to be pinned lifetime, so we allocate
+ * both some backing storage and a range inside the Global GTT. We must pin
+ * it in the GGTT somewhere other than than [0, GUC_WOPCM_TOP) because that
+ * range is reserved inside GuC.
+ *
+ * Return: A i915_vma if successful, otherwise an ERR_PTR.
+ */
+struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
+{
+ struct drm_i915_private *dev_priv = guc_to_i915(guc);
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int ret;
+
+ obj = i915_gem_object_create(dev_priv, size);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL);
+ if (IS_ERR(vma))
+ goto err;
+
+ ret = i915_vma_pin(vma, 0, PAGE_SIZE,
+ PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
+ if (ret) {
+ vma = ERR_PTR(ret);
+ goto err;
+ }
+
+ return vma;
+
+err:
+ i915_gem_object_put(obj);
+ return vma;
+}
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
new file mode 100644
index 000000000000..aa9a7b55be6e
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright © 2014-2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _INTEL_GUC_H_
+#define _INTEL_GUC_H_
+
+#include "intel_uncore.h"
+#include "intel_guc_fwif.h"
+#include "intel_guc_ct.h"
+#include "intel_guc_log.h"
+#include "intel_uc_fw.h"
+#include "i915_guc_reg.h"
+#include "i915_vma.h"
+
+struct intel_guc {
+ struct intel_uc_fw fw;
+ struct intel_guc_log log;
+ struct intel_guc_ct ct;
+
+ /* Log snapshot if GuC errors during load */
+ struct drm_i915_gem_object *load_err_log;
+
+ /* intel_guc_recv interrupt related state */
+ bool interrupts_enabled;
+
+ struct i915_vma *ads_vma;
+ struct i915_vma *stage_desc_pool;
+ void *stage_desc_pool_vaddr;
+ struct ida stage_ids;
+
+ struct i915_guc_client *execbuf_client;
+
+ DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS);
+ /* Cyclic counter mod pagesize */
+ u32 db_cacheline;
+
+ /* GuC's FW specific registers used in MMIO send */
+ struct {
+ u32 base;
+ unsigned int count;
+ enum forcewake_domains fw_domains;
+ } send_regs;
+
+ /* To serialize the intel_guc_send actions */
+ struct mutex send_mutex;
+
+ /* GuC's FW specific send function */
+ int (*send)(struct intel_guc *guc, const u32 *data, u32 len);
+
+ /* GuC's FW specific notify function */
+ void (*notify)(struct intel_guc *guc);
+};
+
+static
+inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len)
+{
+ return guc->send(guc, action, len);
+}
+
+static inline void intel_guc_notify(struct intel_guc *guc)
+{
+ guc->notify(guc);
+}
+
+static inline u32 guc_ggtt_offset(struct i915_vma *vma)
+{
+ u32 offset = i915_ggtt_offset(vma);
+
+ GEM_BUG_ON(offset < GUC_WOPCM_TOP);
+ GEM_BUG_ON(range_overflows_t(u64, offset, vma->size, GUC_GGTT_TOP));
+
+ return offset;
+}
+
+void intel_guc_init_early(struct intel_guc *guc);
+void intel_guc_init_send_regs(struct intel_guc *guc);
+int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len);
+int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len);
+int intel_guc_sample_forcewake(struct intel_guc *guc);
+int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset);
+int intel_guc_suspend(struct drm_i915_private *dev_priv);
+int intel_guc_resume(struct drm_i915_private *dev_priv);
+struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size);
+
+int intel_guc_select_fw(struct intel_guc *guc);
+int intel_guc_init_hw(struct intel_guc *guc);
+u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv);
+
+#endif
diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h
index 7eb6b4fa1d6f..1c0a2a3de121 100644
--- a/drivers/gpu/drm/i915/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
@@ -178,49 +178,49 @@
*/
struct uc_css_header {
- uint32_t module_type;
+ u32 module_type;
/* header_size includes all non-uCode bits, including css_header, rsa
* key, modulus key and exponent data. */
- uint32_t header_size_dw;
- uint32_t header_version;
- uint32_t module_id;
- uint32_t module_vendor;
+ u32 header_size_dw;
+ u32 header_version;
+ u32 module_id;
+ u32 module_vendor;
union {
struct {
- uint8_t day;
- uint8_t month;
- uint16_t year;
+ u8 day;
+ u8 month;
+ u16 year;
};
- uint32_t date;
+ u32 date;
};
- uint32_t size_dw; /* uCode plus header_size_dw */
- uint32_t key_size_dw;
- uint32_t modulus_size_dw;
- uint32_t exponent_size_dw;
+ u32 size_dw; /* uCode plus header_size_dw */
+ u32 key_size_dw;
+ u32 modulus_size_dw;
+ u32 exponent_size_dw;
union {
struct {
- uint8_t hour;
- uint8_t min;
- uint16_t sec;
+ u8 hour;
+ u8 min;
+ u16 sec;
};
- uint32_t time;
+ u32 time;
};
char username[8];
char buildnumber[12];
union {
struct {
- uint32_t branch_client_version;
- uint32_t sw_version;
+ u32 branch_client_version;
+ u32 sw_version;
} guc;
struct {
- uint32_t sw_version;
- uint32_t reserved;
+ u32 sw_version;
+ u32 reserved;
} huc;
};
- uint32_t prod_preprod_fw;
- uint32_t reserved[12];
- uint32_t header_info;
+ u32 prod_preprod_fw;
+ u32 reserved[12];
+ u32 header_info;
} __packed;
struct guc_doorbell_info {
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index c9e25be4db40..c7a800a3798d 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -386,10 +386,7 @@ int intel_guc_select_fw(struct intel_guc *guc)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
- guc->fw.path = NULL;
- guc->fw.fetch_status = INTEL_UC_FIRMWARE_NONE;
- guc->fw.load_status = INTEL_UC_FIRMWARE_NONE;
- guc->fw.type = INTEL_UC_FW_TYPE_GUC;
+ intel_uc_fw_init(&guc->fw, INTEL_UC_FW_TYPE_GUC);
if (i915_modparams.guc_firmware_path) {
guc->fw.path = i915_modparams.guc_firmware_path;
diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
index 6571d96704ad..76d3eb1e4614 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -21,8 +21,11 @@
* IN THE SOFTWARE.
*
*/
+
#include <linux/debugfs.h>
#include <linux/relay.h>
+
+#include "intel_guc_log.h"
#include "i915_drv.h"
static void guc_log_capture_logs(struct intel_guc *guc);
@@ -525,7 +528,8 @@ int intel_guc_log_create(struct intel_guc *guc)
{
struct i915_vma *vma;
unsigned long offset;
- uint32_t size, flags;
+ u32 flags;
+ u32 size;
int ret;
GEM_BUG_ON(guc->log.vma);
diff --git a/drivers/gpu/drm/i915/intel_guc_log.h b/drivers/gpu/drm/i915/intel_guc_log.h
new file mode 100644
index 000000000000..f512cf79339b
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_guc_log.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright © 2014-2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _INTEL_GUC_LOG_H_
+#define _INTEL_GUC_LOG_H_
+
+#include <linux/workqueue.h>
+
+#include "intel_guc_fwif.h"
+
+struct drm_i915_private;
+struct intel_guc;
+
+struct intel_guc_log {
+ u32 flags;
+ struct i915_vma *vma;
+ /* The runtime stuff gets created only when GuC logging gets enabled */
+ struct {
+ void *buf_addr;
+ struct workqueue_struct *flush_wq;
+ struct work_struct flush_work;
+ struct rchan *relay_chan;
+ } runtime;
+ /* logging related stats */
+ u32 capture_miss_count;
+ u32 flush_interrupt_count;
+ u32 prev_overflow_count[GUC_MAX_LOG_BUFFER];
+ u32 total_overflow_count[GUC_MAX_LOG_BUFFER];
+ u32 flush_count[GUC_MAX_LOG_BUFFER];
+};
+
+int intel_guc_log_create(struct intel_guc *guc);
+void intel_guc_log_destroy(struct intel_guc *guc);
+int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val);
+void i915_guc_log_register(struct drm_i915_private *dev_priv);
+void i915_guc_log_unregister(struct drm_i915_private *dev_priv);
+
+#endif
diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c
index 8b4b53525422..4b4cf56d29ad 100644
--- a/drivers/gpu/drm/i915/intel_huc.c
+++ b/drivers/gpu/drm/i915/intel_huc.c
@@ -21,9 +21,11 @@
* IN THE SOFTWARE.
*
*/
-#include <linux/firmware.h>
+
+#include <linux/types.h>
+
+#include "intel_huc.h"
#include "i915_drv.h"
-#include "intel_uc.h"
/**
* DOC: HuC Firmware
@@ -150,10 +152,7 @@ void intel_huc_select_fw(struct intel_huc *huc)
{
struct drm_i915_private *dev_priv = huc_to_i915(huc);
- huc->fw.path = NULL;
- huc->fw.fetch_status = INTEL_UC_FIRMWARE_NONE;
- huc->fw.load_status = INTEL_UC_FIRMWARE_NONE;
- huc->fw.type = INTEL_UC_FW_TYPE_HUC;
+ intel_uc_fw_init(&huc->fw, INTEL_UC_FW_TYPE_HUC);
if (i915_modparams.huc_firmware_path) {
huc->fw.path = i915_modparams.huc_firmware_path;
diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h
new file mode 100644
index 000000000000..aaa38b9e5817
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_huc.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright © 2014-2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _INTEL_HUC_H_
+#define _INTEL_HUC_H_
+
+#include "intel_uc_fw.h"
+
+struct intel_huc {
+ /* Generic uC firmware management */
+ struct intel_uc_fw fw;
+
+ /* HuC-specific additions */
+};
+
+void intel_huc_select_fw(struct intel_huc *huc);
+void intel_huc_init_hw(struct intel_huc *huc);
+void intel_huc_auth(struct intel_huc *huc);
+
+#endif
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 61cac26a8b05..fbfcf88d7fe3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -208,8 +208,9 @@
/* Typical size of the average request (2 pipecontrols and a MI_BB) */
#define EXECLISTS_REQUEST_SIZE 64 /* bytes */
-
#define WA_TAIL_DWORDS 2
+#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
+#define PREEMPT_ID 0x1
static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
struct intel_engine_cs *engine);
@@ -243,8 +244,7 @@ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enabl
return 0;
if (HAS_LOGICAL_RING_CONTEXTS(dev_priv) &&
- USES_PPGTT(dev_priv) &&
- i915_modparams.use_mmio_flip >= 0)
+ USES_PPGTT(dev_priv))
return 1;
return 0;
@@ -348,6 +348,43 @@ find_priolist:
return ptr_pack_bits(p, first, 1);
}
+static void unwind_wa_tail(struct drm_i915_gem_request *rq)
+{
+ rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
+ assert_ring_tail_valid(rq->ring, rq->tail);
+}
+
+static void unwind_incomplete_requests(struct intel_engine_cs *engine)
+{
+ struct drm_i915_gem_request *rq, *rn;
+ struct i915_priolist *uninitialized_var(p);
+ int last_prio = I915_PRIORITY_INVALID;
+
+ lockdep_assert_held(&engine->timeline->lock);
+
+ list_for_each_entry_safe_reverse(rq, rn,
+ &engine->timeline->requests,
+ link) {
+ if (i915_gem_request_completed(rq))
+ return;
+
+ __i915_gem_request_unsubmit(rq);
+ unwind_wa_tail(rq);
+
+ GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID);
+ if (rq->priotree.priority != last_prio) {
+ p = lookup_priolist(engine,
+ &rq->priotree,
+ rq->priotree.priority);
+ p = ptr_mask_bits(p, 1);
+
+ last_prio = rq->priotree.priority;
+ }
+
+ list_add(&rq->priotree.link, &p->requests);
+ }
+}
+
static inline void
execlists_context_status_change(struct drm_i915_gem_request *rq,
unsigned long status)
@@ -392,6 +429,12 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
return ce->lrc_desc;
}
+static inline void elsp_write(u64 desc, u32 __iomem *elsp)
+{
+ writel(upper_32_bits(desc), elsp);
+ writel(lower_32_bits(desc), elsp);
+}
+
static void execlists_submit_ports(struct intel_engine_cs *engine)
{
struct execlist_port *port = engine->execlists.port;
@@ -417,8 +460,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
desc = 0;
}
- writel(upper_32_bits(desc), elsp);
- writel(lower_32_bits(desc), elsp);
+ elsp_write(desc, elsp);
}
}
@@ -451,26 +493,43 @@ static void port_assign(struct execlist_port *port,
port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
}
+static void inject_preempt_context(struct intel_engine_cs *engine)
+{
+ struct intel_context *ce =
+ &engine->i915->preempt_context->engine[engine->id];
+ u32 __iomem *elsp =
+ engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+ unsigned int n;
+
+ GEM_BUG_ON(engine->i915->preempt_context->hw_id != PREEMPT_ID);
+ GEM_BUG_ON(!IS_ALIGNED(ce->ring->size, WA_TAIL_BYTES));
+
+ memset(ce->ring->vaddr + ce->ring->tail, 0, WA_TAIL_BYTES);
+ ce->ring->tail += WA_TAIL_BYTES;
+ ce->ring->tail &= (ce->ring->size - 1);
+ ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail;
+
+ for (n = execlists_num_ports(&engine->execlists); --n; )
+ elsp_write(0, elsp);
+
+ elsp_write(ce->lrc_desc, elsp);
+}
+
+static bool can_preempt(struct intel_engine_cs *engine)
+{
+ return INTEL_INFO(engine->i915)->has_logical_ring_preemption;
+}
+
static void execlists_dequeue(struct intel_engine_cs *engine)
{
- struct drm_i915_gem_request *last;
struct intel_engine_execlists * const execlists = &engine->execlists;
struct execlist_port *port = execlists->port;
const struct execlist_port * const last_port =
&execlists->port[execlists->port_mask];
+ struct drm_i915_gem_request *last = port_request(port);
struct rb_node *rb;
bool submit = false;
- last = port_request(port);
- if (last)
- /* WaIdleLiteRestore:bdw,skl
- * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
- * as we resubmit the request. See gen8_emit_breadcrumb()
- * for where we prepare the padding after the end of the
- * request.
- */
- last->tail = last->wa_tail;
-
/* Hardware submission is through 2 ports. Conceptually each port
* has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
* static for a context, and unique to each, so we only execute
@@ -495,7 +554,65 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
spin_lock_irq(&engine->timeline->lock);
rb = execlists->first;
GEM_BUG_ON(rb_first(&execlists->queue) != rb);
- while (rb) {
+ if (!rb)
+ goto unlock;
+
+ if (last) {
+ /*
+ * Don't resubmit or switch until all outstanding
+ * preemptions (lite-restore) are seen. Then we
+ * know the next preemption status we see corresponds
+ * to this ELSP update.
+ */
+ if (port_count(&port[0]) > 1)
+ goto unlock;
+
+ if (can_preempt(engine) &&
+ rb_entry(rb, struct i915_priolist, node)->priority >
+ max(last->priotree.priority, 0)) {
+ /*
+ * Switch to our empty preempt context so
+ * the state of the GPU is known (idle).
+ */
+ inject_preempt_context(engine);
+ execlists->preempt = true;
+ goto unlock;
+ } else {
+ /*
+ * In theory, we could coalesce more requests onto
+ * the second port (the first port is active, with
+ * no preemptions pending). However, that means we
+ * then have to deal with the possible lite-restore
+ * of the second port (as we submit the ELSP, there
+ * may be a context-switch) but also we may complete
+ * the resubmission before the context-switch. Ergo,
+ * coalescing onto the second port will cause a
+ * preemption event, but we cannot predict whether
+ * that will affect port[0] or port[1].
+ *
+ * If the second port is already active, we can wait
+ * until the next context-switch before contemplating
+ * new requests. The GPU will be busy and we should be
+ * able to resubmit the new ELSP before it idles,
+ * avoiding pipeline bubbles (momentary pauses where
+ * the driver is unable to keep up the supply of new
+ * work).
+ */
+ if (port_count(&port[1]))
+ goto unlock;
+
+ /* WaIdleLiteRestore:bdw,skl
+ * Apply the wa NOOPs to prevent
+ * ring:HEAD == req:TAIL as we resubmit the
+ * request. See gen8_emit_breadcrumb() for
+ * where we prepare the padding after the
+ * end of the request.
+ */
+ last->tail = last->wa_tail;
+ }
+ }
+
+ do {
struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
struct drm_i915_gem_request *rq, *rn;
@@ -547,8 +664,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
}
INIT_LIST_HEAD(&rq->priotree.link);
- rq->priotree.priority = INT_MAX;
-
__i915_gem_request_submit(rq);
trace_i915_gem_request_in(rq, port_index(port, execlists));
last = rq;
@@ -560,11 +675,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
INIT_LIST_HEAD(&p->requests);
if (p->priority != I915_PRIORITY_NORMAL)
kmem_cache_free(engine->i915->priorities, p);
- }
+ } while (rb);
done:
execlists->first = rb;
if (submit)
port_assign(port, last);
+unlock:
spin_unlock_irq(&engine->timeline->lock);
if (submit)
@@ -575,12 +691,12 @@ static void
execlist_cancel_port_requests(struct intel_engine_execlists *execlists)
{
struct execlist_port *port = execlists->port;
- unsigned int num_ports = ARRAY_SIZE(execlists->port);
+ unsigned int num_ports = execlists_num_ports(execlists);
while (num_ports-- && port_isset(port)) {
struct drm_i915_gem_request *rq = port_request(port);
- execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+ execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED);
i915_gem_request_put(rq);
memset(port, 0, sizeof(*port));
@@ -645,13 +761,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
spin_unlock_irqrestore(&engine->timeline->lock, flags);
}
-static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
-{
- const struct execlist_port *port = engine->execlists.port;
-
- return port_count(&port[0]) + port_count(&port[1]) < 2;
-}
-
/*
* Check the unread Context Status Buffers and manage the submission of new
* contexts to the ELSP accordingly.
@@ -660,7 +769,7 @@ static void intel_lrc_irq_handler(unsigned long data)
{
struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
struct intel_engine_execlists * const execlists = &engine->execlists;
- struct execlist_port *port = execlists->port;
+ struct execlist_port * const port = execlists->port;
struct drm_i915_private *dev_priv = engine->i915;
/* We can skip acquiring intel_runtime_pm_get() here as it was taken
@@ -745,6 +854,23 @@ static void intel_lrc_irq_handler(unsigned long data)
if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
continue;
+ if (status & GEN8_CTX_STATUS_ACTIVE_IDLE &&
+ buf[2*head + 1] == PREEMPT_ID) {
+ execlist_cancel_port_requests(execlists);
+
+ spin_lock_irq(&engine->timeline->lock);
+ unwind_incomplete_requests(engine);
+ spin_unlock_irq(&engine->timeline->lock);
+
+ GEM_BUG_ON(!execlists->preempt);
+ execlists->preempt = false;
+ continue;
+ }
+
+ if (status & GEN8_CTX_STATUS_PREEMPTED &&
+ execlists->preempt)
+ continue;
+
/* Check the context/desc id for this event matches */
GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
@@ -756,6 +882,7 @@ static void intel_lrc_irq_handler(unsigned long data)
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
trace_i915_gem_request_out(rq);
+ rq->priotree.priority = INT_MAX;
i915_gem_request_put(rq);
execlists_port_complete(execlists, port);
@@ -775,7 +902,7 @@ static void intel_lrc_irq_handler(unsigned long data)
}
}
- if (execlists_elsp_ready(engine))
+ if (!execlists->preempt)
execlists_dequeue(engine);
intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
@@ -788,7 +915,7 @@ static void insert_request(struct intel_engine_cs *engine,
struct i915_priolist *p = lookup_priolist(engine, pt, prio);
list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
- if (ptr_unmask_bits(p, 1) && execlists_elsp_ready(engine))
+ if (ptr_unmask_bits(p, 1))
tasklet_hi_schedule(&engine->execlists.irq_tasklet);
}
@@ -808,11 +935,15 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
spin_unlock_irqrestore(&engine->timeline->lock, flags);
}
+static struct drm_i915_gem_request *pt_to_request(struct i915_priotree *pt)
+{
+ return container_of(pt, struct drm_i915_gem_request, priotree);
+}
+
static struct intel_engine_cs *
pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
{
- struct intel_engine_cs *engine =
- container_of(pt, struct drm_i915_gem_request, priotree)->engine;
+ struct intel_engine_cs *engine = pt_to_request(pt)->engine;
GEM_BUG_ON(!locked);
@@ -831,6 +962,8 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
struct i915_dependency stack;
LIST_HEAD(dfs);
+ GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
+
if (prio <= READ_ONCE(request->priotree.priority))
return;
@@ -866,6 +999,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
* engines.
*/
list_for_each_entry(p, &pt->signalers_list, signal_link) {
+ if (i915_gem_request_completed(pt_to_request(p->signaler)))
+ continue;
+
GEM_BUG_ON(p->signaler->priority < pt->priority);
if (prio > READ_ONCE(p->signaler->priority))
list_move_tail(&p->dfs_link, &dfs);
@@ -879,7 +1015,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
* execlists_submit_request()), we can set our own priority and skip
* acquiring the engine locks.
*/
- if (request->priotree.priority == INT_MIN) {
+ if (request->priotree.priority == I915_PRIORITY_INVALID) {
GEM_BUG_ON(!list_empty(&request->priotree.link));
request->priotree.priority = prio;
if (stack.dfs_link.next == stack.dfs_link.prev)
@@ -909,8 +1045,6 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
}
spin_unlock_irq(&engine->timeline->lock);
-
- /* XXX Do we need to preempt to make room for us and our deps? */
}
static struct intel_ring *
@@ -1106,6 +1240,8 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
i915_ggtt_offset(engine->scratch) +
2 * CACHELINE_BYTES);
+ *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
/* Pad to end of cacheline */
while ((unsigned long)batch % CACHELINE_BYTES)
*batch++ = MI_NOOP;
@@ -1119,26 +1255,10 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
return batch;
}
-/*
- * This batch is started immediately after indirect_ctx batch. Since we ensure
- * that indirect_ctx ends on a cacheline this batch is aligned automatically.
- *
- * The number of DWORDS written are returned using this field.
- *
- * This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding
- * to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant.
- */
-static u32 *gen8_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
-{
- /* WaDisableCtxRestoreArbitration:bdw,chv */
- *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
- *batch++ = MI_BATCH_BUFFER_END;
-
- return batch;
-}
-
static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
+ *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
batch = gen8_emit_flush_coherentl3_wa(engine, batch);
@@ -1184,6 +1304,8 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
*batch++ = 0;
}
+ *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
/* Pad to end of cacheline */
while ((unsigned long)batch % CACHELINE_BYTES)
*batch++ = MI_NOOP;
@@ -1251,7 +1373,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
break;
case 8:
wa_bb_fn[0] = gen8_init_indirectctx_bb;
- wa_bb_fn[1] = gen8_init_perctx_bb;
+ wa_bb_fn[1] = NULL;
break;
default:
MISSING_CASE(INTEL_GEN(engine->i915));
@@ -1337,6 +1459,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
execlists->csb_head = -1;
+ execlists->preempt = false;
/* After a GPU reset, we may have requests to replay */
if (!i915_modparams.enable_guc_submission && execlists->first)
@@ -1382,7 +1505,6 @@ static void reset_common_ring(struct intel_engine_cs *engine,
struct drm_i915_gem_request *request)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
- struct drm_i915_gem_request *rq, *rn;
struct intel_context *ce;
unsigned long flags;
@@ -1400,21 +1522,7 @@ static void reset_common_ring(struct intel_engine_cs *engine,
execlist_cancel_port_requests(execlists);
/* Push back any incomplete requests for replay after the reset. */
- list_for_each_entry_safe_reverse(rq, rn,
- &engine->timeline->requests, link) {
- struct i915_priolist *p;
-
- if (i915_gem_request_completed(rq))
- break;
-
- __i915_gem_request_unsubmit(rq);
-
- p = lookup_priolist(engine,
- &rq->priotree,
- rq->priotree.priority);
- list_add(&rq->priotree.link,
- &ptr_mask_bits(p, 1)->requests);
- }
+ unwind_incomplete_requests(engine);
spin_unlock_irqrestore(&engine->timeline->lock, flags);
@@ -1451,10 +1559,7 @@ static void reset_common_ring(struct intel_engine_cs *engine,
intel_ring_update_space(request->ring);
/* Reset WaIdleLiteRestore:bdw,skl as well */
- request->tail =
- intel_ring_wrap(request->ring,
- request->wa_tail - WA_TAIL_DWORDS*sizeof(u32));
- assert_ring_tail_valid(request->ring, request->tail);
+ unwind_wa_tail(request);
}
static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
@@ -1513,13 +1618,31 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
if (IS_ERR(cs))
return PTR_ERR(cs);
+ /*
+ * WaDisableCtxRestoreArbitration:bdw,chv
+ *
+ * We don't need to perform MI_ARB_ENABLE as often as we do (in
+ * particular all the gen that do not need the w/a at all!), if we
+ * took care to make sure that on every switch into this context
+ * (both ordinary and for preemption) that arbitrartion was enabled
+ * we would be fine. However, there doesn't seem to be a downside to
+ * being paranoid and making sure it is set before each batch and
+ * every context-switch.
+ *
+ * Note that if we fail to enable arbitration before the request
+ * is complete, then we do not see the context-switch interrupt and
+ * the engine hangs (with RING_HEAD == RING_TAIL).
+ *
+ * That satisfies both the GPGPU w/a and our heavy-handed paranoia.
+ */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
/* FIXME(BDW): Address space and security selectors. */
*cs++ = MI_BATCH_BUFFER_START_GEN8 |
(flags & I915_DISPATCH_SECURE ? 0 : BIT(8)) |
(flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0);
*cs++ = lower_32_bits(offset);
*cs++ = upper_32_bits(offset);
- *cs++ = MI_NOOP;
intel_ring_advance(req, cs);
return 0;
@@ -1648,7 +1771,8 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
*/
static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs)
{
- *cs++ = MI_NOOP;
+ /* Ensure there's always at least one preemption point per-request. */
+ *cs++ = MI_ARB_CHECK;
*cs++ = MI_NOOP;
request->wa_tail = intel_ring_offset(request, cs);
}
@@ -1669,7 +1793,6 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs)
gen8_emit_wa_tail(request, cs);
}
-
static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
@@ -1697,7 +1820,6 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
gen8_emit_wa_tail(request, cs);
}
-
static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS;
static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 314adee7127a..689fde1a63a9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -61,6 +61,7 @@
enum {
INTEL_CONTEXT_SCHEDULE_IN = 0,
INTEL_CONTEXT_SCHEDULE_OUT,
+ INTEL_CONTEXT_SCHEDULE_PREEMPTED,
};
/* Logical Rings */
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index a55954a89148..38572d65e46e 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -939,10 +939,8 @@ void intel_lvds_init(struct drm_i915_private *dev_priv)
struct drm_display_mode *fixed_mode = NULL;
struct drm_display_mode *downclock_mode = NULL;
struct edid *edid;
- struct intel_crtc *crtc;
i915_reg_t lvds_reg;
u32 lvds;
- int pipe;
u8 pin;
u32 allowed_scalers;
@@ -1113,22 +1111,11 @@ void intel_lvds_init(struct drm_i915_private *dev_priv)
* on. If so, assume that whatever is currently programmed is the
* correct mode.
*/
-
- /* Ironlake: FIXME if still fail, not try pipe mode now */
- if (HAS_PCH_SPLIT(dev_priv))
- goto failed;
-
- pipe = (lvds & LVDS_PIPEB_SELECT) ? 1 : 0;
- crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
-
- if (crtc && (lvds & LVDS_PORT_EN)) {
- fixed_mode = intel_crtc_mode_get(dev, &crtc->base);
- if (fixed_mode) {
- DRM_DEBUG_KMS("using current (BIOS) mode: ");
- drm_mode_debug_printmodeline(fixed_mode);
- fixed_mode->type |= DRM_MODE_TYPE_PREFERRED;
- goto out;
- }
+ fixed_mode = intel_encoder_current_mode(intel_encoder);
+ if (fixed_mode) {
+ DRM_DEBUG_KMS("using current (BIOS) mode: ");
+ drm_mode_debug_printmodeline(fixed_mode);
+ fixed_mode->type |= DRM_MODE_TYPE_PREFERRED;
}
/* If we still don't have a mode after all that, give up. */
diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c
index 96043a51c1bf..899839f2f7c6 100644
--- a/drivers/gpu/drm/i915/intel_pipe_crc.c
+++ b/drivers/gpu/drm/i915/intel_pipe_crc.c
@@ -206,11 +206,11 @@ static const char *pipe_crc_source_name(enum intel_pipe_crc_source source)
static int display_crc_ctl_show(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = m->private;
- int i;
+ enum pipe pipe;
- for (i = 0; i < I915_MAX_PIPES; i++)
- seq_printf(m, "%c %s\n", pipe_name(i),
- pipe_crc_source_name(dev_priv->pipe_crc[i].source));
+ for_each_pipe(dev_priv, pipe)
+ seq_printf(m, "%c %s\n", pipe_name(pipe),
+ pipe_crc_source_name(dev_priv->pipe_crc[pipe].source));
return 0;
}
@@ -775,11 +775,12 @@ display_crc_ctl_parse_object(const char *buf, enum intel_pipe_crc_object *o)
return -EINVAL;
}
-static int display_crc_ctl_parse_pipe(const char *buf, enum pipe *pipe)
+static int display_crc_ctl_parse_pipe(struct drm_i915_private *dev_priv,
+ const char *buf, enum pipe *pipe)
{
const char name = buf[0];
- if (name < 'A' || name >= pipe_name(I915_MAX_PIPES))
+ if (name < 'A' || name >= pipe_name(INTEL_INFO(dev_priv)->num_pipes))
return -EINVAL;
*pipe = name - 'A';
@@ -828,7 +829,7 @@ static int display_crc_ctl_parse(struct drm_i915_private *dev_priv,
return -EINVAL;
}
- if (display_crc_ctl_parse_pipe(words[1], &pipe) < 0) {
+ if (display_crc_ctl_parse_pipe(dev_priv, words[1], &pipe) < 0) {
DRM_DEBUG_DRIVER("unknown pipe %s\n", words[1]);
return -EINVAL;
}
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index c66af09e27a7..2fcff9788b6f 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -322,7 +322,7 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
{
u32 val;
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
if (enable)
@@ -337,14 +337,14 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
}
static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
{
u32 val;
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
if (enable)
@@ -353,7 +353,7 @@ static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
val &= ~DSP_MAXFIFO_PM5_ENABLE;
vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
}
#define FW_WM(value, plane) \
@@ -2790,11 +2790,11 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
/* read the first set of memory latencies[0:3] */
val = 0; /* data0 to be programmed to 0 for first set */
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
ret = sandybridge_pcode_read(dev_priv,
GEN9_PCODE_READ_MEM_LATENCY,
&val);
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
if (ret) {
DRM_ERROR("SKL Mailbox read error = %d\n", ret);
@@ -2811,11 +2811,11 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
/* read the second set of memory latencies[4:7] */
val = 1; /* data0 to be programmed to 1 for second set */
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
ret = sandybridge_pcode_read(dev_priv,
GEN9_PCODE_READ_MEM_LATENCY,
&val);
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
if (ret) {
DRM_ERROR("SKL Mailbox read error = %d\n", ret);
return;
@@ -3608,13 +3608,13 @@ intel_enable_sagv(struct drm_i915_private *dev_priv)
return 0;
DRM_DEBUG_KMS("Enabling the SAGV\n");
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
GEN9_SAGV_ENABLE);
/* We don't need to wait for the SAGV when enabling */
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
/*
* Some skl systems, pre-release machines in particular,
@@ -3645,14 +3645,14 @@ intel_disable_sagv(struct drm_i915_private *dev_priv)
return 0;
DRM_DEBUG_KMS("Disabling the SAGV\n");
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
/* bspec says to keep retrying for at least 1 ms */
ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL,
GEN9_SAGV_DISABLE,
GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED,
1);
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
/*
* Some skl systems, pre-release machines in particular,
@@ -4820,16 +4820,18 @@ static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,
return a->start < b->end && b->start < a->end;
}
-bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries,
+bool skl_ddb_allocation_overlaps(struct drm_i915_private *dev_priv,
+ const struct skl_ddb_entry **entries,
const struct skl_ddb_entry *ddb,
int ignore)
{
- int i;
+ enum pipe pipe;
- for (i = 0; i < I915_MAX_PIPES; i++)
- if (i != ignore && entries[i] &&
- skl_ddb_entries_overlap(ddb, entries[i]))
+ for_each_pipe(dev_priv, pipe) {
+ if (pipe != ignore && entries[pipe] &&
+ skl_ddb_entries_overlap(ddb, entries[pipe]))
return true;
+ }
return false;
}
@@ -5619,7 +5621,7 @@ void vlv_wm_get_hw_state(struct drm_device *dev)
wm->level = VLV_WM_LEVEL_PM2;
if (IS_CHERRYVIEW(dev_priv)) {
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
if (val & DSP_MAXFIFO_PM5_ENABLE)
@@ -5649,7 +5651,7 @@ void vlv_wm_get_hw_state(struct drm_device *dev)
wm->level = VLV_WM_LEVEL_DDR_DVFS;
}
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
}
for_each_intel_crtc(dev, crtc) {
@@ -5827,6 +5829,12 @@ void intel_enable_ipc(struct drm_i915_private *dev_priv)
{
u32 val;
+ /* Display WA #0477 WaDisableIPC: skl */
+ if (IS_SKYLAKE(dev_priv)) {
+ dev_priv->ipc_enabled = false;
+ return;
+ }
+
val = I915_READ(DISP_ARB_CTL2);
if (dev_priv->ipc_enabled)
@@ -5980,6 +5988,7 @@ static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
*/
static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
u32 limits;
/* Only set the down limit when we've reached the lowest level to avoid
@@ -5989,13 +5998,13 @@ static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
* frequency, if the down threshold expires in that window we will not
* receive a down interrupt. */
if (INTEL_GEN(dev_priv) >= 9) {
- limits = (dev_priv->rps.max_freq_softlimit) << 23;
- if (val <= dev_priv->rps.min_freq_softlimit)
- limits |= (dev_priv->rps.min_freq_softlimit) << 14;
+ limits = (rps->max_freq_softlimit) << 23;
+ if (val <= rps->min_freq_softlimit)
+ limits |= (rps->min_freq_softlimit) << 14;
} else {
- limits = dev_priv->rps.max_freq_softlimit << 24;
- if (val <= dev_priv->rps.min_freq_softlimit)
- limits |= dev_priv->rps.min_freq_softlimit << 16;
+ limits = rps->max_freq_softlimit << 24;
+ if (val <= rps->min_freq_softlimit)
+ limits |= rps->min_freq_softlimit << 16;
}
return limits;
@@ -6003,39 +6012,40 @@ static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
int new_power;
u32 threshold_up = 0, threshold_down = 0; /* in % */
u32 ei_up = 0, ei_down = 0;
- new_power = dev_priv->rps.power;
- switch (dev_priv->rps.power) {
+ new_power = rps->power;
+ switch (rps->power) {
case LOW_POWER:
- if (val > dev_priv->rps.efficient_freq + 1 &&
- val > dev_priv->rps.cur_freq)
+ if (val > rps->efficient_freq + 1 &&
+ val > rps->cur_freq)
new_power = BETWEEN;
break;
case BETWEEN:
- if (val <= dev_priv->rps.efficient_freq &&
- val < dev_priv->rps.cur_freq)
+ if (val <= rps->efficient_freq &&
+ val < rps->cur_freq)
new_power = LOW_POWER;
- else if (val >= dev_priv->rps.rp0_freq &&
- val > dev_priv->rps.cur_freq)
+ else if (val >= rps->rp0_freq &&
+ val > rps->cur_freq)
new_power = HIGH_POWER;
break;
case HIGH_POWER:
- if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 &&
- val < dev_priv->rps.cur_freq)
+ if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
+ val < rps->cur_freq)
new_power = BETWEEN;
break;
}
/* Max/min bins are special */
- if (val <= dev_priv->rps.min_freq_softlimit)
+ if (val <= rps->min_freq_softlimit)
new_power = LOW_POWER;
- if (val >= dev_priv->rps.max_freq_softlimit)
+ if (val >= rps->max_freq_softlimit)
new_power = HIGH_POWER;
- if (new_power == dev_priv->rps.power)
+ if (new_power == rps->power)
return;
/* Note the units here are not exactly 1us, but 1280ns. */
@@ -6098,20 +6108,21 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
GEN6_RP_DOWN_IDLE_AVG);
skip_hw_write:
- dev_priv->rps.power = new_power;
- dev_priv->rps.up_threshold = threshold_up;
- dev_priv->rps.down_threshold = threshold_down;
- dev_priv->rps.last_adj = 0;
+ rps->power = new_power;
+ rps->up_threshold = threshold_up;
+ rps->down_threshold = threshold_down;
+ rps->last_adj = 0;
}
static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
u32 mask = 0;
/* We use UP_EI_EXPIRED interupts for both up/down in manual mode */
- if (val > dev_priv->rps.min_freq_softlimit)
+ if (val > rps->min_freq_softlimit)
mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
- if (val < dev_priv->rps.max_freq_softlimit)
+ if (val < rps->max_freq_softlimit)
mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
mask &= dev_priv->pm_rps_events;
@@ -6124,10 +6135,12 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
* update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
/* min/max delay may still have been modified so be sure to
* write the limits value.
*/
- if (val != dev_priv->rps.cur_freq) {
+ if (val != rps->cur_freq) {
gen6_set_rps_thresholds(dev_priv, val);
if (INTEL_GEN(dev_priv) >= 9)
@@ -6149,7 +6162,7 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
- dev_priv->rps.cur_freq = val;
+ rps->cur_freq = val;
trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
return 0;
@@ -6165,7 +6178,7 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
- if (val != dev_priv->rps.cur_freq) {
+ if (val != dev_priv->gt_pm.rps.cur_freq) {
err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
if (err)
return err;
@@ -6173,7 +6186,7 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
gen6_set_rps_thresholds(dev_priv, val);
}
- dev_priv->rps.cur_freq = val;
+ dev_priv->gt_pm.rps.cur_freq = val;
trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
return 0;
@@ -6188,10 +6201,11 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
*/
static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
{
- u32 val = dev_priv->rps.idle_freq;
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
+ u32 val = rps->idle_freq;
int err;
- if (dev_priv->rps.cur_freq <= val)
+ if (rps->cur_freq <= val)
return;
/* The punit delays the write of the frequency and voltage until it
@@ -6216,34 +6230,38 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
void gen6_rps_busy(struct drm_i915_private *dev_priv)
{
- mutex_lock(&dev_priv->rps.hw_lock);
- if (dev_priv->rps.enabled) {
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+ mutex_lock(&dev_priv->pcu_lock);
+ if (rps->enabled) {
u8 freq;
if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
gen6_rps_reset_ei(dev_priv);
I915_WRITE(GEN6_PMINTRMSK,
- gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
+ gen6_rps_pm_mask(dev_priv, rps->cur_freq));
gen6_enable_rps_interrupts(dev_priv);
/* Use the user's desired frequency as a guide, but for better
* performance, jump directly to RPe as our starting frequency.
*/
- freq = max(dev_priv->rps.cur_freq,
- dev_priv->rps.efficient_freq);
+ freq = max(rps->cur_freq,
+ rps->efficient_freq);
if (intel_set_rps(dev_priv,
clamp(freq,
- dev_priv->rps.min_freq_softlimit,
- dev_priv->rps.max_freq_softlimit)))
+ rps->min_freq_softlimit,
+ rps->max_freq_softlimit)))
DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
}
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
}
void gen6_rps_idle(struct drm_i915_private *dev_priv)
{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
/* Flush our bottom-half so that it does not race with us
* setting the idle frequency and so that it is bounded by
* our rpm wakeref. And then disable the interrupts to stop any
@@ -6251,36 +6269,36 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
*/
gen6_disable_rps_interrupts(dev_priv);
- mutex_lock(&dev_priv->rps.hw_lock);
- if (dev_priv->rps.enabled) {
+ mutex_lock(&dev_priv->pcu_lock);
+ if (rps->enabled) {
if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
vlv_set_rps_idle(dev_priv);
else
- gen6_set_rps(dev_priv, dev_priv->rps.idle_freq);
- dev_priv->rps.last_adj = 0;
+ gen6_set_rps(dev_priv, rps->idle_freq);
+ rps->last_adj = 0;
I915_WRITE(GEN6_PMINTRMSK,
gen6_sanitize_rps_pm_mask(dev_priv, ~0));
}
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
}
void gen6_rps_boost(struct drm_i915_gem_request *rq,
- struct intel_rps_client *rps)
+ struct intel_rps_client *rps_client)
{
- struct drm_i915_private *i915 = rq->i915;
+ struct intel_rps *rps = &rq->i915->gt_pm.rps;
unsigned long flags;
bool boost;
/* This is intentionally racy! We peek at the state here, then
* validate inside the RPS worker.
*/
- if (!i915->rps.enabled)
+ if (!rps->enabled)
return;
boost = false;
spin_lock_irqsave(&rq->lock, flags);
if (!rq->waitboost && !i915_gem_request_completed(rq)) {
- atomic_inc(&i915->rps.num_waiters);
+ atomic_inc(&rps->num_waiters);
rq->waitboost = true;
boost = true;
}
@@ -6288,22 +6306,23 @@ void gen6_rps_boost(struct drm_i915_gem_request *rq,
if (!boost)
return;
- if (READ_ONCE(i915->rps.cur_freq) < i915->rps.boost_freq)
- schedule_work(&i915->rps.work);
+ if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
+ schedule_work(&rps->work);
- atomic_inc(rps ? &rps->boosts : &i915->rps.boosts);
+ atomic_inc(rps_client ? &rps_client->boosts : &rps->boosts);
}
int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
int err;
- lockdep_assert_held(&dev_priv->rps.hw_lock);
- GEM_BUG_ON(val > dev_priv->rps.max_freq);
- GEM_BUG_ON(val < dev_priv->rps.min_freq);
+ lockdep_assert_held(&dev_priv->pcu_lock);
+ GEM_BUG_ON(val > rps->max_freq);
+ GEM_BUG_ON(val < rps->min_freq);
- if (!dev_priv->rps.enabled) {
- dev_priv->rps.cur_freq = val;
+ if (!rps->enabled) {
+ rps->cur_freq = val;
return 0;
}
@@ -6326,21 +6345,30 @@ static void gen9_disable_rps(struct drm_i915_private *dev_priv)
I915_WRITE(GEN6_RP_CONTROL, 0);
}
-static void gen6_disable_rps(struct drm_i915_private *dev_priv)
+static void gen6_disable_rc6(struct drm_i915_private *dev_priv)
{
I915_WRITE(GEN6_RC_CONTROL, 0);
+}
+
+static void gen6_disable_rps(struct drm_i915_private *dev_priv)
+{
I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
I915_WRITE(GEN6_RP_CONTROL, 0);
}
-static void cherryview_disable_rps(struct drm_i915_private *dev_priv)
+static void cherryview_disable_rc6(struct drm_i915_private *dev_priv)
{
I915_WRITE(GEN6_RC_CONTROL, 0);
}
-static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
+static void cherryview_disable_rps(struct drm_i915_private *dev_priv)
{
- /* we're doing forcewake before Disabling RC6,
+ I915_WRITE(GEN6_RP_CONTROL, 0);
+}
+
+static void valleyview_disable_rc6(struct drm_i915_private *dev_priv)
+{
+ /* We're doing forcewake before Disabling RC6,
* This what the BIOS expects when going into suspend */
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
@@ -6349,6 +6377,11 @@ static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
}
+static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
+{
+ I915_WRITE(GEN6_RP_CONTROL, 0);
+}
+
static void intel_print_rc6_info(struct drm_i915_private *dev_priv, u32 mode)
{
if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
@@ -6471,24 +6504,26 @@ int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6)
static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
/* All of these values are in units of 50MHz */
/* static values from HW: RP0 > RP1 > RPn (min_freq) */
if (IS_GEN9_LP(dev_priv)) {
u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
- dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff;
- dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff;
- dev_priv->rps.min_freq = (rp_state_cap >> 0) & 0xff;
+ rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
+ rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
+ rps->min_freq = (rp_state_cap >> 0) & 0xff;
} else {
u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
- dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff;
- dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff;
- dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff;
+ rps->rp0_freq = (rp_state_cap >> 0) & 0xff;
+ rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
+ rps->min_freq = (rp_state_cap >> 16) & 0xff;
}
/* hw_max = RP0 until we check for overclocking */
- dev_priv->rps.max_freq = dev_priv->rps.rp0_freq;
+ rps->max_freq = rps->rp0_freq;
- dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq;
+ rps->efficient_freq = rps->rp1_freq;
if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
u32 ddcc_status = 0;
@@ -6496,33 +6531,34 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
if (sandybridge_pcode_read(dev_priv,
HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
&ddcc_status) == 0)
- dev_priv->rps.efficient_freq =
+ rps->efficient_freq =
clamp_t(u8,
((ddcc_status >> 8) & 0xff),
- dev_priv->rps.min_freq,
- dev_priv->rps.max_freq);
+ rps->min_freq,
+ rps->max_freq);
}
if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
/* Store the frequency values in 16.66 MHZ units, which is
* the natural hardware unit for SKL
*/
- dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER;
- dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER;
- dev_priv->rps.min_freq *= GEN9_FREQ_SCALER;
- dev_priv->rps.max_freq *= GEN9_FREQ_SCALER;
- dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER;
+ rps->rp0_freq *= GEN9_FREQ_SCALER;
+ rps->rp1_freq *= GEN9_FREQ_SCALER;
+ rps->min_freq *= GEN9_FREQ_SCALER;
+ rps->max_freq *= GEN9_FREQ_SCALER;
+ rps->efficient_freq *= GEN9_FREQ_SCALER;
}
}
static void reset_rps(struct drm_i915_private *dev_priv,
int (*set)(struct drm_i915_private *, u8))
{
- u8 freq = dev_priv->rps.cur_freq;
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
+ u8 freq = rps->cur_freq;
/* force a reset */
- dev_priv->rps.power = -1;
- dev_priv->rps.cur_freq = -1;
+ rps->power = -1;
+ rps->cur_freq = -1;
if (set(dev_priv, freq))
DRM_ERROR("Failed to reset RPS to initial values\n");
@@ -6535,7 +6571,7 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv)
/* Program defaults and thresholds for RPS*/
I915_WRITE(GEN6_RC_VIDEO_FREQ,
- GEN9_FREQUENCY(dev_priv->rps.rp1_freq));
+ GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq));
/* 1 second timeout*/
I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
@@ -6589,7 +6625,7 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25);
/* 3a: Enable RC6 */
- if (intel_enable_rc6() & INTEL_RC6_ENABLE)
+ if (intel_rc6_enabled() & INTEL_RC6_ENABLE)
rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE));
I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
@@ -6609,7 +6645,7 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
}
-static void gen8_enable_rps(struct drm_i915_private *dev_priv)
+static void gen8_enable_rc6(struct drm_i915_private *dev_priv)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
@@ -6618,7 +6654,7 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv)
/* 1a: Software RC state - RC0 */
I915_WRITE(GEN6_RC_STATE, 0);
- /* 1c & 1d: Get forcewake during program sequence. Although the driver
+ /* 1b: Get forcewake during program sequence. Although the driver
* hasn't enabled a state yet where we need forcewake, BIOS may have.*/
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
@@ -6632,36 +6668,38 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv)
for_each_engine(engine, dev_priv, id)
I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
I915_WRITE(GEN6_RC_SLEEP, 0);
- if (IS_BROADWELL(dev_priv))
- I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
- else
- I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
+ I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
/* 3: Enable RC6 */
- if (intel_enable_rc6() & INTEL_RC6_ENABLE)
+ if (intel_rc6_enabled() & INTEL_RC6_ENABLE)
rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
intel_print_rc6_info(dev_priv, rc6_mask);
- if (IS_BROADWELL(dev_priv))
- I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
- GEN7_RC_CTL_TO_MODE |
- rc6_mask);
- else
- I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
- GEN6_RC_CTL_EI_MODE(1) |
- rc6_mask);
- /* 4 Program defaults and thresholds for RPS*/
+ I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
+ GEN7_RC_CTL_TO_MODE |
+ rc6_mask);
+
+ intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+static void gen8_enable_rps(struct drm_i915_private *dev_priv)
+{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+ /* 1 Program defaults and thresholds for RPS*/
I915_WRITE(GEN6_RPNSWREQ,
- HSW_FREQUENCY(dev_priv->rps.rp1_freq));
+ HSW_FREQUENCY(rps->rp1_freq));
I915_WRITE(GEN6_RC_VIDEO_FREQ,
- HSW_FREQUENCY(dev_priv->rps.rp1_freq));
+ HSW_FREQUENCY(rps->rp1_freq));
/* NB: Docs say 1s, and 1000000 - which aren't equivalent */
I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
/* Docs recommend 900MHz, and 300 MHz respectively */
I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
- dev_priv->rps.max_freq_softlimit << 24 |
- dev_priv->rps.min_freq_softlimit << 16);
+ rps->max_freq_softlimit << 24 |
+ rps->min_freq_softlimit << 16);
I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
@@ -6670,7 +6708,7 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv)
I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
- /* 5: Enable RPS */
+ /* 2: Enable RPS */
I915_WRITE(GEN6_RP_CONTROL,
GEN6_RP_MEDIA_TURBO |
GEN6_RP_MEDIA_HW_NORMAL_MODE |
@@ -6679,14 +6717,12 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv)
GEN6_RP_UP_BUSY_AVG |
GEN6_RP_DOWN_IDLE_AVG);
- /* 6: Ring frequency + overclocking (our driver does this later */
-
reset_rps(dev_priv, gen6_set_rps);
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
}
-static void gen6_enable_rps(struct drm_i915_private *dev_priv)
+static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
@@ -6695,14 +6731,6 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv)
int rc6_mode;
int ret;
- WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
-
- /* Here begins a magic sequence of register writes to enable
- * auto-downclocking.
- *
- * Perhaps there might be some value in exposing these to
- * userspace...
- */
I915_WRITE(GEN6_RC_STATE, 0);
/* Clear the DBG now so we don't confuse earlier errors */
@@ -6736,7 +6764,7 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv)
I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
/* Check if we are enabling RC6 */
- rc6_mode = intel_enable_rc6();
+ rc6_mode = intel_rc6_enabled();
if (rc6_mode & INTEL_RC6_ENABLE)
rc6_mask |= GEN6_RC_CTL_RC6_ENABLE;
@@ -6756,12 +6784,6 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv)
GEN6_RC_CTL_EI_MODE(1) |
GEN6_RC_CTL_HW_ENABLE);
- /* Power down if completely idle for over 50ms */
- I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
- I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
-
- reset_rps(dev_priv, gen6_set_rps);
-
rc6vids = 0;
ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
if (IS_GEN6(dev_priv) && ret) {
@@ -6779,8 +6801,28 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv)
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
}
+static void gen6_enable_rps(struct drm_i915_private *dev_priv)
+{
+ /* Here begins a magic sequence of register writes to enable
+ * auto-downclocking.
+ *
+ * Perhaps there might be some value in exposing these to
+ * userspace...
+ */
+ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+ /* Power down if completely idle for over 50ms */
+ I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
+ I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+
+ reset_rps(dev_priv, gen6_set_rps);
+
+ intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
int min_freq = 15;
unsigned int gpu_freq;
unsigned int max_ia_freq, min_ring_freq;
@@ -6788,7 +6830,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
int scaling_factor = 180;
struct cpufreq_policy *policy;
- WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+ WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
policy = cpufreq_cpu_get(0);
if (policy) {
@@ -6811,11 +6853,11 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
/* Convert GT frequency to 50 HZ units */
- min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER;
- max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER;
+ min_gpu_freq = rps->min_freq / GEN9_FREQ_SCALER;
+ max_gpu_freq = rps->max_freq / GEN9_FREQ_SCALER;
} else {
- min_gpu_freq = dev_priv->rps.min_freq;
- max_gpu_freq = dev_priv->rps.max_freq;
+ min_gpu_freq = rps->min_freq;
+ max_gpu_freq = rps->max_freq;
}
/*
@@ -7066,17 +7108,18 @@ static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
{
- dev_priv->rps.gpll_ref_freq =
+ dev_priv->gt_pm.rps.gpll_ref_freq =
vlv_get_cck_clock(dev_priv, "GPLL ref",
CCK_GPLL_CLOCK_CONTROL,
dev_priv->czclk_freq);
DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n",
- dev_priv->rps.gpll_ref_freq);
+ dev_priv->gt_pm.rps.gpll_ref_freq);
}
static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
u32 val;
valleyview_setup_pctx(dev_priv);
@@ -7098,30 +7141,31 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
}
DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
- dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv);
- dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
+ rps->max_freq = valleyview_rps_max_freq(dev_priv);
+ rps->rp0_freq = rps->max_freq;
DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(dev_priv, dev_priv->rps.max_freq),
- dev_priv->rps.max_freq);
+ intel_gpu_freq(dev_priv, rps->max_freq),
+ rps->max_freq);
- dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv);
+ rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv);
DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
- dev_priv->rps.efficient_freq);
+ intel_gpu_freq(dev_priv, rps->efficient_freq),
+ rps->efficient_freq);
- dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv);
+ rps->rp1_freq = valleyview_rps_guar_freq(dev_priv);
DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
- dev_priv->rps.rp1_freq);
+ intel_gpu_freq(dev_priv, rps->rp1_freq),
+ rps->rp1_freq);
- dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv);
+ rps->min_freq = valleyview_rps_min_freq(dev_priv);
DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
- dev_priv->rps.min_freq);
+ intel_gpu_freq(dev_priv, rps->min_freq),
+ rps->min_freq);
}
static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
u32 val;
cherryview_setup_pctx(dev_priv);
@@ -7142,31 +7186,29 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
}
DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
- dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv);
- dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
+ rps->max_freq = cherryview_rps_max_freq(dev_priv);
+ rps->rp0_freq = rps->max_freq;
DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(dev_priv, dev_priv->rps.max_freq),
- dev_priv->rps.max_freq);
+ intel_gpu_freq(dev_priv, rps->max_freq),
+ rps->max_freq);
- dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv);
+ rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv);
DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
- dev_priv->rps.efficient_freq);
+ intel_gpu_freq(dev_priv, rps->efficient_freq),
+ rps->efficient_freq);
- dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv);
+ rps->rp1_freq = cherryview_rps_guar_freq(dev_priv);
DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
- dev_priv->rps.rp1_freq);
+ intel_gpu_freq(dev_priv, rps->rp1_freq),
+ rps->rp1_freq);
- dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv);
+ rps->min_freq = cherryview_rps_min_freq(dev_priv);
DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
- dev_priv->rps.min_freq);
+ intel_gpu_freq(dev_priv, rps->min_freq),
+ rps->min_freq);
- WARN_ONCE((dev_priv->rps.max_freq |
- dev_priv->rps.efficient_freq |
- dev_priv->rps.rp1_freq |
- dev_priv->rps.min_freq) & 1,
+ WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
+ rps->min_freq) & 1,
"Odd GPU freq values\n");
}
@@ -7175,13 +7217,11 @@ static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
valleyview_cleanup_pctx(dev_priv);
}
-static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
+static void cherryview_enable_rc6(struct drm_i915_private *dev_priv)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
- u32 gtfifodbg, val, rc6_mode = 0, pcbr;
-
- WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+ u32 gtfifodbg, rc6_mode = 0, pcbr;
gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV |
GT_FIFO_FREE_ENTRIES_CHV);
@@ -7212,7 +7252,7 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
/* TO threshold set to 500 us ( 0x186 * 1.28 us) */
I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
- /* allows RC6 residency counter to work */
+ /* Allows RC6 residency counter to work */
I915_WRITE(VLV_COUNTER_CONTROL,
_MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
VLV_MEDIA_RC6_COUNT_EN |
@@ -7222,13 +7262,22 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
pcbr = I915_READ(VLV_PCBR);
/* 3: Enable RC6 */
- if ((intel_enable_rc6() & INTEL_RC6_ENABLE) &&
+ if ((intel_rc6_enabled() & INTEL_RC6_ENABLE) &&
(pcbr >> VLV_PCBR_ADDR_SHIFT))
rc6_mode = GEN7_RC_CTL_TO_MODE;
I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
- /* 4 Program defaults and thresholds for RPS*/
+ intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
+{
+ u32 val;
+
+ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+ /* 1: Program defaults and thresholds for RPS*/
I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
@@ -7237,7 +7286,7 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
- /* 5: Enable RPS */
+ /* 2: Enable RPS */
I915_WRITE(GEN6_RP_CONTROL,
GEN6_RP_MEDIA_HW_NORMAL_MODE |
GEN6_RP_MEDIA_IS_GFX |
@@ -7264,13 +7313,11 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
}
-static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
+static void valleyview_enable_rc6(struct drm_i915_private *dev_priv)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
- u32 gtfifodbg, val, rc6_mode = 0;
-
- WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+ u32 gtfifodbg, rc6_mode = 0;
valleyview_check_pctx(dev_priv);
@@ -7281,28 +7328,11 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
I915_WRITE(GTFIFODBG, gtfifodbg);
}
- /* If VLV, Forcewake all wells, else re-direct to regular path */
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
/* Disable RC states. */
I915_WRITE(GEN6_RC_CONTROL, 0);
- I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
- I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
- I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
- I915_WRITE(GEN6_RP_UP_EI, 66000);
- I915_WRITE(GEN6_RP_DOWN_EI, 350000);
-
- I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
-
- I915_WRITE(GEN6_RP_CONTROL,
- GEN6_RP_MEDIA_TURBO |
- GEN6_RP_MEDIA_HW_NORMAL_MODE |
- GEN6_RP_MEDIA_IS_GFX |
- GEN6_RP_ENABLE |
- GEN6_RP_UP_BUSY_AVG |
- GEN6_RP_DOWN_IDLE_CONT);
-
I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
@@ -7312,7 +7342,7 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
- /* allows RC6 residency counter to work */
+ /* Allows RC6 residency counter to work */
I915_WRITE(VLV_COUNTER_CONTROL,
_MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
VLV_MEDIA_RC0_COUNT_EN |
@@ -7320,13 +7350,38 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
VLV_MEDIA_RC6_COUNT_EN |
VLV_RENDER_RC6_COUNT_EN));
- if (intel_enable_rc6() & INTEL_RC6_ENABLE)
+ if (intel_rc6_enabled() & INTEL_RC6_ENABLE)
rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
intel_print_rc6_info(dev_priv, rc6_mode);
I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
+ intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
+{
+ u32 val;
+
+ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+ I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
+ I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
+ I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
+ I915_WRITE(GEN6_RP_UP_EI, 66000);
+ I915_WRITE(GEN6_RP_DOWN_EI, 350000);
+
+ I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+
+ I915_WRITE(GEN6_RP_CONTROL,
+ GEN6_RP_MEDIA_TURBO |
+ GEN6_RP_MEDIA_HW_NORMAL_MODE |
+ GEN6_RP_MEDIA_IS_GFX |
+ GEN6_RP_ENABLE |
+ GEN6_RP_UP_BUSY_AVG |
+ GEN6_RP_DOWN_IDLE_CONT);
+
/* Setting Fixed Bias */
val = VLV_OVERRIDE_EN |
VLV_SOC_TDP_EN |
@@ -7534,7 +7589,7 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
lockdep_assert_held(&mchdev_lock);
- pxvid = I915_READ(PXVFREQ(dev_priv->rps.cur_freq));
+ pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq));
pxvid = (pxvid >> 24) & 0x7f;
ext_v = pvid_to_extvid(dev_priv, pxvid);
@@ -7821,6 +7876,8 @@ static void intel_init_emon(struct drm_i915_private *dev_priv)
void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
/*
* RPM depends on RC6 to save restore the GT HW context, so make RC6 a
* requirement.
@@ -7831,7 +7888,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
}
mutex_lock(&dev_priv->drm.struct_mutex);
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
/* Initialize RPS limits (for userspace) */
if (IS_CHERRYVIEW(dev_priv))
@@ -7842,16 +7899,16 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
gen6_init_rps_frequencies(dev_priv);
/* Derive initial user preferences/limits from the hardware limits */
- dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
- dev_priv->rps.cur_freq = dev_priv->rps.idle_freq;
+ rps->idle_freq = rps->min_freq;
+ rps->cur_freq = rps->idle_freq;
- dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
- dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
+ rps->max_freq_softlimit = rps->max_freq;
+ rps->min_freq_softlimit = rps->min_freq;
if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
- dev_priv->rps.min_freq_softlimit =
+ rps->min_freq_softlimit =
max_t(int,
- dev_priv->rps.efficient_freq,
+ rps->efficient_freq,
intel_freq_opcode(dev_priv, 450));
/* After setting max-softlimit, find the overclock max freq */
@@ -7862,16 +7919,16 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
if (params & BIT(31)) { /* OC supported */
DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
- (dev_priv->rps.max_freq & 0xff) * 50,
+ (rps->max_freq & 0xff) * 50,
(params & 0xff) * 50);
- dev_priv->rps.max_freq = params & 0xff;
+ rps->max_freq = params & 0xff;
}
}
/* Finally allow us to boost to max by default */
- dev_priv->rps.boost_freq = dev_priv->rps.max_freq;
+ rps->boost_freq = rps->max_freq;
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
mutex_unlock(&dev_priv->drm.struct_mutex);
intel_autoenable_gt_powersave(dev_priv);
@@ -7899,7 +7956,7 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
if (INTEL_GEN(dev_priv) < 6)
return;
- if (cancel_delayed_work_sync(&dev_priv->rps.autoenable_work))
+ if (cancel_delayed_work_sync(&dev_priv->gt_pm.autoenable_work))
intel_runtime_pm_put(dev_priv);
/* gen6_rps_idle() will be called later to disable interrupts */
@@ -7907,90 +7964,168 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
{
- dev_priv->rps.enabled = true; /* force disabling */
+ dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
+ dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
intel_disable_gt_powersave(dev_priv);
gen6_reset_rps_interrupts(dev_priv);
}
-void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
+static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
{
- if (!READ_ONCE(dev_priv->rps.enabled))
+ lockdep_assert_held(&i915->pcu_lock);
+
+ if (!i915->gt_pm.llc_pstate.enabled)
return;
- mutex_lock(&dev_priv->rps.hw_lock);
+ /* Currently there is no HW configuration to be done to disable. */
- if (INTEL_GEN(dev_priv) >= 9) {
+ i915->gt_pm.llc_pstate.enabled = false;
+}
+
+static void intel_disable_rc6(struct drm_i915_private *dev_priv)
+{
+ lockdep_assert_held(&dev_priv->pcu_lock);
+
+ if (!dev_priv->gt_pm.rc6.enabled)
+ return;
+
+ if (INTEL_GEN(dev_priv) >= 9)
gen9_disable_rc6(dev_priv);
+ else if (IS_CHERRYVIEW(dev_priv))
+ cherryview_disable_rc6(dev_priv);
+ else if (IS_VALLEYVIEW(dev_priv))
+ valleyview_disable_rc6(dev_priv);
+ else if (INTEL_GEN(dev_priv) >= 6)
+ gen6_disable_rc6(dev_priv);
+
+ dev_priv->gt_pm.rc6.enabled = false;
+}
+
+static void intel_disable_rps(struct drm_i915_private *dev_priv)
+{
+ lockdep_assert_held(&dev_priv->pcu_lock);
+
+ if (!dev_priv->gt_pm.rps.enabled)
+ return;
+
+ if (INTEL_GEN(dev_priv) >= 9)
gen9_disable_rps(dev_priv);
- } else if (IS_CHERRYVIEW(dev_priv)) {
+ else if (IS_CHERRYVIEW(dev_priv))
cherryview_disable_rps(dev_priv);
- } else if (IS_VALLEYVIEW(dev_priv)) {
+ else if (IS_VALLEYVIEW(dev_priv))
valleyview_disable_rps(dev_priv);
- } else if (INTEL_GEN(dev_priv) >= 6) {
+ else if (INTEL_GEN(dev_priv) >= 6)
gen6_disable_rps(dev_priv);
- } else if (IS_IRONLAKE_M(dev_priv)) {
+ else if (IS_IRONLAKE_M(dev_priv))
ironlake_disable_drps(dev_priv);
- }
- dev_priv->rps.enabled = false;
- mutex_unlock(&dev_priv->rps.hw_lock);
+ dev_priv->gt_pm.rps.enabled = false;
}
-void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
+void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
{
- /* We shouldn't be disabling as we submit, so this should be less
- * racy than it appears!
- */
- if (READ_ONCE(dev_priv->rps.enabled))
+ mutex_lock(&dev_priv->pcu_lock);
+
+ intel_disable_rc6(dev_priv);
+ intel_disable_rps(dev_priv);
+ if (HAS_LLC(dev_priv))
+ intel_disable_llc_pstate(dev_priv);
+
+ mutex_unlock(&dev_priv->pcu_lock);
+}
+
+static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
+{
+ lockdep_assert_held(&i915->pcu_lock);
+
+ if (i915->gt_pm.llc_pstate.enabled)
return;
- /* Powersaving is controlled by the host when inside a VM */
- if (intel_vgpu_active(dev_priv))
+ gen6_update_ring_freq(i915);
+
+ i915->gt_pm.llc_pstate.enabled = true;
+}
+
+static void intel_enable_rc6(struct drm_i915_private *dev_priv)
+{
+ lockdep_assert_held(&dev_priv->pcu_lock);
+
+ if (dev_priv->gt_pm.rc6.enabled)
return;
- mutex_lock(&dev_priv->rps.hw_lock);
+ if (IS_CHERRYVIEW(dev_priv))
+ cherryview_enable_rc6(dev_priv);
+ else if (IS_VALLEYVIEW(dev_priv))
+ valleyview_enable_rc6(dev_priv);
+ else if (INTEL_GEN(dev_priv) >= 9)
+ gen9_enable_rc6(dev_priv);
+ else if (IS_BROADWELL(dev_priv))
+ gen8_enable_rc6(dev_priv);
+ else if (INTEL_GEN(dev_priv) >= 6)
+ gen6_enable_rc6(dev_priv);
+
+ dev_priv->gt_pm.rc6.enabled = true;
+}
+
+static void intel_enable_rps(struct drm_i915_private *dev_priv)
+{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+ lockdep_assert_held(&dev_priv->pcu_lock);
+
+ if (rps->enabled)
+ return;
if (IS_CHERRYVIEW(dev_priv)) {
cherryview_enable_rps(dev_priv);
} else if (IS_VALLEYVIEW(dev_priv)) {
valleyview_enable_rps(dev_priv);
} else if (INTEL_GEN(dev_priv) >= 9) {
- gen9_enable_rc6(dev_priv);
gen9_enable_rps(dev_priv);
- if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv))
- gen6_update_ring_freq(dev_priv);
} else if (IS_BROADWELL(dev_priv)) {
gen8_enable_rps(dev_priv);
- gen6_update_ring_freq(dev_priv);
} else if (INTEL_GEN(dev_priv) >= 6) {
gen6_enable_rps(dev_priv);
- gen6_update_ring_freq(dev_priv);
} else if (IS_IRONLAKE_M(dev_priv)) {
ironlake_enable_drps(dev_priv);
intel_init_emon(dev_priv);
}
- WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq);
- WARN_ON(dev_priv->rps.idle_freq > dev_priv->rps.max_freq);
+ WARN_ON(rps->max_freq < rps->min_freq);
+ WARN_ON(rps->idle_freq > rps->max_freq);
+
+ WARN_ON(rps->efficient_freq < rps->min_freq);
+ WARN_ON(rps->efficient_freq > rps->max_freq);
+
+ rps->enabled = true;
+}
+
+void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
+{
+ /* Powersaving is controlled by the host when inside a VM */
+ if (intel_vgpu_active(dev_priv))
+ return;
+
+ mutex_lock(&dev_priv->pcu_lock);
- WARN_ON(dev_priv->rps.efficient_freq < dev_priv->rps.min_freq);
- WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq);
+ intel_enable_rc6(dev_priv);
+ intel_enable_rps(dev_priv);
+ if (HAS_LLC(dev_priv))
+ intel_enable_llc_pstate(dev_priv);
- dev_priv->rps.enabled = true;
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
}
static void __intel_autoenable_gt_powersave(struct work_struct *work)
{
struct drm_i915_private *dev_priv =
- container_of(work, typeof(*dev_priv), rps.autoenable_work.work);
+ container_of(work,
+ typeof(*dev_priv),
+ gt_pm.autoenable_work.work);
struct intel_engine_cs *rcs;
struct drm_i915_gem_request *req;
- if (READ_ONCE(dev_priv->rps.enabled))
- goto out;
-
rcs = dev_priv->engine[RCS];
if (rcs->last_retired_context)
goto out;
@@ -8018,9 +8153,6 @@ out:
void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv)
{
- if (READ_ONCE(dev_priv->rps.enabled))
- return;
-
if (IS_IRONLAKE_M(dev_priv)) {
ironlake_enable_drps(dev_priv);
intel_init_emon(dev_priv);
@@ -8038,7 +8170,7 @@ void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv)
* runtime resume it's necessary).
*/
if (queue_delayed_work(dev_priv->wq,
- &dev_priv->rps.autoenable_work,
+ &dev_priv->gt_pm.autoenable_work,
round_jiffies_up_relative(HZ)))
intel_runtime_pm_get_noresume(dev_priv);
}
@@ -8447,6 +8579,9 @@ static void skl_init_clock_gating(struct drm_i915_private *dev_priv)
static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
{
+ /* The GTT cache must be disabled if the system is using 2M pages. */
+ bool can_use_gtt_cache = !HAS_PAGE_SIZES(dev_priv,
+ I915_GTT_PAGE_SIZE_2M);
enum pipe pipe;
ilk_init_lp_watermarks(dev_priv);
@@ -8481,12 +8616,8 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
/* WaProgramL3SqcReg1Default:bdw */
gen8_set_l3sqc_credits(dev_priv, 30, 2);
- /*
- * WaGttCachingOffByDefault:bdw
- * GTT cache may not work with big pages, so if those
- * are ever enabled GTT cache may need to be disabled.
- */
- I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
+ /* WaGttCachingOffByDefault:bdw */
+ I915_WRITE(HSW_GTT_CACHE_EN, can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
/* WaKVMNotificationOnConfigChange:bdw */
I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1)
@@ -9067,7 +9198,7 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val
{
int status;
- WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+ WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
/* GEN6_PCODE_* are outside of the forcewake domain, we can
* use te fw I915_READ variants to reduce the amount of work
@@ -9114,7 +9245,7 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv,
{
int status;
- WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+ WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
/* GEN6_PCODE_* are outside of the forcewake domain, we can
* use te fw I915_READ variants to reduce the amount of work
@@ -9191,7 +9322,7 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
u32 status;
int ret;
- WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+ WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
#define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \
&status)
@@ -9233,31 +9364,39 @@ out:
static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
/*
* N = val - 0xb7
* Slow = Fast = GPLL ref * N
*/
- return DIV_ROUND_CLOSEST(dev_priv->rps.gpll_ref_freq * (val - 0xb7), 1000);
+ return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
}
static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
{
- return DIV_ROUND_CLOSEST(1000 * val, dev_priv->rps.gpll_ref_freq) + 0xb7;
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+ return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
}
static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
/*
* N = val / 2
* CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
*/
- return DIV_ROUND_CLOSEST(dev_priv->rps.gpll_ref_freq * val, 2 * 2 * 1000);
+ return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
}
static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
/* CHV needs even values */
- return DIV_ROUND_CLOSEST(2 * 1000 * val, dev_priv->rps.gpll_ref_freq) * 2;
+ return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
}
int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
@@ -9288,14 +9427,14 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
void intel_pm_setup(struct drm_i915_private *dev_priv)
{
- mutex_init(&dev_priv->rps.hw_lock);
+ mutex_init(&dev_priv->pcu_lock);
- INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work,
+ INIT_DELAYED_WORK(&dev_priv->gt_pm.autoenable_work,
__intel_autoenable_gt_powersave);
- atomic_set(&dev_priv->rps.num_waiters, 0);
+ atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);
- dev_priv->pm.suspended = false;
- atomic_set(&dev_priv->pm.wakeref_count, 0);
+ dev_priv->runtime_pm.suspended = false;
+ atomic_set(&dev_priv->runtime_pm.wakeref_count, 0);
}
static u64 vlv_residency_raw(struct drm_i915_private *dev_priv,
@@ -9348,7 +9487,7 @@ u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv,
{
u64 time_hw, units, div;
- if (!intel_enable_rc6())
+ if (!intel_rc6_enabled())
return 0;
intel_runtime_pm_get(dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 05c08b0bc172..4285f09ff8b8 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -579,7 +579,16 @@ out:
static void reset_ring_common(struct intel_engine_cs *engine,
struct drm_i915_gem_request *request)
{
- /* Try to restore the logical GPU state to match the continuation
+ /*
+ * RC6 must be prevented until the reset is complete and the engine
+ * reinitialised. If it occurs in the middle of this sequence, the
+ * state written to/loaded from the power context is ill-defined (e.g.
+ * the PP_BASE_DIR may be lost).
+ */
+ assert_forcewakes_active(engine->i915, FORCEWAKE_ALL);
+
+ /*
+ * Try to restore the logical GPU state to match the continuation
* of the request queue. If we skip the context/PD restore, then
* the next request may try to execute assuming that its context
* is valid and loaded on the GPU and so may try to access invalid
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 56d7ae9f298b..17186f067408 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -7,6 +7,8 @@
#include "i915_gem_timeline.h"
#include "i915_selftest.h"
+struct drm_printer;
+
#define I915_CMD_HASH_ORDER 9
/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
@@ -239,6 +241,11 @@ struct intel_engine_execlists {
} port[EXECLIST_MAX_PORTS];
/**
+ * @preempt: are we currently handling a preempting context switch?
+ */
+ bool preempt;
+
+ /**
* @port_mask: number of execlist ports - 1
*/
unsigned int port_mask;
@@ -834,4 +841,6 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915);
bool intel_engine_can_store_dword(struct intel_engine_cs *engine);
+void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *p);
+
#endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 7933d1bc6a1c..8af286c63d3b 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -187,7 +187,7 @@ bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv,
struct i915_power_well *power_well;
bool is_enabled;
- if (dev_priv->pm.suspended)
+ if (dev_priv->runtime_pm.suspended)
return false;
is_enabled = true;
@@ -368,7 +368,7 @@ static void hsw_power_well_enable(struct drm_i915_private *dev_priv,
{
enum i915_power_well_id id = power_well->id;
bool wait_fuses = power_well->hsw.has_fuses;
- enum skl_power_gate pg;
+ enum skl_power_gate uninitialized_var(pg);
u32 val;
if (wait_fuses) {
@@ -785,7 +785,7 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv,
state = enable ? PUNIT_PWRGT_PWR_ON(power_well_id) :
PUNIT_PWRGT_PWR_GATE(power_well_id);
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
#define COND \
((vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask) == state)
@@ -806,7 +806,7 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv,
#undef COND
out:
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
}
static void vlv_power_well_enable(struct drm_i915_private *dev_priv,
@@ -833,7 +833,7 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv,
mask = PUNIT_PWRGT_MASK(power_well_id);
ctrl = PUNIT_PWRGT_PWR_ON(power_well_id);
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
state = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask;
/*
@@ -852,7 +852,7 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv,
ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL) & mask;
WARN_ON(ctrl != state);
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
return enabled;
}
@@ -1364,7 +1364,7 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv,
bool enabled;
u32 state, ctrl;
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
state = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe);
/*
@@ -1381,7 +1381,7 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv,
ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSC_MASK(pipe);
WARN_ON(ctrl << 16 != state);
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
return enabled;
}
@@ -1396,7 +1396,7 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv,
state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe);
- mutex_lock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->pcu_lock);
#define COND \
((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe)) == state)
@@ -1417,7 +1417,7 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv,
#undef COND
out:
- mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_unlock(&dev_priv->pcu_lock);
}
static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv,
@@ -2809,6 +2809,9 @@ static void cnl_display_core_init(struct drm_i915_private *dev_priv, bool resume
/* 6. Enable DBUF */
gen9_dbuf_enable(dev_priv);
+
+ if (resume && dev_priv->csr.dmc_payload)
+ intel_csr_load_program(dev_priv);
}
static void cnl_display_core_uninit(struct drm_i915_private *dev_priv)
@@ -3125,7 +3128,7 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
ret = pm_runtime_get_sync(kdev);
WARN_ONCE(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret);
- atomic_inc(&dev_priv->pm.wakeref_count);
+ atomic_inc(&dev_priv->runtime_pm.wakeref_count);
assert_rpm_wakelock_held(dev_priv);
}
@@ -3159,7 +3162,7 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
return false;
}
- atomic_inc(&dev_priv->pm.wakeref_count);
+ atomic_inc(&dev_priv->runtime_pm.wakeref_count);
assert_rpm_wakelock_held(dev_priv);
return true;
@@ -3190,7 +3193,7 @@ void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv)
assert_rpm_wakelock_held(dev_priv);
pm_runtime_get_noresume(kdev);
- atomic_inc(&dev_priv->pm.wakeref_count);
+ atomic_inc(&dev_priv->runtime_pm.wakeref_count);
}
/**
@@ -3207,7 +3210,7 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
struct device *kdev = &pdev->dev;
assert_rpm_wakelock_held(dev_priv);
- atomic_dec(&dev_priv->pm.wakeref_count);
+ atomic_dec(&dev_priv->runtime_pm.wakeref_count);
pm_runtime_mark_last_busy(kdev);
pm_runtime_put_autosuspend(kdev);
diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
index 7d971cb56116..75c872bb8cc9 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -81,7 +81,7 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr)
{
u32 val = 0;
- WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+ WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
mutex_lock(&dev_priv->sb_lock);
vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
@@ -95,7 +95,7 @@ int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val)
{
int err;
- WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+ WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
mutex_lock(&dev_priv->sb_lock);
err = vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
@@ -125,7 +125,7 @@ u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr)
{
u32 val = 0;
- WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+ WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
mutex_lock(&dev_priv->sb_lock);
vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_NC,
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 28a1209d87e2..86fc9b529f2d 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -66,7 +66,13 @@ int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode,
1000 * adjusted_mode->crtc_htotal);
}
+/* FIXME: We should instead only take spinlocks once for the entire update
+ * instead of once per mmio. */
+#if IS_ENABLED(CONFIG_PROVE_LOCKING)
+#define VBLANK_EVASION_TIME_US 250
+#else
#define VBLANK_EVASION_TIME_US 100
+#endif
/**
* intel_pipe_update_start() - start update of a set of display registers
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index 277477890240..7b938e822fde 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -22,22 +22,9 @@
*
*/
-#include "i915_drv.h"
#include "intel_uc.h"
-#include <linux/firmware.h>
-
-/* Cleans up uC firmware by releasing the firmware GEM obj.
- */
-static void __intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
-{
- struct drm_i915_gem_object *obj;
-
- obj = fetch_and_zero(&uc_fw->obj);
- if (obj)
- i915_gem_object_put(obj);
-
- uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE;
-}
+#include "i915_drv.h"
+#include "i915_guc_submission.h"
/* Reset GuC providing us with fresh state for both GuC and HuC.
*/
@@ -94,198 +81,34 @@ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv)
i915_modparams.enable_guc_submission = HAS_GUC_SCHED(dev_priv);
}
-static void gen8_guc_raise_irq(struct intel_guc *guc)
-{
- struct drm_i915_private *dev_priv = guc_to_i915(guc);
-
- I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER);
-}
-
void intel_uc_init_early(struct drm_i915_private *dev_priv)
{
- struct intel_guc *guc = &dev_priv->guc;
-
- intel_guc_ct_init_early(&guc->ct);
-
- mutex_init(&guc->send_mutex);
- guc->send = intel_guc_send_nop;
- guc->notify = gen8_guc_raise_irq;
-}
-
-static void fetch_uc_fw(struct drm_i915_private *dev_priv,
- struct intel_uc_fw *uc_fw)
-{
- struct pci_dev *pdev = dev_priv->drm.pdev;
- struct drm_i915_gem_object *obj;
- const struct firmware *fw = NULL;
- struct uc_css_header *css;
- size_t size;
- int err;
-
- if (!uc_fw->path)
- return;
-
- uc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING;
-
- DRM_DEBUG_DRIVER("before requesting firmware: uC fw fetch status %s\n",
- intel_uc_fw_status_repr(uc_fw->fetch_status));
-
- err = request_firmware(&fw, uc_fw->path, &pdev->dev);
- if (err)
- goto fail;
- if (!fw)
- goto fail;
-
- DRM_DEBUG_DRIVER("fetch uC fw from %s succeeded, fw %p\n",
- uc_fw->path, fw);
-
- /* Check the size of the blob before examining buffer contents */
- if (fw->size < sizeof(struct uc_css_header)) {
- DRM_NOTE("Firmware header is missing\n");
- goto fail;
- }
-
- css = (struct uc_css_header *)fw->data;
-
- /* Firmware bits always start from header */
- uc_fw->header_offset = 0;
- uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw -
- css->key_size_dw - css->exponent_size_dw) * sizeof(u32);
-
- if (uc_fw->header_size != sizeof(struct uc_css_header)) {
- DRM_NOTE("CSS header definition mismatch\n");
- goto fail;
- }
-
- /* then, uCode */
- uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size;
- uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32);
-
- /* now RSA */
- if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) {
- DRM_NOTE("RSA key size is bad\n");
- goto fail;
- }
- uc_fw->rsa_offset = uc_fw->ucode_offset + uc_fw->ucode_size;
- uc_fw->rsa_size = css->key_size_dw * sizeof(u32);
-
- /* At least, it should have header, uCode and RSA. Size of all three. */
- size = uc_fw->header_size + uc_fw->ucode_size + uc_fw->rsa_size;
- if (fw->size < size) {
- DRM_NOTE("Missing firmware components\n");
- goto fail;
- }
-
- /*
- * The GuC firmware image has the version number embedded at a
- * well-known offset within the firmware blob; note that major / minor
- * version are TWO bytes each (i.e. u16), although all pointers and
- * offsets are defined in terms of bytes (u8).
- */
- switch (uc_fw->type) {
- case INTEL_UC_FW_TYPE_GUC:
- /* Header and uCode will be loaded to WOPCM. Size of the two. */
- size = uc_fw->header_size + uc_fw->ucode_size;
-
- /* Top 32k of WOPCM is reserved (8K stack + 24k RC6 context). */
- if (size > intel_guc_wopcm_size(dev_priv)) {
- DRM_ERROR("Firmware is too large to fit in WOPCM\n");
- goto fail;
- }
- uc_fw->major_ver_found = css->guc.sw_version >> 16;
- uc_fw->minor_ver_found = css->guc.sw_version & 0xFFFF;
- break;
-
- case INTEL_UC_FW_TYPE_HUC:
- uc_fw->major_ver_found = css->huc.sw_version >> 16;
- uc_fw->minor_ver_found = css->huc.sw_version & 0xFFFF;
- break;
-
- default:
- DRM_ERROR("Unknown firmware type %d\n", uc_fw->type);
- err = -ENOEXEC;
- goto fail;
- }
-
- if (uc_fw->major_ver_wanted == 0 && uc_fw->minor_ver_wanted == 0) {
- DRM_NOTE("Skipping %s firmware version check\n",
- intel_uc_fw_type_repr(uc_fw->type));
- } else if (uc_fw->major_ver_found != uc_fw->major_ver_wanted ||
- uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) {
- DRM_NOTE("%s firmware version %d.%d, required %d.%d\n",
- intel_uc_fw_type_repr(uc_fw->type),
- uc_fw->major_ver_found, uc_fw->minor_ver_found,
- uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
- err = -ENOEXEC;
- goto fail;
- }
-
- DRM_DEBUG_DRIVER("firmware version %d.%d OK (minimum %d.%d)\n",
- uc_fw->major_ver_found, uc_fw->minor_ver_found,
- uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
-
- obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size);
- if (IS_ERR(obj)) {
- err = PTR_ERR(obj);
- goto fail;
- }
-
- uc_fw->obj = obj;
- uc_fw->size = fw->size;
-
- DRM_DEBUG_DRIVER("uC fw fetch status SUCCESS, obj %p\n",
- uc_fw->obj);
-
- release_firmware(fw);
- uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS;
- return;
-
-fail:
- DRM_WARN("Failed to fetch valid uC firmware from %s (error %d)\n",
- uc_fw->path, err);
- DRM_DEBUG_DRIVER("uC fw fetch status FAIL; err %d, fw %p, obj %p\n",
- err, fw, uc_fw->obj);
-
- release_firmware(fw); /* OK even if fw is NULL */
- uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL;
+ intel_guc_init_early(&dev_priv->guc);
}
void intel_uc_init_fw(struct drm_i915_private *dev_priv)
{
- fetch_uc_fw(dev_priv, &dev_priv->huc.fw);
- fetch_uc_fw(dev_priv, &dev_priv->guc.fw);
+ intel_uc_fw_fetch(dev_priv, &dev_priv->huc.fw);
+ intel_uc_fw_fetch(dev_priv, &dev_priv->guc.fw);
}
void intel_uc_fini_fw(struct drm_i915_private *dev_priv)
{
- __intel_uc_fw_fini(&dev_priv->guc.fw);
- __intel_uc_fw_fini(&dev_priv->huc.fw);
-}
-
-static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i)
-{
- GEM_BUG_ON(!guc->send_regs.base);
- GEM_BUG_ON(!guc->send_regs.count);
- GEM_BUG_ON(i >= guc->send_regs.count);
-
- return _MMIO(guc->send_regs.base + 4 * i);
+ intel_uc_fw_fini(&dev_priv->guc.fw);
+ intel_uc_fw_fini(&dev_priv->huc.fw);
}
-static void guc_init_send_regs(struct intel_guc *guc)
+/**
+ * intel_uc_init_mmio - setup uC MMIO access
+ *
+ * @dev_priv: device private
+ *
+ * Setup minimal state necessary for MMIO accesses later in the
+ * initialization sequence.
+ */
+void intel_uc_init_mmio(struct drm_i915_private *dev_priv)
{
- struct drm_i915_private *dev_priv = guc_to_i915(guc);
- enum forcewake_domains fw_domains = 0;
- unsigned int i;
-
- guc->send_regs.base = i915_mmio_reg_offset(SOFT_SCRATCH(0));
- guc->send_regs.count = SOFT_SCRATCH_COUNT - 1;
-
- for (i = 0; i < guc->send_regs.count; i++) {
- fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
- guc_send_reg(guc, i),
- FW_REG_READ | FW_REG_WRITE);
- }
- guc->send_regs.fw_domains = fw_domains;
+ intel_guc_init_send_regs(&dev_priv->guc);
}
static void guc_capture_load_err_log(struct intel_guc *guc)
@@ -309,8 +132,6 @@ static int guc_enable_communication(struct intel_guc *guc)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
- guc_init_send_regs(guc);
-
if (HAS_GUC_CT(dev_priv))
return intel_guc_enable_ct(guc);
@@ -328,27 +149,6 @@ static void guc_disable_communication(struct intel_guc *guc)
guc->send = intel_guc_send_nop;
}
-/**
- * intel_guc_auth_huc() - Send action to GuC to authenticate HuC ucode
- * @guc: intel_guc structure
- * @rsa_offset: rsa offset w.r.t ggtt base of huc vma
- *
- * Triggers a HuC firmware authentication request to the GuC via intel_guc_send
- * INTEL_GUC_ACTION_AUTHENTICATE_HUC interface. This function is invoked by
- * intel_huc_auth().
- *
- * Return: non-zero code on error
- */
-int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset)
-{
- u32 action[] = {
- INTEL_GUC_ACTION_AUTHENTICATE_HUC,
- rsa_offset
- };
-
- return intel_guc_send(guc, action, ARRAY_SIZE(action));
-}
-
int intel_uc_init_hw(struct drm_i915_private *dev_priv)
{
struct intel_guc *guc = &dev_priv->guc;
@@ -480,82 +280,3 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
i915_ggtt_disable_guc(dev_priv);
}
-
-int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len)
-{
- WARN(1, "Unexpected send: action=%#x\n", *action);
- return -ENODEV;
-}
-
-/*
- * This function implements the MMIO based host to GuC interface.
- */
-int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len)
-{
- struct drm_i915_private *dev_priv = guc_to_i915(guc);
- u32 status;
- int i;
- int ret;
-
- GEM_BUG_ON(!len);
- GEM_BUG_ON(len > guc->send_regs.count);
-
- /* If CT is available, we expect to use MMIO only during init/fini */
- GEM_BUG_ON(HAS_GUC_CT(dev_priv) &&
- *action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER &&
- *action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER);
-
- mutex_lock(&guc->send_mutex);
- intel_uncore_forcewake_get(dev_priv, guc->send_regs.fw_domains);
-
- for (i = 0; i < len; i++)
- I915_WRITE(guc_send_reg(guc, i), action[i]);
-
- POSTING_READ(guc_send_reg(guc, i - 1));
-
- intel_guc_notify(guc);
-
- /*
- * No GuC command should ever take longer than 10ms.
- * Fast commands should still complete in 10us.
- */
- ret = __intel_wait_for_register_fw(dev_priv,
- guc_send_reg(guc, 0),
- INTEL_GUC_RECV_MASK,
- INTEL_GUC_RECV_MASK,
- 10, 10, &status);
- if (status != INTEL_GUC_STATUS_SUCCESS) {
- /*
- * Either the GuC explicitly returned an error (which
- * we convert to -EIO here) or no response at all was
- * received within the timeout limit (-ETIMEDOUT)
- */
- if (ret != -ETIMEDOUT)
- ret = -EIO;
-
- DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;"
- " ret=%d status=0x%08X response=0x%08X\n",
- action[0], ret, status, I915_READ(SOFT_SCRATCH(15)));
- }
-
- intel_uncore_forcewake_put(dev_priv, guc->send_regs.fw_domains);
- mutex_unlock(&guc->send_mutex);
-
- return ret;
-}
-
-int intel_guc_sample_forcewake(struct intel_guc *guc)
-{
- struct drm_i915_private *dev_priv = guc_to_i915(guc);
- u32 action[2];
-
- action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE;
- /* WaRsDisableCoarsePowerGating:skl,bxt */
- if (!intel_enable_rc6() || NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
- action[1] = 0;
- else
- /* bit 0 and 1 are for Render and Media domain separately */
- action[1] = GUC_FORCEWAKE_RENDER | GUC_FORCEWAKE_MEDIA;
-
- return intel_guc_send(guc, action, ARRAY_SIZE(action));
-}
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index 6966349ed737..e18d3bb02088 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -24,237 +24,15 @@
#ifndef _INTEL_UC_H_
#define _INTEL_UC_H_
-#include "intel_guc_fwif.h"
-#include "i915_guc_reg.h"
-#include "intel_ringbuffer.h"
-#include "intel_guc_ct.h"
-#include "i915_vma.h"
+#include "intel_guc.h"
+#include "intel_huc.h"
-struct drm_i915_gem_request;
-
-/*
- * This structure primarily describes the GEM object shared with the GuC.
- * The specs sometimes refer to this object as a "GuC context", but we use
- * the term "client" to avoid confusion with hardware contexts. This
- * GEM object is held for the entire lifetime of our interaction with
- * the GuC, being allocated before the GuC is loaded with its firmware.
- * Because there's no way to update the address used by the GuC after
- * initialisation, the shared object must stay pinned into the GGTT as
- * long as the GuC is in use. We also keep the first page (only) mapped
- * into kernel address space, as it includes shared data that must be
- * updated on every request submission.
- *
- * The single GEM object described here is actually made up of several
- * separate areas, as far as the GuC is concerned. The first page (kept
- * kmap'd) includes the "process descriptor" which holds sequence data for
- * the doorbell, and one cacheline which actually *is* the doorbell; a
- * write to this will "ring the doorbell" (i.e. send an interrupt to the
- * GuC). The subsequent pages of the client object constitute the work
- * queue (a circular array of work items), again described in the process
- * descriptor. Work queue pages are mapped momentarily as required.
- */
-struct i915_guc_client {
- struct i915_vma *vma;
- void *vaddr;
- struct i915_gem_context *owner;
- struct intel_guc *guc;
-
- uint32_t engines; /* bitmap of (host) engine ids */
- uint32_t priority;
- u32 stage_id;
- uint32_t proc_desc_offset;
-
- u16 doorbell_id;
- unsigned long doorbell_offset;
-
- spinlock_t wq_lock;
- /* Per-engine counts of GuC submissions */
- uint64_t submissions[I915_NUM_ENGINES];
-};
-
-enum intel_uc_fw_status {
- INTEL_UC_FIRMWARE_FAIL = -1,
- INTEL_UC_FIRMWARE_NONE = 0,
- INTEL_UC_FIRMWARE_PENDING,
- INTEL_UC_FIRMWARE_SUCCESS
-};
-
-/* User-friendly representation of an enum */
-static inline
-const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status)
-{
- switch (status) {
- case INTEL_UC_FIRMWARE_FAIL:
- return "FAIL";
- case INTEL_UC_FIRMWARE_NONE:
- return "NONE";
- case INTEL_UC_FIRMWARE_PENDING:
- return "PENDING";
- case INTEL_UC_FIRMWARE_SUCCESS:
- return "SUCCESS";
- }
- return "<invalid>";
-}
-
-enum intel_uc_fw_type {
- INTEL_UC_FW_TYPE_GUC,
- INTEL_UC_FW_TYPE_HUC
-};
-
-/* User-friendly representation of an enum */
-static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type)
-{
- switch (type) {
- case INTEL_UC_FW_TYPE_GUC:
- return "GuC";
- case INTEL_UC_FW_TYPE_HUC:
- return "HuC";
- }
- return "uC";
-}
-
-/*
- * This structure encapsulates all the data needed during the process
- * of fetching, caching, and loading the firmware image into the GuC.
- */
-struct intel_uc_fw {
- const char *path;
- size_t size;
- struct drm_i915_gem_object *obj;
- enum intel_uc_fw_status fetch_status;
- enum intel_uc_fw_status load_status;
-
- uint16_t major_ver_wanted;
- uint16_t minor_ver_wanted;
- uint16_t major_ver_found;
- uint16_t minor_ver_found;
-
- enum intel_uc_fw_type type;
- uint32_t header_size;
- uint32_t header_offset;
- uint32_t rsa_size;
- uint32_t rsa_offset;
- uint32_t ucode_size;
- uint32_t ucode_offset;
-};
-
-struct intel_guc_log {
- uint32_t flags;
- struct i915_vma *vma;
- /* The runtime stuff gets created only when GuC logging gets enabled */
- struct {
- void *buf_addr;
- struct workqueue_struct *flush_wq;
- struct work_struct flush_work;
- struct rchan *relay_chan;
- } runtime;
- /* logging related stats */
- u32 capture_miss_count;
- u32 flush_interrupt_count;
- u32 prev_overflow_count[GUC_MAX_LOG_BUFFER];
- u32 total_overflow_count[GUC_MAX_LOG_BUFFER];
- u32 flush_count[GUC_MAX_LOG_BUFFER];
-};
-
-struct intel_guc {
- struct intel_uc_fw fw;
- struct intel_guc_log log;
- struct intel_guc_ct ct;
-
- /* Log snapshot if GuC errors during load */
- struct drm_i915_gem_object *load_err_log;
-
- /* intel_guc_recv interrupt related state */
- bool interrupts_enabled;
-
- struct i915_vma *ads_vma;
- struct i915_vma *stage_desc_pool;
- void *stage_desc_pool_vaddr;
- struct ida stage_ids;
-
- struct i915_guc_client *execbuf_client;
-
- DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS);
- uint32_t db_cacheline; /* Cyclic counter mod pagesize */
-
- /* GuC's FW specific registers used in MMIO send */
- struct {
- u32 base;
- unsigned int count;
- enum forcewake_domains fw_domains;
- } send_regs;
-
- /* To serialize the intel_guc_send actions */
- struct mutex send_mutex;
-
- /* GuC's FW specific send function */
- int (*send)(struct intel_guc *guc, const u32 *data, u32 len);
-
- /* GuC's FW specific notify function */
- void (*notify)(struct intel_guc *guc);
-};
-
-struct intel_huc {
- /* Generic uC firmware management */
- struct intel_uc_fw fw;
-
- /* HuC-specific additions */
-};
-
-/* intel_uc.c */
void intel_uc_sanitize_options(struct drm_i915_private *dev_priv);
void intel_uc_init_early(struct drm_i915_private *dev_priv);
+void intel_uc_init_mmio(struct drm_i915_private *dev_priv);
void intel_uc_init_fw(struct drm_i915_private *dev_priv);
void intel_uc_fini_fw(struct drm_i915_private *dev_priv);
int intel_uc_init_hw(struct drm_i915_private *dev_priv);
void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
-int intel_guc_sample_forcewake(struct intel_guc *guc);
-int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len);
-int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len);
-int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset);
-
-static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len)
-{
- return guc->send(guc, action, len);
-}
-
-static inline void intel_guc_notify(struct intel_guc *guc)
-{
- guc->notify(guc);
-}
-
-/* intel_guc_loader.c */
-int intel_guc_select_fw(struct intel_guc *guc);
-int intel_guc_init_hw(struct intel_guc *guc);
-int intel_guc_suspend(struct drm_i915_private *dev_priv);
-int intel_guc_resume(struct drm_i915_private *dev_priv);
-u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv);
-
-/* i915_guc_submission.c */
-int i915_guc_submission_init(struct drm_i915_private *dev_priv);
-int i915_guc_submission_enable(struct drm_i915_private *dev_priv);
-void i915_guc_submission_disable(struct drm_i915_private *dev_priv);
-void i915_guc_submission_fini(struct drm_i915_private *dev_priv);
-struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size);
-
-/* intel_guc_log.c */
-int intel_guc_log_create(struct intel_guc *guc);
-void intel_guc_log_destroy(struct intel_guc *guc);
-int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val);
-void i915_guc_log_register(struct drm_i915_private *dev_priv);
-void i915_guc_log_unregister(struct drm_i915_private *dev_priv);
-
-static inline u32 guc_ggtt_offset(struct i915_vma *vma)
-{
- u32 offset = i915_ggtt_offset(vma);
- GEM_BUG_ON(offset < GUC_WOPCM_TOP);
- GEM_BUG_ON(range_overflows_t(u64, offset, vma->size, GUC_GGTT_TOP));
- return offset;
-}
-
-/* intel_huc.c */
-void intel_huc_select_fw(struct intel_huc *huc);
-void intel_huc_init_hw(struct intel_huc *huc);
-void intel_huc_auth(struct intel_huc *huc);
#endif
diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c
new file mode 100644
index 000000000000..766b1cbdfbd7
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_uc_fw.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright © 2016-2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+
+#include "intel_uc_fw.h"
+#include "i915_drv.h"
+
+/**
+ * intel_uc_fw_fetch - fetch uC firmware
+ *
+ * @dev_priv: device private
+ * @uc_fw: uC firmware
+ *
+ * Fetch uC firmware into GEM obj.
+ */
+void intel_uc_fw_fetch(struct drm_i915_private *dev_priv,
+ struct intel_uc_fw *uc_fw)
+{
+ struct pci_dev *pdev = dev_priv->drm.pdev;
+ struct drm_i915_gem_object *obj;
+ const struct firmware *fw = NULL;
+ struct uc_css_header *css;
+ size_t size;
+ int err;
+
+ if (!uc_fw->path)
+ return;
+
+ uc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING;
+
+ DRM_DEBUG_DRIVER("before requesting firmware: uC fw fetch status %s\n",
+ intel_uc_fw_status_repr(uc_fw->fetch_status));
+
+ err = request_firmware(&fw, uc_fw->path, &pdev->dev);
+ if (err)
+ goto fail;
+ if (!fw)
+ goto fail;
+
+ DRM_DEBUG_DRIVER("fetch uC fw from %s succeeded, fw %p\n",
+ uc_fw->path, fw);
+
+ /* Check the size of the blob before examining buffer contents */
+ if (fw->size < sizeof(struct uc_css_header)) {
+ DRM_NOTE("Firmware header is missing\n");
+ goto fail;
+ }
+
+ css = (struct uc_css_header *)fw->data;
+
+ /* Firmware bits always start from header */
+ uc_fw->header_offset = 0;
+ uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw -
+ css->key_size_dw - css->exponent_size_dw) *
+ sizeof(u32);
+
+ if (uc_fw->header_size != sizeof(struct uc_css_header)) {
+ DRM_NOTE("CSS header definition mismatch\n");
+ goto fail;
+ }
+
+ /* then, uCode */
+ uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size;
+ uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32);
+
+ /* now RSA */
+ if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) {
+ DRM_NOTE("RSA key size is bad\n");
+ goto fail;
+ }
+ uc_fw->rsa_offset = uc_fw->ucode_offset + uc_fw->ucode_size;
+ uc_fw->rsa_size = css->key_size_dw * sizeof(u32);
+
+ /* At least, it should have header, uCode and RSA. Size of all three. */
+ size = uc_fw->header_size + uc_fw->ucode_size + uc_fw->rsa_size;
+ if (fw->size < size) {
+ DRM_NOTE("Missing firmware components\n");
+ goto fail;
+ }
+
+ /*
+ * The GuC firmware image has the version number embedded at a
+ * well-known offset within the firmware blob; note that major / minor
+ * version are TWO bytes each (i.e. u16), although all pointers and
+ * offsets are defined in terms of bytes (u8).
+ */
+ switch (uc_fw->type) {
+ case INTEL_UC_FW_TYPE_GUC:
+ /* Header and uCode will be loaded to WOPCM. Size of the two. */
+ size = uc_fw->header_size + uc_fw->ucode_size;
+
+ /* Top 32k of WOPCM is reserved (8K stack + 24k RC6 context). */
+ if (size > intel_guc_wopcm_size(dev_priv)) {
+ DRM_ERROR("Firmware is too large to fit in WOPCM\n");
+ goto fail;
+ }
+ uc_fw->major_ver_found = css->guc.sw_version >> 16;
+ uc_fw->minor_ver_found = css->guc.sw_version & 0xFFFF;
+ break;
+
+ case INTEL_UC_FW_TYPE_HUC:
+ uc_fw->major_ver_found = css->huc.sw_version >> 16;
+ uc_fw->minor_ver_found = css->huc.sw_version & 0xFFFF;
+ break;
+
+ default:
+ DRM_ERROR("Unknown firmware type %d\n", uc_fw->type);
+ err = -ENOEXEC;
+ goto fail;
+ }
+
+ if (uc_fw->major_ver_wanted == 0 && uc_fw->minor_ver_wanted == 0) {
+ DRM_NOTE("Skipping %s firmware version check\n",
+ intel_uc_fw_type_repr(uc_fw->type));
+ } else if (uc_fw->major_ver_found != uc_fw->major_ver_wanted ||
+ uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) {
+ DRM_NOTE("%s firmware version %d.%d, required %d.%d\n",
+ intel_uc_fw_type_repr(uc_fw->type),
+ uc_fw->major_ver_found, uc_fw->minor_ver_found,
+ uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
+ err = -ENOEXEC;
+ goto fail;
+ }
+
+ DRM_DEBUG_DRIVER("firmware version %d.%d OK (minimum %d.%d)\n",
+ uc_fw->major_ver_found, uc_fw->minor_ver_found,
+ uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
+
+ obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto fail;
+ }
+
+ uc_fw->obj = obj;
+ uc_fw->size = fw->size;
+
+ DRM_DEBUG_DRIVER("uC fw fetch status SUCCESS, obj %p\n",
+ uc_fw->obj);
+
+ release_firmware(fw);
+ uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS;
+ return;
+
+fail:
+ DRM_WARN("Failed to fetch valid uC firmware from %s (error %d)\n",
+ uc_fw->path, err);
+ DRM_DEBUG_DRIVER("uC fw fetch status FAIL; err %d, fw %p, obj %p\n",
+ err, fw, uc_fw->obj);
+
+ release_firmware(fw); /* OK even if fw is NULL */
+ uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL;
+}
+
+/**
+ * intel_uc_fw_fini - cleanup uC firmware
+ *
+ * @uc_fw: uC firmware
+ *
+ * Cleans up uC firmware by releasing the firmware GEM obj.
+ */
+void intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
+{
+ struct drm_i915_gem_object *obj;
+
+ obj = fetch_and_zero(&uc_fw->obj);
+ if (obj)
+ i915_gem_object_put(obj);
+
+ uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE;
+}
diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h
new file mode 100644
index 000000000000..c3e9af4b9bf0
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_uc_fw.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright © 2014-2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _INTEL_UC_FW_H_
+#define _INTEL_UC_FW_H_
+
+struct drm_i915_private;
+
+enum intel_uc_fw_status {
+ INTEL_UC_FIRMWARE_FAIL = -1,
+ INTEL_UC_FIRMWARE_NONE = 0,
+ INTEL_UC_FIRMWARE_PENDING,
+ INTEL_UC_FIRMWARE_SUCCESS
+};
+
+enum intel_uc_fw_type {
+ INTEL_UC_FW_TYPE_GUC,
+ INTEL_UC_FW_TYPE_HUC
+};
+
+/*
+ * This structure encapsulates all the data needed during the process
+ * of fetching, caching, and loading the firmware image into the uC.
+ */
+struct intel_uc_fw {
+ const char *path;
+ size_t size;
+ struct drm_i915_gem_object *obj;
+ enum intel_uc_fw_status fetch_status;
+ enum intel_uc_fw_status load_status;
+
+ u16 major_ver_wanted;
+ u16 minor_ver_wanted;
+ u16 major_ver_found;
+ u16 minor_ver_found;
+
+ enum intel_uc_fw_type type;
+ u32 header_size;
+ u32 header_offset;
+ u32 rsa_size;
+ u32 rsa_offset;
+ u32 ucode_size;
+ u32 ucode_offset;
+};
+
+static inline
+const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status)
+{
+ switch (status) {
+ case INTEL_UC_FIRMWARE_FAIL:
+ return "FAIL";
+ case INTEL_UC_FIRMWARE_NONE:
+ return "NONE";
+ case INTEL_UC_FIRMWARE_PENDING:
+ return "PENDING";
+ case INTEL_UC_FIRMWARE_SUCCESS:
+ return "SUCCESS";
+ }
+ return "<invalid>";
+}
+
+static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type)
+{
+ switch (type) {
+ case INTEL_UC_FW_TYPE_GUC:
+ return "GuC";
+ case INTEL_UC_FW_TYPE_HUC:
+ return "HuC";
+ }
+ return "uC";
+}
+
+static inline
+void intel_uc_fw_init(struct intel_uc_fw *uc_fw, enum intel_uc_fw_type type)
+{
+ uc_fw->path = NULL;
+ uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE;
+ uc_fw->load_status = INTEL_UC_FIRMWARE_NONE;
+ uc_fw->type = type;
+}
+
+void intel_uc_fw_fetch(struct drm_i915_private *dev_priv,
+ struct intel_uc_fw *uc_fw);
+void intel_uc_fw_fini(struct intel_uc_fw *uc_fw);
+
+#endif
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index b3c3f94fc7e4..983617b5b338 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -626,7 +626,23 @@ void assert_forcewakes_inactive(struct drm_i915_private *dev_priv)
if (!dev_priv->uncore.funcs.force_wake_get)
return;
- WARN_ON(dev_priv->uncore.fw_domains_active);
+ WARN(dev_priv->uncore.fw_domains_active,
+ "Expected all fw_domains to be inactive, but %08x are still on\n",
+ dev_priv->uncore.fw_domains_active);
+}
+
+void assert_forcewakes_active(struct drm_i915_private *dev_priv,
+ enum forcewake_domains fw_domains)
+{
+ if (!dev_priv->uncore.funcs.force_wake_get)
+ return;
+
+ assert_rpm_wakelock_held(dev_priv);
+
+ fw_domains &= dev_priv->uncore.fw_domains;
+ WARN(fw_domains & ~dev_priv->uncore.fw_domains_active,
+ "Expected %08x fw_domains to be active, but %08x are off\n",
+ fw_domains, fw_domains & ~dev_priv->uncore.fw_domains_active);
}
/* We give fast paths for the really cool registers */
diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
index 03786f931905..582771251b57 100644
--- a/drivers/gpu/drm/i915/intel_uncore.h
+++ b/drivers/gpu/drm/i915/intel_uncore.h
@@ -25,6 +25,12 @@
#ifndef __INTEL_UNCORE_H__
#define __INTEL_UNCORE_H__
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+#include <linux/hrtimer.h>
+
+#include "i915_reg.h"
+
struct drm_i915_private;
enum forcewake_domain_id {
@@ -131,6 +137,8 @@ void intel_uncore_resume_early(struct drm_i915_private *dev_priv);
u64 intel_uncore_edram_size(struct drm_i915_private *dev_priv);
void assert_forcewakes_inactive(struct drm_i915_private *dev_priv);
+void assert_forcewakes_active(struct drm_i915_private *dev_priv,
+ enum forcewake_domains fw_domains);
const char *intel_uncore_forcewake_domain_to_str(const enum forcewake_domain_id id);
enum forcewake_domains
diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c
index c5c7e8efbdd3..a2632df39173 100644
--- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c
@@ -37,8 +37,7 @@ static void huge_free_pages(struct drm_i915_gem_object *obj,
kfree(pages);
}
-static struct sg_table *
-huge_get_pages(struct drm_i915_gem_object *obj)
+static int huge_get_pages(struct drm_i915_gem_object *obj)
{
#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY)
const unsigned long nreal = obj->scratch / PAGE_SIZE;
@@ -49,11 +48,11 @@ huge_get_pages(struct drm_i915_gem_object *obj)
pages = kmalloc(sizeof(*pages), GFP);
if (!pages)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
if (sg_alloc_table(pages, npages, GFP)) {
kfree(pages);
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
}
sg = pages->sgl;
@@ -81,11 +80,14 @@ huge_get_pages(struct drm_i915_gem_object *obj)
if (i915_gem_gtt_prepare_pages(obj, pages))
goto err;
- return pages;
+ __i915_gem_object_set_pages(obj, pages, PAGE_SIZE);
+
+ return 0;
err:
huge_free_pages(obj, pages);
- return ERR_PTR(-ENOMEM);
+
+ return -ENOMEM;
#undef GFP
}
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
new file mode 100644
index 000000000000..c53f8474113a
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -0,0 +1,1734 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "../i915_selftest.h"
+
+#include <linux/prime_numbers.h>
+
+#include "mock_drm.h"
+
+static const unsigned int page_sizes[] = {
+ I915_GTT_PAGE_SIZE_2M,
+ I915_GTT_PAGE_SIZE_64K,
+ I915_GTT_PAGE_SIZE_4K,
+};
+
+static unsigned int get_largest_page_size(struct drm_i915_private *i915,
+ u64 rem)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) {
+ unsigned int page_size = page_sizes[i];
+
+ if (HAS_PAGE_SIZES(i915, page_size) && rem >= page_size)
+ return page_size;
+ }
+
+ return 0;
+}
+
+static void huge_pages_free_pages(struct sg_table *st)
+{
+ struct scatterlist *sg;
+
+ for (sg = st->sgl; sg; sg = __sg_next(sg)) {
+ if (sg_page(sg))
+ __free_pages(sg_page(sg), get_order(sg->length));
+ }
+
+ sg_free_table(st);
+ kfree(st);
+}
+
+static int get_huge_pages(struct drm_i915_gem_object *obj)
+{
+#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY)
+ unsigned int page_mask = obj->mm.page_mask;
+ struct sg_table *st;
+ struct scatterlist *sg;
+ unsigned int sg_page_sizes;
+ u64 rem;
+
+ st = kmalloc(sizeof(*st), GFP);
+ if (!st)
+ return -ENOMEM;
+
+ if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) {
+ kfree(st);
+ return -ENOMEM;
+ }
+
+ rem = obj->base.size;
+ sg = st->sgl;
+ st->nents = 0;
+ sg_page_sizes = 0;
+
+ /*
+ * Our goal here is simple, we want to greedily fill the object from
+ * largest to smallest page-size, while ensuring that we use *every*
+ * page-size as per the given page-mask.
+ */
+ do {
+ unsigned int bit = ilog2(page_mask);
+ unsigned int page_size = BIT(bit);
+ int order = get_order(page_size);
+
+ do {
+ struct page *page;
+
+ GEM_BUG_ON(order >= MAX_ORDER);
+ page = alloc_pages(GFP | __GFP_ZERO, order);
+ if (!page)
+ goto err;
+
+ sg_set_page(sg, page, page_size, 0);
+ sg_page_sizes |= page_size;
+ st->nents++;
+
+ rem -= page_size;
+ if (!rem) {
+ sg_mark_end(sg);
+ break;
+ }
+
+ sg = __sg_next(sg);
+ } while ((rem - ((page_size-1) & page_mask)) >= page_size);
+
+ page_mask &= (page_size-1);
+ } while (page_mask);
+
+ if (i915_gem_gtt_prepare_pages(obj, st))
+ goto err;
+
+ obj->mm.madv = I915_MADV_DONTNEED;
+
+ GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask);
+ __i915_gem_object_set_pages(obj, st, sg_page_sizes);
+
+ return 0;
+
+err:
+ sg_set_page(sg, NULL, 0, 0);
+ sg_mark_end(sg);
+ huge_pages_free_pages(st);
+
+ return -ENOMEM;
+}
+
+static void put_huge_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ i915_gem_gtt_finish_pages(obj, pages);
+ huge_pages_free_pages(pages);
+
+ obj->mm.dirty = false;
+ obj->mm.madv = I915_MADV_WILLNEED;
+}
+
+static const struct drm_i915_gem_object_ops huge_page_ops = {
+ .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
+ I915_GEM_OBJECT_IS_SHRINKABLE,
+ .get_pages = get_huge_pages,
+ .put_pages = put_huge_pages,
+};
+
+static struct drm_i915_gem_object *
+huge_pages_object(struct drm_i915_private *i915,
+ u64 size,
+ unsigned int page_mask)
+{
+ struct drm_i915_gem_object *obj;
+
+ GEM_BUG_ON(!size);
+ GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask))));
+
+ if (size >> PAGE_SHIFT > INT_MAX)
+ return ERR_PTR(-E2BIG);
+
+ if (overflows_type(size, obj->base.size))
+ return ERR_PTR(-E2BIG);
+
+ obj = i915_gem_object_alloc(i915);
+ if (!obj)
+ return ERR_PTR(-ENOMEM);
+
+ drm_gem_private_object_init(&i915->drm, &obj->base, size);
+ i915_gem_object_init(obj, &huge_page_ops);
+
+ obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+ obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+ obj->cache_level = I915_CACHE_NONE;
+
+ obj->mm.page_mask = page_mask;
+
+ return obj;
+}
+
+static int fake_get_huge_pages(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ const u64 max_len = rounddown_pow_of_two(UINT_MAX);
+ struct sg_table *st;
+ struct scatterlist *sg;
+ unsigned int sg_page_sizes;
+ u64 rem;
+
+ st = kmalloc(sizeof(*st), GFP);
+ if (!st)
+ return -ENOMEM;
+
+ if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) {
+ kfree(st);
+ return -ENOMEM;
+ }
+
+ /* Use optimal page sized chunks to fill in the sg table */
+ rem = obj->base.size;
+ sg = st->sgl;
+ st->nents = 0;
+ sg_page_sizes = 0;
+ do {
+ unsigned int page_size = get_largest_page_size(i915, rem);
+ unsigned int len = min(page_size * div_u64(rem, page_size),
+ max_len);
+
+ GEM_BUG_ON(!page_size);
+
+ sg->offset = 0;
+ sg->length = len;
+ sg_dma_len(sg) = len;
+ sg_dma_address(sg) = page_size;
+
+ sg_page_sizes |= len;
+
+ st->nents++;
+
+ rem -= len;
+ if (!rem) {
+ sg_mark_end(sg);
+ break;
+ }
+
+ sg = sg_next(sg);
+ } while (1);
+
+ obj->mm.madv = I915_MADV_DONTNEED;
+
+ __i915_gem_object_set_pages(obj, st, sg_page_sizes);
+
+ return 0;
+}
+
+static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct sg_table *st;
+ struct scatterlist *sg;
+ unsigned int page_size;
+
+ st = kmalloc(sizeof(*st), GFP);
+ if (!st)
+ return -ENOMEM;
+
+ if (sg_alloc_table(st, 1, GFP)) {
+ kfree(st);
+ return -ENOMEM;
+ }
+
+ sg = st->sgl;
+ st->nents = 1;
+
+ page_size = get_largest_page_size(i915, obj->base.size);
+ GEM_BUG_ON(!page_size);
+
+ sg->offset = 0;
+ sg->length = obj->base.size;
+ sg_dma_len(sg) = obj->base.size;
+ sg_dma_address(sg) = page_size;
+
+ obj->mm.madv = I915_MADV_DONTNEED;
+
+ __i915_gem_object_set_pages(obj, st, sg->length);
+
+ return 0;
+#undef GFP
+}
+
+static void fake_free_huge_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ sg_free_table(pages);
+ kfree(pages);
+}
+
+static void fake_put_huge_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ fake_free_huge_pages(obj, pages);
+ obj->mm.dirty = false;
+ obj->mm.madv = I915_MADV_WILLNEED;
+}
+
+static const struct drm_i915_gem_object_ops fake_ops = {
+ .flags = I915_GEM_OBJECT_IS_SHRINKABLE,
+ .get_pages = fake_get_huge_pages,
+ .put_pages = fake_put_huge_pages,
+};
+
+static const struct drm_i915_gem_object_ops fake_ops_single = {
+ .flags = I915_GEM_OBJECT_IS_SHRINKABLE,
+ .get_pages = fake_get_huge_pages_single,
+ .put_pages = fake_put_huge_pages,
+};
+
+static struct drm_i915_gem_object *
+fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single)
+{
+ struct drm_i915_gem_object *obj;
+
+ GEM_BUG_ON(!size);
+ GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
+
+ if (size >> PAGE_SHIFT > UINT_MAX)
+ return ERR_PTR(-E2BIG);
+
+ if (overflows_type(size, obj->base.size))
+ return ERR_PTR(-E2BIG);
+
+ obj = i915_gem_object_alloc(i915);
+ if (!obj)
+ return ERR_PTR(-ENOMEM);
+
+ drm_gem_private_object_init(&i915->drm, &obj->base, size);
+
+ if (single)
+ i915_gem_object_init(obj, &fake_ops_single);
+ else
+ i915_gem_object_init(obj, &fake_ops);
+
+ obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+ obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+ obj->cache_level = I915_CACHE_NONE;
+
+ return obj;
+}
+
+static int igt_check_page_sizes(struct i915_vma *vma)
+{
+ struct drm_i915_private *i915 = to_i915(vma->obj->base.dev);
+ unsigned int supported = INTEL_INFO(i915)->page_sizes;
+ struct drm_i915_gem_object *obj = vma->obj;
+ int err = 0;
+
+ if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) {
+ pr_err("unsupported page_sizes.sg=%u, supported=%u\n",
+ vma->page_sizes.sg & ~supported, supported);
+ err = -EINVAL;
+ }
+
+ if (!HAS_PAGE_SIZES(i915, vma->page_sizes.gtt)) {
+ pr_err("unsupported page_sizes.gtt=%u, supported=%u\n",
+ vma->page_sizes.gtt & ~supported, supported);
+ err = -EINVAL;
+ }
+
+ if (vma->page_sizes.phys != obj->mm.page_sizes.phys) {
+ pr_err("vma->page_sizes.phys(%u) != obj->mm.page_sizes.phys(%u)\n",
+ vma->page_sizes.phys, obj->mm.page_sizes.phys);
+ err = -EINVAL;
+ }
+
+ if (vma->page_sizes.sg != obj->mm.page_sizes.sg) {
+ pr_err("vma->page_sizes.sg(%u) != obj->mm.page_sizes.sg(%u)\n",
+ vma->page_sizes.sg, obj->mm.page_sizes.sg);
+ err = -EINVAL;
+ }
+
+ if (obj->mm.page_sizes.gtt) {
+ pr_err("obj->page_sizes.gtt(%u) should never be set\n",
+ obj->mm.page_sizes.gtt);
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+static int igt_mock_exhaust_device_supported_pages(void *arg)
+{
+ struct i915_hw_ppgtt *ppgtt = arg;
+ struct drm_i915_private *i915 = ppgtt->base.i915;
+ unsigned int saved_mask = INTEL_INFO(i915)->page_sizes;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int i, j, single;
+ int err;
+
+ /*
+ * Sanity check creating objects with every valid page support
+ * combination for our mock device.
+ */
+
+ for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) {
+ unsigned int combination = 0;
+
+ for (j = 0; j < ARRAY_SIZE(page_sizes); j++) {
+ if (i & BIT(j))
+ combination |= page_sizes[j];
+ }
+
+ mkwrite_device_info(i915)->page_sizes = combination;
+
+ for (single = 0; single <= 1; ++single) {
+ obj = fake_huge_pages_object(i915, combination, !!single);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto out_device;
+ }
+
+ if (obj->base.size != combination) {
+ pr_err("obj->base.size=%zu, expected=%u\n",
+ obj->base.size, combination);
+ err = -EINVAL;
+ goto out_put;
+ }
+
+ vma = i915_vma_instance(obj, &ppgtt->base, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_put;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto out_close;
+
+ err = igt_check_page_sizes(vma);
+
+ if (vma->page_sizes.sg != combination) {
+ pr_err("page_sizes.sg=%u, expected=%u\n",
+ vma->page_sizes.sg, combination);
+ err = -EINVAL;
+ }
+
+ i915_vma_unpin(vma);
+ i915_vma_close(vma);
+
+ i915_gem_object_put(obj);
+
+ if (err)
+ goto out_device;
+ }
+ }
+
+ goto out_device;
+
+out_close:
+ i915_vma_close(vma);
+out_put:
+ i915_gem_object_put(obj);
+out_device:
+ mkwrite_device_info(i915)->page_sizes = saved_mask;
+
+ return err;
+}
+
+static int igt_mock_ppgtt_misaligned_dma(void *arg)
+{
+ struct i915_hw_ppgtt *ppgtt = arg;
+ struct drm_i915_private *i915 = ppgtt->base.i915;
+ unsigned long supported = INTEL_INFO(i915)->page_sizes;
+ struct drm_i915_gem_object *obj;
+ int bit;
+ int err;
+
+ /*
+ * Sanity check dma misalignment for huge pages -- the dma addresses we
+ * insert into the paging structures need to always respect the page
+ * size alignment.
+ */
+
+ bit = ilog2(I915_GTT_PAGE_SIZE_64K);
+
+ for_each_set_bit_from(bit, &supported,
+ ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
+ IGT_TIMEOUT(end_time);
+ unsigned int page_size = BIT(bit);
+ unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
+ unsigned int offset;
+ unsigned int size =
+ round_up(page_size, I915_GTT_PAGE_SIZE_2M) << 1;
+ struct i915_vma *vma;
+
+ obj = fake_huge_pages_object(i915, size, true);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ if (obj->base.size != size) {
+ pr_err("obj->base.size=%zu, expected=%u\n",
+ obj->base.size, size);
+ err = -EINVAL;
+ goto out_put;
+ }
+
+ err = i915_gem_object_pin_pages(obj);
+ if (err)
+ goto out_put;
+
+ /* Force the page size for this object */
+ obj->mm.page_sizes.sg = page_size;
+
+ vma = i915_vma_instance(obj, &ppgtt->base, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_unpin;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, flags);
+ if (err) {
+ i915_vma_close(vma);
+ goto out_unpin;
+ }
+
+
+ err = igt_check_page_sizes(vma);
+
+ if (vma->page_sizes.gtt != page_size) {
+ pr_err("page_sizes.gtt=%u, expected %u\n",
+ vma->page_sizes.gtt, page_size);
+ err = -EINVAL;
+ }
+
+ i915_vma_unpin(vma);
+
+ if (err) {
+ i915_vma_close(vma);
+ goto out_unpin;
+ }
+
+ /*
+ * Try all the other valid offsets until the next
+ * boundary -- should always fall back to using 4K
+ * pages.
+ */
+ for (offset = 4096; offset < page_size; offset += 4096) {
+ err = i915_vma_unbind(vma);
+ if (err) {
+ i915_vma_close(vma);
+ goto out_unpin;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, flags | offset);
+ if (err) {
+ i915_vma_close(vma);
+ goto out_unpin;
+ }
+
+ err = igt_check_page_sizes(vma);
+
+ if (vma->page_sizes.gtt != I915_GTT_PAGE_SIZE_4K) {
+ pr_err("page_sizes.gtt=%u, expected %lu\n",
+ vma->page_sizes.gtt, I915_GTT_PAGE_SIZE_4K);
+ err = -EINVAL;
+ }
+
+ i915_vma_unpin(vma);
+
+ if (err) {
+ i915_vma_close(vma);
+ goto out_unpin;
+ }
+
+ if (igt_timeout(end_time,
+ "%s timed out at offset %x with page-size %x\n",
+ __func__, offset, page_size))
+ break;
+ }
+
+ i915_vma_close(vma);
+
+ i915_gem_object_unpin_pages(obj);
+ i915_gem_object_put(obj);
+ }
+
+ return 0;
+
+out_unpin:
+ i915_gem_object_unpin_pages(obj);
+out_put:
+ i915_gem_object_put(obj);
+
+ return err;
+}
+
+static void close_object_list(struct list_head *objects,
+ struct i915_hw_ppgtt *ppgtt)
+{
+ struct drm_i915_gem_object *obj, *on;
+
+ list_for_each_entry_safe(obj, on, objects, st_link) {
+ struct i915_vma *vma;
+
+ vma = i915_vma_instance(obj, &ppgtt->base, NULL);
+ if (!IS_ERR(vma))
+ i915_vma_close(vma);
+
+ list_del(&obj->st_link);
+ i915_gem_object_unpin_pages(obj);
+ i915_gem_object_put(obj);
+ }
+}
+
+static int igt_mock_ppgtt_huge_fill(void *arg)
+{
+ struct i915_hw_ppgtt *ppgtt = arg;
+ struct drm_i915_private *i915 = ppgtt->base.i915;
+ unsigned long max_pages = ppgtt->base.total >> PAGE_SHIFT;
+ unsigned long page_num;
+ bool single = false;
+ LIST_HEAD(objects);
+ IGT_TIMEOUT(end_time);
+ int err;
+
+ for_each_prime_number_from(page_num, 1, max_pages) {
+ struct drm_i915_gem_object *obj;
+ u64 size = page_num << PAGE_SHIFT;
+ struct i915_vma *vma;
+ unsigned int expected_gtt = 0;
+ int i;
+
+ obj = fake_huge_pages_object(i915, size, single);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ break;
+ }
+
+ if (obj->base.size != size) {
+ pr_err("obj->base.size=%zd, expected=%llu\n",
+ obj->base.size, size);
+ i915_gem_object_put(obj);
+ err = -EINVAL;
+ break;
+ }
+
+ err = i915_gem_object_pin_pages(obj);
+ if (err) {
+ i915_gem_object_put(obj);
+ break;
+ }
+
+ list_add(&obj->st_link, &objects);
+
+ vma = i915_vma_instance(obj, &ppgtt->base, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ break;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ break;
+
+ err = igt_check_page_sizes(vma);
+ if (err) {
+ i915_vma_unpin(vma);
+ break;
+ }
+
+ /*
+ * Figure out the expected gtt page size knowing that we go from
+ * largest to smallest page size sg chunks, and that we align to
+ * the largest page size.
+ */
+ for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) {
+ unsigned int page_size = page_sizes[i];
+
+ if (HAS_PAGE_SIZES(i915, page_size) &&
+ size >= page_size) {
+ expected_gtt |= page_size;
+ size &= page_size-1;
+ }
+ }
+
+ GEM_BUG_ON(!expected_gtt);
+ GEM_BUG_ON(size);
+
+ if (expected_gtt & I915_GTT_PAGE_SIZE_4K)
+ expected_gtt &= ~I915_GTT_PAGE_SIZE_64K;
+
+ i915_vma_unpin(vma);
+
+ if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
+ if (!IS_ALIGNED(vma->node.start,
+ I915_GTT_PAGE_SIZE_2M)) {
+ pr_err("node.start(%llx) not aligned to 2M\n",
+ vma->node.start);
+ err = -EINVAL;
+ break;
+ }
+
+ if (!IS_ALIGNED(vma->node.size,
+ I915_GTT_PAGE_SIZE_2M)) {
+ pr_err("node.size(%llx) not aligned to 2M\n",
+ vma->node.size);
+ err = -EINVAL;
+ break;
+ }
+ }
+
+ if (vma->page_sizes.gtt != expected_gtt) {
+ pr_err("gtt=%u, expected=%u, size=%zd, single=%s\n",
+ vma->page_sizes.gtt, expected_gtt,
+ obj->base.size, yesno(!!single));
+ err = -EINVAL;
+ break;
+ }
+
+ if (igt_timeout(end_time,
+ "%s timed out at size %zd\n",
+ __func__, obj->base.size))
+ break;
+
+ single = !single;
+ }
+
+ close_object_list(&objects, ppgtt);
+
+ if (err == -ENOMEM || err == -ENOSPC)
+ err = 0;
+
+ return err;
+}
+
+static int igt_mock_ppgtt_64K(void *arg)
+{
+ struct i915_hw_ppgtt *ppgtt = arg;
+ struct drm_i915_private *i915 = ppgtt->base.i915;
+ struct drm_i915_gem_object *obj;
+ const struct object_info {
+ unsigned int size;
+ unsigned int gtt;
+ unsigned int offset;
+ } objects[] = {
+ /* Cases with forced padding/alignment */
+ {
+ .size = SZ_64K,
+ .gtt = I915_GTT_PAGE_SIZE_64K,
+ .offset = 0,
+ },
+ {
+ .size = SZ_64K + SZ_4K,
+ .gtt = I915_GTT_PAGE_SIZE_4K,
+ .offset = 0,
+ },
+ {
+ .size = SZ_64K - SZ_4K,
+ .gtt = I915_GTT_PAGE_SIZE_4K,
+ .offset = 0,
+ },
+ {
+ .size = SZ_2M,
+ .gtt = I915_GTT_PAGE_SIZE_64K,
+ .offset = 0,
+ },
+ {
+ .size = SZ_2M - SZ_4K,
+ .gtt = I915_GTT_PAGE_SIZE_4K,
+ .offset = 0,
+ },
+ {
+ .size = SZ_2M + SZ_4K,
+ .gtt = I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_4K,
+ .offset = 0,
+ },
+ {
+ .size = SZ_2M + SZ_64K,
+ .gtt = I915_GTT_PAGE_SIZE_64K,
+ .offset = 0,
+ },
+ {
+ .size = SZ_2M - SZ_64K,
+ .gtt = I915_GTT_PAGE_SIZE_64K,
+ .offset = 0,
+ },
+ /* Try without any forced padding/alignment */
+ {
+ .size = SZ_64K,
+ .offset = SZ_2M,
+ .gtt = I915_GTT_PAGE_SIZE_4K,
+ },
+ {
+ .size = SZ_128K,
+ .offset = SZ_2M - SZ_64K,
+ .gtt = I915_GTT_PAGE_SIZE_4K,
+ },
+ };
+ struct i915_vma *vma;
+ int i, single;
+ int err;
+
+ /*
+ * Sanity check some of the trickiness with 64K pages -- either we can
+ * safely mark the whole page-table(2M block) as 64K, or we have to
+ * always fallback to 4K.
+ */
+
+ if (!HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K))
+ return 0;
+
+ for (i = 0; i < ARRAY_SIZE(objects); ++i) {
+ unsigned int size = objects[i].size;
+ unsigned int expected_gtt = objects[i].gtt;
+ unsigned int offset = objects[i].offset;
+ unsigned int flags = PIN_USER;
+
+ for (single = 0; single <= 1; single++) {
+ obj = fake_huge_pages_object(i915, size, !!single);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ err = i915_gem_object_pin_pages(obj);
+ if (err)
+ goto out_object_put;
+
+ /*
+ * Disable 2M pages -- We only want to use 64K/4K pages
+ * for this test.
+ */
+ obj->mm.page_sizes.sg &= ~I915_GTT_PAGE_SIZE_2M;
+
+ vma = i915_vma_instance(obj, &ppgtt->base, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_object_unpin;
+ }
+
+ if (offset)
+ flags |= PIN_OFFSET_FIXED | offset;
+
+ err = i915_vma_pin(vma, 0, 0, flags);
+ if (err)
+ goto out_vma_close;
+
+ err = igt_check_page_sizes(vma);
+ if (err)
+ goto out_vma_unpin;
+
+ if (!offset && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
+ if (!IS_ALIGNED(vma->node.start,
+ I915_GTT_PAGE_SIZE_2M)) {
+ pr_err("node.start(%llx) not aligned to 2M\n",
+ vma->node.start);
+ err = -EINVAL;
+ goto out_vma_unpin;
+ }
+
+ if (!IS_ALIGNED(vma->node.size,
+ I915_GTT_PAGE_SIZE_2M)) {
+ pr_err("node.size(%llx) not aligned to 2M\n",
+ vma->node.size);
+ err = -EINVAL;
+ goto out_vma_unpin;
+ }
+ }
+
+ if (vma->page_sizes.gtt != expected_gtt) {
+ pr_err("gtt=%u, expected=%u, i=%d, single=%s\n",
+ vma->page_sizes.gtt, expected_gtt, i,
+ yesno(!!single));
+ err = -EINVAL;
+ goto out_vma_unpin;
+ }
+
+ i915_vma_unpin(vma);
+ i915_vma_close(vma);
+
+ i915_gem_object_unpin_pages(obj);
+ i915_gem_object_put(obj);
+ }
+ }
+
+ return 0;
+
+out_vma_unpin:
+ i915_vma_unpin(vma);
+out_vma_close:
+ i915_vma_close(vma);
+out_object_unpin:
+ i915_gem_object_unpin_pages(obj);
+out_object_put:
+ i915_gem_object_put(obj);
+
+ return err;
+}
+
+static struct i915_vma *
+gpu_write_dw(struct i915_vma *vma, u64 offset, u32 val)
+{
+ struct drm_i915_private *i915 = to_i915(vma->obj->base.dev);
+ const int gen = INTEL_GEN(vma->vm->i915);
+ unsigned int count = vma->size >> PAGE_SHIFT;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *batch;
+ unsigned int size;
+ u32 *cmd;
+ int n;
+ int err;
+
+ size = (1 + 4 * count) * sizeof(u32);
+ size = round_up(size, PAGE_SIZE);
+ obj = i915_gem_object_create_internal(i915, size);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
+ if (IS_ERR(cmd)) {
+ err = PTR_ERR(cmd);
+ goto err;
+ }
+
+ offset += vma->node.start;
+
+ for (n = 0; n < count; n++) {
+ if (gen >= 8) {
+ *cmd++ = MI_STORE_DWORD_IMM_GEN4;
+ *cmd++ = lower_32_bits(offset);
+ *cmd++ = upper_32_bits(offset);
+ *cmd++ = val;
+ } else if (gen >= 4) {
+ *cmd++ = MI_STORE_DWORD_IMM_GEN4 |
+ (gen < 6 ? 1 << 22 : 0);
+ *cmd++ = 0;
+ *cmd++ = offset;
+ *cmd++ = val;
+ } else {
+ *cmd++ = MI_STORE_DWORD_IMM | 1 << 22;
+ *cmd++ = offset;
+ *cmd++ = val;
+ }
+
+ offset += PAGE_SIZE;
+ }
+
+ *cmd = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_unpin_map(obj);
+
+ err = i915_gem_object_set_to_gtt_domain(obj, false);
+ if (err)
+ goto err;
+
+ batch = i915_vma_instance(obj, vma->vm, NULL);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto err;
+ }
+
+ err = i915_vma_pin(batch, 0, 0, PIN_USER);
+ if (err)
+ goto err;
+
+ return batch;
+
+err:
+ i915_gem_object_put(obj);
+
+ return ERR_PTR(err);
+}
+
+static int gpu_write(struct i915_vma *vma,
+ struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine,
+ u32 dword,
+ u32 value)
+{
+ struct drm_i915_gem_request *rq;
+ struct i915_vma *batch;
+ int flags = 0;
+ int err;
+
+ GEM_BUG_ON(!intel_engine_can_store_dword(engine));
+
+ err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
+ if (err)
+ return err;
+
+ rq = i915_gem_request_alloc(engine, ctx);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ batch = gpu_write_dw(vma, dword * sizeof(u32), value);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto err_request;
+ }
+
+ i915_vma_move_to_active(batch, rq, 0);
+ i915_gem_object_set_active_reference(batch->obj);
+ i915_vma_unpin(batch);
+ i915_vma_close(batch);
+
+ err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (err)
+ goto err_request;
+
+ err = i915_switch_context(rq);
+ if (err)
+ goto err_request;
+
+ err = rq->engine->emit_bb_start(rq,
+ batch->node.start, batch->node.size,
+ flags);
+ if (err)
+ goto err_request;
+
+ i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+
+ reservation_object_lock(vma->resv, NULL);
+ reservation_object_add_excl_fence(vma->resv, &rq->fence);
+ reservation_object_unlock(vma->resv);
+
+err_request:
+ __i915_add_request(rq, err == 0);
+
+ return err;
+}
+
+static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
+{
+ unsigned int needs_flush;
+ unsigned long n;
+ int err;
+
+ err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush);
+ if (err)
+ return err;
+
+ for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) {
+ u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n));
+
+ if (needs_flush & CLFLUSH_BEFORE)
+ drm_clflush_virt_range(ptr, PAGE_SIZE);
+
+ if (ptr[dword] != val) {
+ pr_err("n=%lu ptr[%u]=%u, val=%u\n",
+ n, dword, ptr[dword], val);
+ kunmap_atomic(ptr);
+ err = -EINVAL;
+ break;
+ }
+
+ kunmap_atomic(ptr);
+ }
+
+ i915_gem_obj_finish_shmem_access(obj);
+
+ return err;
+}
+
+static int igt_write_huge(struct i915_gem_context *ctx,
+ struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base;
+ struct intel_engine_cs *engine;
+ struct i915_vma *vma;
+ unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
+ unsigned int max_page_size;
+ unsigned int id;
+ u64 max;
+ u64 num;
+ u64 size;
+ int err = 0;
+
+ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+ size = obj->base.size;
+ if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
+ size = round_up(size, I915_GTT_PAGE_SIZE_2M);
+
+ max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg);
+ max = div_u64((vm->total - size), max_page_size);
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ for_each_engine(engine, i915, id) {
+ IGT_TIMEOUT(end_time);
+
+ if (!intel_engine_can_store_dword(engine)) {
+ pr_info("store-dword-imm not supported on engine=%u\n",
+ id);
+ continue;
+ }
+
+ /*
+ * Try various offsets until we timeout -- we want to avoid
+ * issues hidden by effectively always using offset = 0.
+ */
+ for_each_prime_number_from(num, 0, max) {
+ u64 offset = num * max_page_size;
+ u32 dword;
+
+ err = i915_vma_unbind(vma);
+ if (err)
+ goto out_vma_close;
+
+ err = i915_vma_pin(vma, size, max_page_size, flags | offset);
+ if (err) {
+ /*
+ * The ggtt may have some pages reserved so
+ * refrain from erroring out.
+ */
+ if (err == -ENOSPC && i915_is_ggtt(vm)) {
+ err = 0;
+ continue;
+ }
+
+ goto out_vma_close;
+ }
+
+ err = igt_check_page_sizes(vma);
+ if (err)
+ goto out_vma_unpin;
+
+ dword = offset_in_page(num) / 4;
+
+ err = gpu_write(vma, ctx, engine, dword, num + 1);
+ if (err) {
+ pr_err("gpu-write failed at offset=%llx", offset);
+ goto out_vma_unpin;
+ }
+
+ err = cpu_check(obj, dword, num + 1);
+ if (err) {
+ pr_err("cpu-check failed at offset=%llx", offset);
+ goto out_vma_unpin;
+ }
+
+ i915_vma_unpin(vma);
+
+ if (num > 0 &&
+ igt_timeout(end_time,
+ "%s timed out on engine=%u at offset=%llx, max_page_size=%x\n",
+ __func__, id, offset, max_page_size))
+ break;
+ }
+ }
+
+out_vma_unpin:
+ if (i915_vma_is_pinned(vma))
+ i915_vma_unpin(vma);
+out_vma_close:
+ i915_vma_close(vma);
+
+ return err;
+}
+
+static int igt_ppgtt_exhaust_huge(void *arg)
+{
+ struct i915_gem_context *ctx = arg;
+ struct drm_i915_private *i915 = ctx->i915;
+ unsigned long supported = INTEL_INFO(i915)->page_sizes;
+ static unsigned int pages[ARRAY_SIZE(page_sizes)];
+ struct drm_i915_gem_object *obj;
+ unsigned int size_mask;
+ unsigned int page_mask;
+ int n, i;
+ int err;
+
+ /*
+ * Sanity check creating objects with a varying mix of page sizes --
+ * ensuring that our writes lands in the right place.
+ */
+
+ n = 0;
+ for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1)
+ pages[n++] = BIT(i);
+
+ for (size_mask = 2; size_mask < BIT(n); size_mask++) {
+ unsigned int size = 0;
+
+ for (i = 0; i < n; i++) {
+ if (size_mask & BIT(i))
+ size |= pages[i];
+ }
+
+ /*
+ * For our page mask we want to enumerate all the page-size
+ * combinations which will fit into our chosen object size.
+ */
+ for (page_mask = 2; page_mask <= size_mask; page_mask++) {
+ unsigned int page_sizes = 0;
+
+ for (i = 0; i < n; i++) {
+ if (page_mask & BIT(i))
+ page_sizes |= pages[i];
+ }
+
+ /*
+ * Ensure that we can actually fill the given object
+ * with our chosen page mask.
+ */
+ if (!IS_ALIGNED(size, BIT(__ffs(page_sizes))))
+ continue;
+
+ obj = huge_pages_object(i915, size, page_sizes);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto out_device;
+ }
+
+ err = i915_gem_object_pin_pages(obj);
+ if (err) {
+ i915_gem_object_put(obj);
+
+ if (err == -ENOMEM) {
+ pr_info("unable to get pages, size=%u, pages=%u\n",
+ size, page_sizes);
+ err = 0;
+ break;
+ }
+
+ pr_err("pin_pages failed, size=%u, pages=%u\n",
+ size_mask, page_mask);
+
+ goto out_device;
+ }
+
+ /* Force the page-size for the gtt insertion */
+ obj->mm.page_sizes.sg = page_sizes;
+
+ err = igt_write_huge(ctx, obj);
+ if (err) {
+ pr_err("exhaust write-huge failed with size=%u\n",
+ size);
+ goto out_unpin;
+ }
+
+ i915_gem_object_unpin_pages(obj);
+ i915_gem_object_put(obj);
+ }
+ }
+
+ goto out_device;
+
+out_unpin:
+ i915_gem_object_unpin_pages(obj);
+ i915_gem_object_put(obj);
+out_device:
+ mkwrite_device_info(i915)->page_sizes = supported;
+
+ return err;
+}
+
+static int igt_ppgtt_internal_huge(void *arg)
+{
+ struct i915_gem_context *ctx = arg;
+ struct drm_i915_private *i915 = ctx->i915;
+ struct drm_i915_gem_object *obj;
+ static const unsigned int sizes[] = {
+ SZ_64K,
+ SZ_128K,
+ SZ_256K,
+ SZ_512K,
+ SZ_1M,
+ SZ_2M,
+ };
+ int i;
+ int err;
+
+ /*
+ * Sanity check that the HW uses huge pages correctly through internal
+ * -- ensure that our writes land in the right place.
+ */
+
+ for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
+ unsigned int size = sizes[i];
+
+ obj = i915_gem_object_create_internal(i915, size);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ err = i915_gem_object_pin_pages(obj);
+ if (err)
+ goto out_put;
+
+ if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) {
+ pr_info("internal unable to allocate huge-page(s) with size=%u\n",
+ size);
+ goto out_unpin;
+ }
+
+ err = igt_write_huge(ctx, obj);
+ if (err) {
+ pr_err("internal write-huge failed with size=%u\n",
+ size);
+ goto out_unpin;
+ }
+
+ i915_gem_object_unpin_pages(obj);
+ i915_gem_object_put(obj);
+ }
+
+ return 0;
+
+out_unpin:
+ i915_gem_object_unpin_pages(obj);
+out_put:
+ i915_gem_object_put(obj);
+
+ return err;
+}
+
+static inline bool igt_can_allocate_thp(struct drm_i915_private *i915)
+{
+ return i915->mm.gemfs && has_transparent_hugepage();
+}
+
+static int igt_ppgtt_gemfs_huge(void *arg)
+{
+ struct i915_gem_context *ctx = arg;
+ struct drm_i915_private *i915 = ctx->i915;
+ struct drm_i915_gem_object *obj;
+ static const unsigned int sizes[] = {
+ SZ_2M,
+ SZ_4M,
+ SZ_8M,
+ SZ_16M,
+ SZ_32M,
+ };
+ int i;
+ int err;
+
+ /*
+ * Sanity check that the HW uses huge pages correctly through gemfs --
+ * ensure that our writes land in the right place.
+ */
+
+ if (!igt_can_allocate_thp(i915)) {
+ pr_info("missing THP support, skipping\n");
+ return 0;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
+ unsigned int size = sizes[i];
+
+ obj = i915_gem_object_create(i915, size);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ err = i915_gem_object_pin_pages(obj);
+ if (err)
+ goto out_put;
+
+ if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) {
+ pr_info("finishing test early, gemfs unable to allocate huge-page(s) with size=%u\n",
+ size);
+ goto out_unpin;
+ }
+
+ err = igt_write_huge(ctx, obj);
+ if (err) {
+ pr_err("gemfs write-huge failed with size=%u\n",
+ size);
+ goto out_unpin;
+ }
+
+ i915_gem_object_unpin_pages(obj);
+ i915_gem_object_put(obj);
+ }
+
+ return 0;
+
+out_unpin:
+ i915_gem_object_unpin_pages(obj);
+out_put:
+ i915_gem_object_put(obj);
+
+ return err;
+}
+
+static int igt_ppgtt_pin_update(void *arg)
+{
+ struct i915_gem_context *ctx = arg;
+ struct drm_i915_private *dev_priv = ctx->i915;
+ unsigned long supported = INTEL_INFO(dev_priv)->page_sizes;
+ struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
+ int first, last;
+ int err;
+
+ /*
+ * Make sure there's no funny business when doing a PIN_UPDATE -- in the
+ * past we had a subtle issue with being able to incorrectly do multiple
+ * alloc va ranges on the same object when doing a PIN_UPDATE, which
+ * resulted in some pretty nasty bugs, though only when using
+ * huge-gtt-pages.
+ */
+
+ if (!USES_FULL_48BIT_PPGTT(dev_priv)) {
+ pr_info("48b PPGTT not supported, skipping\n");
+ return 0;
+ }
+
+ first = ilog2(I915_GTT_PAGE_SIZE_64K);
+ last = ilog2(I915_GTT_PAGE_SIZE_2M);
+
+ for_each_set_bit_from(first, &supported, last + 1) {
+ unsigned int page_size = BIT(first);
+
+ obj = i915_gem_object_create_internal(dev_priv, page_size);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ vma = i915_vma_instance(obj, &ppgtt->base, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_put;
+ }
+
+ err = i915_vma_pin(vma, SZ_2M, 0, flags);
+ if (err)
+ goto out_close;
+
+ if (vma->page_sizes.sg < page_size) {
+ pr_info("Unable to allocate page-size %x, finishing test early\n",
+ page_size);
+ goto out_unpin;
+ }
+
+ err = igt_check_page_sizes(vma);
+ if (err)
+ goto out_unpin;
+
+ if (vma->page_sizes.gtt != page_size) {
+ dma_addr_t addr = i915_gem_object_get_dma_address(obj, 0);
+
+ /*
+ * The only valid reason for this to ever fail would be
+ * if the dma-mapper screwed us over when we did the
+ * dma_map_sg(), since it has the final say over the dma
+ * address.
+ */
+ if (IS_ALIGNED(addr, page_size)) {
+ pr_err("page_sizes.gtt=%u, expected=%u\n",
+ vma->page_sizes.gtt, page_size);
+ err = -EINVAL;
+ } else {
+ pr_info("dma address misaligned, finishing test early\n");
+ }
+
+ goto out_unpin;
+ }
+
+ err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE);
+ if (err)
+ goto out_unpin;
+
+ i915_vma_unpin(vma);
+ i915_vma_close(vma);
+
+ i915_gem_object_put(obj);
+ }
+
+ obj = i915_gem_object_create_internal(dev_priv, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ vma = i915_vma_instance(obj, &ppgtt->base, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_put;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, flags);
+ if (err)
+ goto out_close;
+
+ /*
+ * Make sure we don't end up with something like where the pde is still
+ * pointing to the 2M page, and the pt we just filled-in is dangling --
+ * we can check this by writing to the first page where it would then
+ * land in the now stale 2M page.
+ */
+
+ err = gpu_write(vma, ctx, dev_priv->engine[RCS], 0, 0xdeadbeaf);
+ if (err)
+ goto out_unpin;
+
+ err = cpu_check(obj, 0, 0xdeadbeaf);
+
+out_unpin:
+ i915_vma_unpin(vma);
+out_close:
+ i915_vma_close(vma);
+out_put:
+ i915_gem_object_put(obj);
+
+ return err;
+}
+
+static int igt_tmpfs_fallback(void *arg)
+{
+ struct i915_gem_context *ctx = arg;
+ struct drm_i915_private *i915 = ctx->i915;
+ struct vfsmount *gemfs = i915->mm.gemfs;
+ struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ u32 *vaddr;
+ int err = 0;
+
+ /*
+ * Make sure that we don't burst into a ball of flames upon falling back
+ * to tmpfs, which we rely on if on the off-chance we encouter a failure
+ * when setting up gemfs.
+ */
+
+ i915->mm.gemfs = NULL;
+
+ obj = i915_gem_object_create(i915, PAGE_SIZE);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto out_restore;
+ }
+
+ vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+ if (IS_ERR(vaddr)) {
+ err = PTR_ERR(vaddr);
+ goto out_put;
+ }
+ *vaddr = 0xdeadbeaf;
+
+ i915_gem_object_unpin_map(obj);
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_put;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto out_close;
+
+ err = igt_check_page_sizes(vma);
+
+ i915_vma_unpin(vma);
+out_close:
+ i915_vma_close(vma);
+out_put:
+ i915_gem_object_put(obj);
+out_restore:
+ i915->mm.gemfs = gemfs;
+
+ return err;
+}
+
+static int igt_shrink_thp(void *arg)
+{
+ struct i915_gem_context *ctx = arg;
+ struct drm_i915_private *i915 = ctx->i915;
+ struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ unsigned int flags = PIN_USER;
+ int err;
+
+ /*
+ * Sanity check shrinking huge-paged object -- make sure nothing blows
+ * up.
+ */
+
+ if (!igt_can_allocate_thp(i915)) {
+ pr_info("missing THP support, skipping\n");
+ return 0;
+ }
+
+ obj = i915_gem_object_create(i915, SZ_2M);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_put;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, flags);
+ if (err)
+ goto out_close;
+
+ if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) {
+ pr_info("failed to allocate THP, finishing test early\n");
+ goto out_unpin;
+ }
+
+ err = igt_check_page_sizes(vma);
+ if (err)
+ goto out_unpin;
+
+ err = gpu_write(vma, ctx, i915->engine[RCS], 0, 0xdeadbeaf);
+ if (err)
+ goto out_unpin;
+
+ i915_vma_unpin(vma);
+
+ /*
+ * Now that the pages are *unpinned* shrink-all should invoke
+ * shmem to truncate our pages.
+ */
+ i915_gem_shrink_all(i915);
+ if (!IS_ERR_OR_NULL(obj->mm.pages)) {
+ pr_err("shrink-all didn't truncate the pages\n");
+ err = -EINVAL;
+ goto out_close;
+ }
+
+ if (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys) {
+ pr_err("residual page-size bits left\n");
+ err = -EINVAL;
+ goto out_close;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, flags);
+ if (err)
+ goto out_close;
+
+ err = cpu_check(obj, 0, 0xdeadbeaf);
+
+out_unpin:
+ i915_vma_unpin(vma);
+out_close:
+ i915_vma_close(vma);
+out_put:
+ i915_gem_object_put(obj);
+
+ return err;
+}
+
+int i915_gem_huge_page_mock_selftests(void)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(igt_mock_exhaust_device_supported_pages),
+ SUBTEST(igt_mock_ppgtt_misaligned_dma),
+ SUBTEST(igt_mock_ppgtt_huge_fill),
+ SUBTEST(igt_mock_ppgtt_64K),
+ };
+ int saved_ppgtt = i915_modparams.enable_ppgtt;
+ struct drm_i915_private *dev_priv;
+ struct pci_dev *pdev;
+ struct i915_hw_ppgtt *ppgtt;
+ int err;
+
+ dev_priv = mock_gem_device();
+ if (!dev_priv)
+ return -ENOMEM;
+
+ /* Pretend to be a device which supports the 48b PPGTT */
+ i915_modparams.enable_ppgtt = 3;
+
+ pdev = dev_priv->drm.pdev;
+ dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(39));
+
+ mutex_lock(&dev_priv->drm.struct_mutex);
+ ppgtt = i915_ppgtt_create(dev_priv, ERR_PTR(-ENODEV), "mock");
+ if (IS_ERR(ppgtt)) {
+ err = PTR_ERR(ppgtt);
+ goto out_unlock;
+ }
+
+ if (!i915_vm_is_48bit(&ppgtt->base)) {
+ pr_err("failed to create 48b PPGTT\n");
+ err = -EINVAL;
+ goto out_close;
+ }
+
+ /* If we were ever hit this then it's time to mock the 64K scratch */
+ if (!i915_vm_has_scratch_64K(&ppgtt->base)) {
+ pr_err("PPGTT missing 64K scratch page\n");
+ err = -EINVAL;
+ goto out_close;
+ }
+
+ err = i915_subtests(tests, ppgtt);
+
+out_close:
+ i915_ppgtt_close(&ppgtt->base);
+ i915_ppgtt_put(ppgtt);
+
+out_unlock:
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+
+ i915_modparams.enable_ppgtt = saved_ppgtt;
+
+ drm_dev_unref(&dev_priv->drm);
+
+ return err;
+}
+
+int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(igt_shrink_thp),
+ SUBTEST(igt_ppgtt_pin_update),
+ SUBTEST(igt_tmpfs_fallback),
+ SUBTEST(igt_ppgtt_exhaust_huge),
+ SUBTEST(igt_ppgtt_gemfs_huge),
+ SUBTEST(igt_ppgtt_internal_huge),
+ };
+ struct drm_file *file;
+ struct i915_gem_context *ctx;
+ int err;
+
+ if (!USES_PPGTT(dev_priv)) {
+ pr_info("PPGTT not supported, skipping live-selftests\n");
+ return 0;
+ }
+
+ file = mock_file(dev_priv);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ mutex_lock(&dev_priv->drm.struct_mutex);
+
+ ctx = live_context(dev_priv, file);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out_unlock;
+ }
+
+ err = i915_subtests(tests, ctx);
+
+out_unlock:
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+
+ mock_file_free(dev_priv, file);
+
+ return err;
+}
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 6b132caffa18..9da0c9f99916 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -39,25 +39,26 @@ static void fake_free_pages(struct drm_i915_gem_object *obj,
kfree(pages);
}
-static struct sg_table *
-fake_get_pages(struct drm_i915_gem_object *obj)
+static int fake_get_pages(struct drm_i915_gem_object *obj)
{
#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY)
#define PFN_BIAS 0x1000
struct sg_table *pages;
struct scatterlist *sg;
+ unsigned int sg_page_sizes;
typeof(obj->base.size) rem;
pages = kmalloc(sizeof(*pages), GFP);
if (!pages)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
rem = round_up(obj->base.size, BIT(31)) >> 31;
if (sg_alloc_table(pages, rem, GFP)) {
kfree(pages);
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
}
+ sg_page_sizes = 0;
rem = obj->base.size;
for (sg = pages->sgl; sg; sg = sg_next(sg)) {
unsigned long len = min_t(typeof(rem), rem, BIT(31));
@@ -66,13 +67,17 @@ fake_get_pages(struct drm_i915_gem_object *obj)
sg_set_page(sg, pfn_to_page(PFN_BIAS), len, 0);
sg_dma_address(sg) = page_to_phys(sg_page(sg));
sg_dma_len(sg) = len;
+ sg_page_sizes |= len;
rem -= len;
}
GEM_BUG_ON(rem);
obj->mm.madv = I915_MADV_DONTNEED;
- return pages;
+
+ __i915_gem_object_set_pages(obj, pages, sg_page_sizes);
+
+ return 0;
#undef GFP
}
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index 8f011c447e41..1b8774a42e48 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -251,14 +251,6 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
return PTR_ERR(io);
}
- err = i915_vma_get_fence(vma);
- if (err) {
- pr_err("Failed to get fence for partial view: offset=%lu\n",
- page);
- i915_vma_unpin_iomap(vma);
- return err;
- }
-
iowrite32(page, io + n * PAGE_SIZE/sizeof(*io));
i915_vma_unpin_iomap(vma);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
index 6664cb2eb0b8..a999161e8db1 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
@@ -215,7 +215,9 @@ static int igt_request_rewind(void *arg)
}
i915_gem_request_get(vip);
i915_add_request(vip);
+ rcu_read_lock();
request->engine->submit_request(request);
+ rcu_read_unlock();
mutex_unlock(&i915->drm.struct_mutex);
@@ -418,7 +420,10 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915)
err = PTR_ERR(cmd);
goto err;
}
+
*cmd = MI_BATCH_BUFFER_END;
+ i915_gem_chipset_flush(i915);
+
i915_gem_object_unpin_map(obj);
err = i915_gem_object_set_to_gtt_domain(obj, false);
@@ -605,8 +610,8 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
*cmd++ = lower_32_bits(vma->node.start);
}
*cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */
+ i915_gem_chipset_flush(i915);
- wmb();
i915_gem_object_unpin_map(obj);
return vma;
@@ -625,7 +630,7 @@ static int recursive_batch_resolve(struct i915_vma *batch)
return PTR_ERR(cmd);
*cmd = MI_BATCH_BUFFER_END;
- wmb();
+ i915_gem_chipset_flush(batch->vm->i915);
i915_gem_object_unpin_map(batch->obj);
@@ -858,7 +863,8 @@ out_request:
I915_MAP_WC);
if (!IS_ERR(cmd)) {
*cmd = MI_BATCH_BUFFER_END;
- wmb();
+ i915_gem_chipset_flush(i915);
+
i915_gem_object_unpin_map(request[id]->batch->obj);
}
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index 18b174d855ca..64acd7eccc5c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -15,5 +15,6 @@ selftest(objects, i915_gem_object_live_selftests)
selftest(dmabuf, i915_gem_dmabuf_live_selftests)
selftest(coherency, i915_gem_coherency_live_selftests)
selftest(gtt, i915_gem_gtt_live_selftests)
+selftest(hugepages, i915_gem_huge_page_live_selftests)
selftest(contexts, i915_gem_context_live_selftests)
selftest(hangcheck, intel_hangcheck_live_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index fc74687501ba..9961b44f76ed 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -21,3 +21,4 @@ selftest(dmabuf, i915_gem_dmabuf_mock_selftests)
selftest(vma, i915_vma_mock_selftests)
selftest(evict, i915_gem_evict_mock_selftests)
selftest(gtt, i915_gem_gtt_mock_selftests)
+selftest(hugepages, i915_gem_huge_page_mock_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 377c1de766ce..71ce06680d66 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -165,6 +165,7 @@ static int emit_recurse_batch(struct hang *h,
*batch++ = lower_32_bits(vma->node.start);
}
*batch++ = MI_BATCH_BUFFER_END; /* not reached */
+ i915_gem_chipset_flush(h->i915);
flags = 0;
if (INTEL_GEN(vm->i915) <= 5)
@@ -231,7 +232,7 @@ static u32 hws_seqno(const struct hang *h,
static void hang_fini(struct hang *h)
{
*h->batch = MI_BATCH_BUFFER_END;
- wmb();
+ i915_gem_chipset_flush(h->i915);
i915_gem_object_unpin_map(h->obj);
i915_gem_object_put(h->obj);
@@ -275,6 +276,8 @@ static int igt_hang_sanitycheck(void *arg)
i915_gem_request_get(rq);
*h.batch = MI_BATCH_BUFFER_END;
+ i915_gem_chipset_flush(i915);
+
__i915_add_request(rq, true);
timeout = i915_wait_request(rq,
@@ -621,8 +624,11 @@ static int igt_wait_reset(void *arg)
__i915_add_request(rq, true);
if (!wait_for_hang(&h, rq)) {
+ struct drm_printer p = drm_info_printer(i915->drm.dev);
+
pr_err("Failed to start request %x, at %x\n",
rq->fence.seqno, hws_seqno(&h, rq));
+ intel_engine_dump(rq->engine, &p);
i915_reset(i915, 0);
i915_gem_set_wedged(i915);
@@ -713,8 +719,12 @@ static int igt_reset_queue(void *arg)
__i915_add_request(rq, true);
if (!wait_for_hang(&h, prev)) {
+ struct drm_printer p = drm_info_printer(i915->drm.dev);
+
pr_err("Failed to start request %x, at %x\n",
prev->fence.seqno, hws_seqno(&h, prev));
+ intel_engine_dump(rq->engine, &p);
+
i915_gem_request_put(rq);
i915_gem_request_put(prev);
@@ -765,7 +775,7 @@ static int igt_reset_queue(void *arg)
pr_info("%s: Completed %d resets\n", engine->name, count);
*h.batch = MI_BATCH_BUFFER_END;
- wmb();
+ i915_gem_chipset_flush(i915);
i915_gem_request_put(prev);
}
@@ -815,8 +825,11 @@ static int igt_handle_error(void *arg)
__i915_add_request(rq, true);
if (!wait_for_hang(&h, rq)) {
+ struct drm_printer p = drm_info_printer(i915->drm.dev);
+
pr_err("Failed to start request %x, at %x\n",
rq->fence.seqno, hws_seqno(&h, rq));
+ intel_engine_dump(rq->engine, &p);
i915_reset(i915, 0);
i915_gem_set_wedged(i915);
@@ -865,9 +878,16 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_reset_queue),
SUBTEST(igt_handle_error),
};
+ int err;
if (!intel_has_gpu_reset(i915))
return 0;
- return i915_subtests(tests, i915);
+ intel_runtime_pm_get(i915);
+
+ err = i915_subtests(tests, i915);
+
+ intel_runtime_pm_put(i915);
+
+ return err;
}
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 2388424a14da..04eb9362f4f8 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -83,6 +83,8 @@ static void mock_device_release(struct drm_device *dev)
kmem_cache_destroy(i915->vmas);
kmem_cache_destroy(i915->objects);
+ i915_gemfs_fini(i915);
+
drm_dev_fini(&i915->drm);
put_device(&i915->drm.pdev->dev);
}
@@ -146,7 +148,7 @@ struct drm_i915_private *mock_gem_device(void)
dev_set_name(&pdev->dev, "mock");
dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
-#if IS_ENABLED(CONFIG_IOMMU_API)
+#if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU)
/* hack to disable iommu for the fake device; force identity mapping */
pdev->dev.archdata.iommu = (void *)-1;
#endif
@@ -172,6 +174,11 @@ struct drm_i915_private *mock_gem_device(void)
mkwrite_device_info(i915)->gen = -1;
+ mkwrite_device_info(i915)->page_sizes =
+ I915_GTT_PAGE_SIZE_4K |
+ I915_GTT_PAGE_SIZE_64K |
+ I915_GTT_PAGE_SIZE_2M;
+
spin_lock_init(&i915->mm.object_stat_lock);
mock_uncore_init(i915);
@@ -239,8 +246,16 @@ struct drm_i915_private *mock_gem_device(void)
if (!i915->kernel_context)
goto err_engine;
+ i915->preempt_context = mock_context(i915, NULL);
+ if (!i915->preempt_context)
+ goto err_kernel_context;
+
+ WARN_ON(i915_gemfs_init(i915));
+
return i915;
+err_kernel_context:
+ i915_gem_context_put(i915->kernel_context);
err_engine:
for_each_engine(engine, i915, id)
mock_engine_free(engine);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c
index f2118cf535a0..336e1afb250f 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
@@ -43,7 +43,6 @@ static int mock_bind_ppgtt(struct i915_vma *vma,
u32 flags)
{
GEM_BUG_ON(flags & I915_VMA_GLOBAL_BIND);
- vma->pages = vma->obj->mm.pages;
vma->flags |= I915_VMA_LOCAL_BIND;
return 0;
}
@@ -84,6 +83,8 @@ mock_ppgtt(struct drm_i915_private *i915,
ppgtt->base.insert_entries = mock_insert_entries;
ppgtt->base.bind_vma = mock_bind_ppgtt;
ppgtt->base.unbind_vma = mock_unbind_ppgtt;
+ ppgtt->base.set_pages = ppgtt_set_pages;
+ ppgtt->base.clear_pages = clear_pages;
ppgtt->base.cleanup = mock_cleanup;
return ppgtt;
@@ -93,12 +94,6 @@ static int mock_bind_ggtt(struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags)
{
- int err;
-
- err = i915_get_ggtt_vma_pages(vma);
- if (err)
- return err;
-
vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
return 0;
}
@@ -124,6 +119,8 @@ void mock_init_ggtt(struct drm_i915_private *i915)
ggtt->base.insert_entries = mock_insert_entries;
ggtt->base.bind_vma = mock_bind_ggtt;
ggtt->base.unbind_vma = mock_unbind_ggtt;
+ ggtt->base.set_pages = ggtt_set_pages;
+ ggtt->base.clear_pages = clear_pages;
ggtt->base.cleanup = mock_cleanup;
i915_address_space_init(&ggtt->base, i915, "global");
diff --git a/drivers/gpu/drm/i915/selftests/scatterlist.c b/drivers/gpu/drm/i915/selftests/scatterlist.c
index 1cc5d2931753..cd6d2a16071f 100644
--- a/drivers/gpu/drm/i915/selftests/scatterlist.c
+++ b/drivers/gpu/drm/i915/selftests/scatterlist.c
@@ -189,6 +189,20 @@ static unsigned int random(unsigned long n,
return 1 + (prandom_u32_state(rnd) % 1024);
}
+static unsigned int random_page_size_pages(unsigned long n,
+ unsigned long count,
+ struct rnd_state *rnd)
+{
+ /* 4K, 64K, 2M */
+ static unsigned int page_count[] = {
+ BIT(12) >> PAGE_SHIFT,
+ BIT(16) >> PAGE_SHIFT,
+ BIT(21) >> PAGE_SHIFT,
+ };
+
+ return page_count[(prandom_u32_state(rnd) % 3)];
+}
+
static inline bool page_contiguous(struct page *first,
struct page *last,
unsigned long npages)
@@ -252,6 +266,7 @@ static const npages_fn_t npages_funcs[] = {
grow,
shrink,
random,
+ random_page_size_pages,
NULL,
};
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index b6c3540e07bc..0937d9a7d8fb 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -53,6 +53,8 @@ extern struct file *shmem_file_setup(const char *name,
loff_t size, unsigned long flags);
extern struct file *shmem_kernel_file_setup(const char *name, loff_t size,
unsigned long flags);
+extern struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt,
+ const char *name, loff_t size, unsigned long flags);
extern int shmem_zero_setup(struct vm_area_struct *);
extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index fe25a01c81f2..125bde7d9504 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -397,10 +397,20 @@ typedef struct drm_i915_irq_wait {
#define I915_PARAM_MIN_EU_IN_POOL 39
#define I915_PARAM_MMAP_GTT_VERSION 40
-/* Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution
+/*
+ * Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution
* priorities and the driver will attempt to execute batches in priority order.
+ * The param returns a capability bitmask, nonzero implies that the scheduler
+ * is enabled, with different features present according to the mask.
+ *
+ * The initial priority for each batch is supplied by the context and is
+ * controlled via I915_CONTEXT_PARAM_PRIORITY.
*/
#define I915_PARAM_HAS_SCHEDULER 41
+#define I915_SCHEDULER_CAP_ENABLED (1ul << 0)
+#define I915_SCHEDULER_CAP_PRIORITY (1ul << 1)
+#define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2)
+
#define I915_PARAM_HUC_STATUS 42
/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of
@@ -1308,7 +1318,7 @@ struct drm_i915_reg_read {
* be specified
*/
__u64 offset;
-#define I915_REG_READ_8B_WA BIT(0)
+#define I915_REG_READ_8B_WA (1ul << 0)
__u64 val; /* Return value */
};
@@ -1360,6 +1370,10 @@ struct drm_i915_gem_context_param {
#define I915_CONTEXT_PARAM_GTT_SIZE 0x3
#define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4
#define I915_CONTEXT_PARAM_BANNABLE 0x5
+#define I915_CONTEXT_PARAM_PRIORITY 0x6
+#define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */
+#define I915_CONTEXT_DEFAULT_PRIORITY 0
+#define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */
__u64 value;
};
diff --git a/mm/shmem.c b/mm/shmem.c
index 07a1d22807be..3229d27503ec 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -4183,7 +4183,7 @@ static const struct dentry_operations anon_ops = {
.d_dname = simple_dname
};
-static struct file *__shmem_file_setup(const char *name, loff_t size,
+static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, loff_t size,
unsigned long flags, unsigned int i_flags)
{
struct file *res;
@@ -4192,8 +4192,8 @@ static struct file *__shmem_file_setup(const char *name, loff_t size,
struct super_block *sb;
struct qstr this;
- if (IS_ERR(shm_mnt))
- return ERR_CAST(shm_mnt);
+ if (IS_ERR(mnt))
+ return ERR_CAST(mnt);
if (size < 0 || size > MAX_LFS_FILESIZE)
return ERR_PTR(-EINVAL);
@@ -4205,8 +4205,8 @@ static struct file *__shmem_file_setup(const char *name, loff_t size,
this.name = name;
this.len = strlen(name);
this.hash = 0; /* will go */
- sb = shm_mnt->mnt_sb;
- path.mnt = mntget(shm_mnt);
+ sb = mnt->mnt_sb;
+ path.mnt = mntget(mnt);
path.dentry = d_alloc_pseudo(sb, &this);
if (!path.dentry)
goto put_memory;
@@ -4251,7 +4251,7 @@ put_path:
*/
struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags)
{
- return __shmem_file_setup(name, size, flags, S_PRIVATE);
+ return __shmem_file_setup(shm_mnt, name, size, flags, S_PRIVATE);
}
/**
@@ -4262,11 +4262,25 @@ struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned lon
*/
struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
{
- return __shmem_file_setup(name, size, flags, 0);
+ return __shmem_file_setup(shm_mnt, name, size, flags, 0);
}
EXPORT_SYMBOL_GPL(shmem_file_setup);
/**
+ * shmem_file_setup_with_mnt - get an unlinked file living in tmpfs
+ * @mnt: the tmpfs mount where the file will be created
+ * @name: name for dentry (to be seen in /proc/<pid>/maps
+ * @size: size to be set for the file
+ * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
+ */
+struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, const char *name,
+ loff_t size, unsigned long flags)
+{
+ return __shmem_file_setup(mnt, name, size, flags, 0);
+}
+EXPORT_SYMBOL_GPL(shmem_file_setup_with_mnt);
+
+/**
* shmem_zero_setup - setup a shared anonymous mapping
* @vma: the vma to be mmapped is prepared by do_mmap_pgoff
*/
@@ -4281,7 +4295,7 @@ int shmem_zero_setup(struct vm_area_struct *vma)
* accessible to the user through its mapping, use S_PRIVATE flag to
* bypass file security, in the same way as shmem_kernel_file_setup().
*/
- file = __shmem_file_setup("dev/zero", size, vma->vm_flags, S_PRIVATE);
+ file = shmem_kernel_file_setup("dev/zero", size, vma->vm_flags);
if (IS_ERR(file))
return PTR_ERR(file);