diff options
44 files changed, 4101 insertions, 842 deletions
diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index 9ddf8c6cb887..1f9dc3e97ab7 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -4238,6 +4238,20 @@ int num_ioctls;</synopsis> </sect2> </sect1> <sect1> + <title>GuC-based Command Submission</title> + <sect2> + <title>GuC</title> +!Pdrivers/gpu/drm/i915/intel_guc_loader.c GuC-specific firmware loader +!Idrivers/gpu/drm/i915/intel_guc_loader.c + </sect2> + <sect2> + <title>GuC Client</title> +!Pdrivers/gpu/drm/i915/i915_guc_submission.c GuC-based command submissison +!Idrivers/gpu/drm/i915/i915_guc_submission.c + </sect2> + </sect1> + + <sect1> <title> Tracing </title> <para> This sections covers all things related to the tracepoints implemented in diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 998b4643109f..44d290ae1999 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -40,6 +40,10 @@ i915-y += i915_cmd_parser.o \ intel_ringbuffer.o \ intel_uncore.o +# general-purpose microcontroller (GuC) support +i915-y += intel_guc_loader.o \ + i915_guc_submission.o + # autogenerated null render state i915-y += intel_renderstate_gen6.o \ intel_renderstate_gen7.o \ diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 237ff6884a22..ad7d7ab76d3f 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -94,7 +94,7 @@ #define CMD(op, opm, f, lm, fl, ...) \ { \ .flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \ - .cmd = { (op), (opm) }, \ + .cmd = { (op), (opm) }, \ .length = { (lm) }, \ __VA_ARGS__ \ } @@ -124,14 +124,14 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ), CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W, .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ), - CMD( MI_STORE_REGISTER_MEM(1), SMI, !F, 0xFF, W | B, + CMD( MI_STORE_REGISTER_MEM, SMI, F, 1, W | B, .reg = { .offset = 1, .mask = 0x007FFFFC }, .bits = {{ .offset = 0, .mask = MI_GLOBAL_GTT, .expected = 0, }}, ), - CMD( MI_LOAD_REGISTER_MEM(1), SMI, !F, 0xFF, W | B, + CMD( MI_LOAD_REGISTER_MEM, SMI, F, 1, W | B, .reg = { .offset = 1, .mask = 0x007FFFFC }, .bits = {{ .offset = 0, @@ -1021,7 +1021,7 @@ static bool check_cmd(const struct intel_engine_cs *ring, * only MI_LOAD_REGISTER_IMM commands. */ if (reg_addr == OACONTROL) { - if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) { + if (desc->cmd.value == MI_LOAD_REGISTER_MEM) { DRM_DEBUG_DRIVER("CMD: Rejected LRM to OACONTROL\n"); return false; } @@ -1035,7 +1035,7 @@ static bool check_cmd(const struct intel_engine_cs *ring, * allowed mask/value pair given in the whitelist entry. */ if (reg->mask) { - if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) { + if (desc->cmd.value == MI_LOAD_REGISTER_MEM) { DRM_DEBUG_DRIVER("CMD: Rejected LRM to masked register 0x%08X\n", reg_addr); return false; @@ -1213,6 +1213,7 @@ int i915_cmd_parser_get_version(void) * 2. Allow access to the MI_PREDICATE_SRC0 and * MI_PREDICATE_SRC1 registers. * 3. Allow access to the GPGPU_THREADS_DISPATCHED register. + * 4. L3 atomic chicken bits of HSW_SCRATCH1 and HSW_ROW_CHICKEN3. */ - return 3; + return 4; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e3ec9049081f..4563f8b955ea 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -46,11 +46,6 @@ enum { PINNED_LIST, }; -static const char *yesno(int v) -{ - return v ? "yes" : "no"; -} - /* As the drm_debugfs_init() routines are called before dev->dev_private is * allocated we need to hook into the minor for release. */ static int @@ -1995,7 +1990,7 @@ static void i915_dump_lrc_obj(struct seq_file *m, return; } - page = i915_gem_object_get_page(ctx_obj, 1); + page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); if (!WARN_ON(page == NULL)) { reg_state = kmap_atomic(page); @@ -2250,7 +2245,6 @@ static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; - struct drm_file *file; int i; if (INTEL_INFO(dev)->gen == 6) @@ -2273,13 +2267,6 @@ static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev) ppgtt->debug_dump(ppgtt, m); } - list_for_each_entry_reverse(file, &dev->filelist, lhead) { - struct drm_i915_file_private *file_priv = file->driver_priv; - - seq_printf(m, "proc: %s\n", - get_pid_task(file->pid, PIDTYPE_PID)->comm); - idr_for_each(&file_priv->context_idr, per_file_ctx, m); - } seq_printf(m, "ECOCHK: 0x%08x\n", I915_READ(GAM_ECOCHK)); } @@ -2288,6 +2275,7 @@ static int i915_ppgtt_info(struct seq_file *m, void *data) struct drm_info_node *node = m->private; struct drm_device *dev = node->minor->dev; struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_file *file; int ret = mutex_lock_interruptible(&dev->struct_mutex); if (ret) @@ -2299,6 +2287,15 @@ static int i915_ppgtt_info(struct seq_file *m, void *data) else if (INTEL_INFO(dev)->gen >= 6) gen6_ppgtt_info(m, dev); + list_for_each_entry_reverse(file, &dev->filelist, lhead) { + struct drm_i915_file_private *file_priv = file->driver_priv; + + seq_printf(m, "\nproc: %s\n", + get_pid_task(file->pid, PIDTYPE_PID)->comm); + idr_for_each(&file_priv->context_idr, per_file_ctx, + (void *)(unsigned long)m); + } + intel_runtime_pm_put(dev_priv); mutex_unlock(&dev->struct_mutex); @@ -2372,6 +2369,147 @@ static int i915_llc(struct seq_file *m, void *data) return 0; } +static int i915_guc_load_status_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = m->private; + struct drm_i915_private *dev_priv = node->minor->dev->dev_private; + struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; + u32 tmp, i; + + if (!HAS_GUC_UCODE(dev_priv->dev)) + return 0; + + seq_printf(m, "GuC firmware status:\n"); + seq_printf(m, "\tpath: %s\n", + guc_fw->guc_fw_path); + seq_printf(m, "\tfetch: %s\n", + intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status)); + seq_printf(m, "\tload: %s\n", + intel_guc_fw_status_repr(guc_fw->guc_fw_load_status)); + seq_printf(m, "\tversion wanted: %d.%d\n", + guc_fw->guc_fw_major_wanted, guc_fw->guc_fw_minor_wanted); + seq_printf(m, "\tversion found: %d.%d\n", + guc_fw->guc_fw_major_found, guc_fw->guc_fw_minor_found); + + tmp = I915_READ(GUC_STATUS); + + seq_printf(m, "\nGuC status 0x%08x:\n", tmp); + seq_printf(m, "\tBootrom status = 0x%x\n", + (tmp & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT); + seq_printf(m, "\tuKernel status = 0x%x\n", + (tmp & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT); + seq_printf(m, "\tMIA Core status = 0x%x\n", + (tmp & GS_MIA_MASK) >> GS_MIA_SHIFT); + seq_puts(m, "\nScratch registers:\n"); + for (i = 0; i < 16; i++) + seq_printf(m, "\t%2d: \t0x%x\n", i, I915_READ(SOFT_SCRATCH(i))); + + return 0; +} + +static void i915_guc_client_info(struct seq_file *m, + struct drm_i915_private *dev_priv, + struct i915_guc_client *client) +{ + struct intel_engine_cs *ring; + uint64_t tot = 0; + uint32_t i; + + seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n", + client->priority, client->ctx_index, client->proc_desc_offset); + seq_printf(m, "\tDoorbell id %d, offset: 0x%x, cookie 0x%x\n", + client->doorbell_id, client->doorbell_offset, client->cookie); + seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n", + client->wq_size, client->wq_offset, client->wq_tail); + + seq_printf(m, "\tFailed to queue: %u\n", client->q_fail); + seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail); + seq_printf(m, "\tLast submission result: %d\n", client->retcode); + + for_each_ring(ring, dev_priv, i) { + seq_printf(m, "\tSubmissions: %llu %s\n", + client->submissions[i], + ring->name); + tot += client->submissions[i]; + } + seq_printf(m, "\tTotal: %llu\n", tot); +} + +static int i915_guc_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = m->private; + struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_guc guc; + struct i915_guc_client client = {}; + struct intel_engine_cs *ring; + enum intel_ring_id i; + u64 total = 0; + + if (!HAS_GUC_SCHED(dev_priv->dev)) + return 0; + + /* Take a local copy of the GuC data, so we can dump it at leisure */ + spin_lock(&dev_priv->guc.host2guc_lock); + guc = dev_priv->guc; + if (guc.execbuf_client) { + spin_lock(&guc.execbuf_client->wq_lock); + client = *guc.execbuf_client; + spin_unlock(&guc.execbuf_client->wq_lock); + } + spin_unlock(&dev_priv->guc.host2guc_lock); + + seq_printf(m, "GuC total action count: %llu\n", guc.action_count); + seq_printf(m, "GuC action failure count: %u\n", guc.action_fail); + seq_printf(m, "GuC last action command: 0x%x\n", guc.action_cmd); + seq_printf(m, "GuC last action status: 0x%x\n", guc.action_status); + seq_printf(m, "GuC last action error code: %d\n", guc.action_err); + + seq_printf(m, "\nGuC submissions:\n"); + for_each_ring(ring, dev_priv, i) { + seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x %9d\n", + ring->name, guc.submissions[i], + guc.last_seqno[i], guc.last_seqno[i]); + total += guc.submissions[i]; + } + seq_printf(m, "\t%s: %llu\n", "Total", total); + + seq_printf(m, "\nGuC execbuf client @ %p:\n", guc.execbuf_client); + i915_guc_client_info(m, dev_priv, &client); + + /* Add more as required ... */ + + return 0; +} + +static int i915_guc_log_dump(struct seq_file *m, void *data) +{ + struct drm_info_node *node = m->private; + struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_gem_object *log_obj = dev_priv->guc.log_obj; + u32 *log; + int i = 0, pg; + + if (!log_obj) + return 0; + + for (pg = 0; pg < log_obj->base.size / PAGE_SIZE; pg++) { + log = kmap_atomic(i915_gem_object_get_page(log_obj, pg)); + + for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4) + seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n", + *(log + i), *(log + i + 1), + *(log + i + 2), *(log + i + 3)); + + kunmap_atomic(log); + } + + seq_putc(m, '\n'); + + return 0; +} + static int i915_edp_psr_status(struct seq_file *m, void *data) { struct drm_info_node *node = m->private; @@ -4807,7 +4945,7 @@ static void cherryview_sseu_device_status(struct drm_device *dev, struct sseu_dev_status *stat) { struct drm_i915_private *dev_priv = dev->dev_private; - const int ss_max = 2; + int ss_max = 2; int ss; u32 sig1[ss_max], sig2[ss_max]; @@ -5033,6 +5171,9 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_gem_hws_bsd", i915_hws_info, 0, (void *)VCS}, {"i915_gem_hws_vebox", i915_hws_info, 0, (void *)VECS}, {"i915_gem_batch_pool", i915_gem_batch_pool_info, 0}, + {"i915_guc_info", i915_guc_info, 0}, + {"i915_guc_load_status", i915_guc_load_status_info, 0}, + {"i915_guc_log_dump", i915_guc_log_dump, 0}, {"i915_frequency_info", i915_frequency_info, 0}, {"i915_hangcheck_info", i915_hangcheck_info, 0}, {"i915_drpc_info", i915_drpc_info, 0}, diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index ab37d1121be8..f0eaa6f8826b 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -364,12 +364,12 @@ static void i915_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_ dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; /* i915 resume handler doesn't set to D0 */ pci_set_power_state(dev->pdev, PCI_D0); - i915_resume_legacy(dev); + i915_resume_switcheroo(dev); dev->switch_power_state = DRM_SWITCH_POWER_ON; } else { pr_err("switched off\n"); dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; - i915_suspend_legacy(dev, pmm); + i915_suspend_switcheroo(dev, pmm); dev->switch_power_state = DRM_SWITCH_POWER_OFF; } } @@ -435,6 +435,11 @@ static int i915_load_modeset_init(struct drm_device *dev) * working irqs for e.g. gmbus and dp aux transfers. */ intel_modeset_init(dev); + /* intel_guc_ucode_init() needs the mutex to allocate GEM objects */ + mutex_lock(&dev->struct_mutex); + intel_guc_ucode_init(dev); + mutex_unlock(&dev->struct_mutex); + ret = i915_gem_init(dev); if (ret) goto cleanup_irq; @@ -476,6 +481,9 @@ cleanup_gem: i915_gem_context_fini(dev); mutex_unlock(&dev->struct_mutex); cleanup_irq: + mutex_lock(&dev->struct_mutex); + intel_guc_ucode_fini(dev); + mutex_unlock(&dev->struct_mutex); drm_irq_uninstall(dev); cleanup_gem_stolen: i915_gem_cleanup_stolen(dev); @@ -791,6 +799,24 @@ static void intel_device_info_runtime_init(struct drm_device *dev) info->has_eu_pg ? "y" : "n"); } +static void intel_init_dpio(struct drm_i915_private *dev_priv) +{ + if (!IS_VALLEYVIEW(dev_priv)) + return; + + /* + * IOSF_PORT_DPIO is used for VLV x2 PHY (DP/HDMI B and C), + * CHV x1 PHY (DP/HDMI D) + * IOSF_PORT_DPIO_2 is used for CHV x2 PHY (DP/HDMI B and C) + */ + if (IS_CHERRYVIEW(dev_priv)) { + DPIO_PHY_IOSF_PORT(DPIO_PHY0) = IOSF_PORT_DPIO_2; + DPIO_PHY_IOSF_PORT(DPIO_PHY1) = IOSF_PORT_DPIO; + } else { + DPIO_PHY_IOSF_PORT(DPIO_PHY0) = IOSF_PORT_DPIO; + } +} + /** * i915_driver_load - setup chip and create an initial config * @dev: DRM device @@ -991,6 +1017,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) intel_device_info_runtime_init(dev); + intel_init_dpio(dev_priv); + if (INTEL_INFO(dev)->num_pipes) { ret = drm_vblank_init(dev, INTEL_INFO(dev)->num_pipes); if (ret) @@ -1128,6 +1156,7 @@ int i915_driver_unload(struct drm_device *dev) flush_workqueue(dev_priv->wq); mutex_lock(&dev->struct_mutex); + intel_guc_ucode_fini(dev); i915_gem_cleanup_ringbuffer(dev); i915_gem_context_fini(dev); mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 8edcec8ae592..4737d15de5f0 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -362,6 +362,7 @@ static const struct intel_device_info intel_skylake_info = { .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, .has_llc = 1, .has_ddi = 1, + .has_fpga_dbg = 1, .has_fbc = 1, GEN_DEFAULT_PIPEOFFSETS, IVB_CURSOR_OFFSETS, @@ -374,6 +375,7 @@ static const struct intel_device_info intel_skylake_gt3_info = { .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, .has_llc = 1, .has_ddi = 1, + .has_fpga_dbg = 1, .has_fbc = 1, GEN_DEFAULT_PIPEOFFSETS, IVB_CURSOR_OFFSETS, @@ -386,6 +388,7 @@ static const struct intel_device_info intel_broxton_info = { .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, .num_pipes = 3, .has_ddi = 1, + .has_fpga_dbg = 1, .has_fbc = 1, GEN_DEFAULT_PIPEOFFSETS, IVB_CURSOR_OFFSETS, @@ -679,7 +682,7 @@ static int i915_drm_suspend_late(struct drm_device *drm_dev, bool hibernation) return 0; } -int i915_suspend_legacy(struct drm_device *dev, pm_message_t state) +int i915_suspend_switcheroo(struct drm_device *dev, pm_message_t state) { int error; @@ -812,7 +815,7 @@ static int i915_drm_resume_early(struct drm_device *dev) return ret; } -int i915_resume_legacy(struct drm_device *dev) +int i915_resume_switcheroo(struct drm_device *dev) { int ret; @@ -1649,7 +1652,7 @@ static struct drm_driver driver = { */ .driver_features = DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | DRIVER_PRIME | - DRIVER_RENDER, + DRIVER_RENDER | DRIVER_MODESET, .load = i915_driver_load, .unload = i915_driver_unload, .open = i915_driver_open, @@ -1658,10 +1661,6 @@ static struct drm_driver driver = { .postclose = i915_driver_postclose, .set_busid = drm_pci_set_busid, - /* Used in place of i915_pm_ops for non-DRIVER_MODESET */ - .suspend = i915_suspend_legacy, - .resume = i915_resume_legacy, - #if defined(CONFIG_DEBUG_FS) .debugfs_init = i915_debugfs_init, .debugfs_cleanup = i915_debugfs_cleanup, @@ -1704,7 +1703,6 @@ static int __init i915_init(void) * either the i915.modeset prarameter or by the * vga_text_mode_force boot option. */ - driver.driver_features |= DRIVER_MODESET; if (i915.modeset == 0) driver.driver_features &= ~DRIVER_MODESET; @@ -1715,18 +1713,12 @@ static int __init i915_init(void) #endif if (!(driver.driver_features & DRIVER_MODESET)) { - driver.get_vblank_timestamp = NULL; /* Silently fail loading to not upset userspace. */ DRM_DEBUG_DRIVER("KMS and UMS disabled.\n"); return 0; } - /* - * FIXME: Note that we're lying to the DRM core here so that we can get access - * to the atomic ioctl and the atomic properties. Only plane operations on - * a single CRTC will actually work. - */ - if (driver.driver_features & DRIVER_MODESET) + if (i915.nuclear_pageflip) driver.driver_features |= DRIVER_ATOMIC; return drm_pci_init(&driver, &i915_pci_driver); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e304d4e5ae0c..4eabe19a684f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -50,13 +50,14 @@ #include <linux/intel-iommu.h> #include <linux/kref.h> #include <linux/pm_qos.h> +#include "intel_guc.h" /* General customization: */ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20150731" +#define DRIVER_DATE "20150828" #undef WARN_ON /* Many gcc seem to no see through this and fall over :( */ @@ -67,11 +68,11 @@ BUILD_BUG_ON(__i915_warn_cond); \ WARN(__i915_warn_cond, "WARN_ON(" #x ")"); }) #else -#define WARN_ON(x) WARN((x), "WARN_ON(" #x ")") +#define WARN_ON(x) WARN((x), "WARN_ON(%s)", #x ) #endif #undef WARN_ON_ONCE -#define WARN_ON_ONCE(x) WARN_ONCE((x), "WARN_ON_ONCE(" #x ")") +#define WARN_ON_ONCE(x) WARN_ONCE((x), "WARN_ON_ONCE(%s)", #x ) #define MISSING_CASE(x) WARN(1, "Missing switch case (%lu) in %s\n", \ (long) (x), __func__); @@ -105,6 +106,11 @@ unlikely(__ret_warn_on); \ }) +static inline const char *yesno(bool v) +{ + return v ? "yes" : "no"; +} + enum pipe { INVALID_PIPE = -1, PIPE_A = 0, @@ -549,7 +555,7 @@ struct drm_i915_error_state { struct drm_i915_error_object { int page_count; - u32 gtt_offset; + u64 gtt_offset; u32 *pages[0]; } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page; @@ -575,7 +581,7 @@ struct drm_i915_error_state { u32 size; u32 name; u32 rseqno[I915_NUM_RINGS], wseqno; - u32 gtt_offset; + u64 gtt_offset; u32 read_domains; u32 write_domain; s32 fence_reg:I915_MAX_NUM_FENCE_BITS; @@ -1693,7 +1699,7 @@ struct i915_execbuffer_params { struct drm_file *file; uint32_t dispatch_flags; uint32_t args_batch_start_offset; - uint32_t batch_obj_vm_offset; + uint64_t batch_obj_vm_offset; struct intel_engine_cs *ring; struct drm_i915_gem_object *batch_obj; struct intel_context *ctx; @@ -1716,6 +1722,8 @@ struct drm_i915_private { struct i915_virtual_gpu vgpu; + struct intel_guc guc; + struct intel_csr csr; /* Display CSR-related protection */ @@ -1796,6 +1804,7 @@ struct drm_i915_private { unsigned int fsb_freq, mem_freq, is_ddr3; unsigned int skl_boot_cdclk; unsigned int cdclk_freq, max_cdclk_freq; + unsigned int max_dotclk_freq; unsigned int hpll_freq; /** @@ -1960,6 +1969,11 @@ static inline struct drm_i915_private *dev_to_i915(struct device *dev) return to_i915(dev_get_drvdata(dev)); } +static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc) +{ + return container_of(guc, struct drm_i915_private, guc); +} + /* Iterate over initialised rings */ #define for_each_ring(ring__, dev_priv__, i__) \ for ((i__) = 0; (i__) < I915_NUM_RINGS; (i__)++) \ @@ -2517,7 +2531,8 @@ struct drm_i915_cmd_table { #define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 6) #define HAS_LOGICAL_RING_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 8) #define USES_PPGTT(dev) (i915.enable_ppgtt) -#define USES_FULL_PPGTT(dev) (i915.enable_ppgtt == 2) +#define USES_FULL_PPGTT(dev) (i915.enable_ppgtt >= 2) +#define USES_FULL_48BIT_PPGTT(dev) (i915.enable_ppgtt == 3) #define HAS_OVERLAY(dev) (INTEL_INFO(dev)->has_overlay) #define OVERLAY_NEEDS_PHYSICAL(dev) (INTEL_INFO(dev)->overlay_needs_physical) @@ -2563,6 +2578,9 @@ struct drm_i915_cmd_table { #define HAS_CSR(dev) (IS_SKYLAKE(dev)) +#define HAS_GUC_UCODE(dev) (IS_GEN9(dev)) +#define HAS_GUC_SCHED(dev) (IS_GEN9(dev)) + #define HAS_RESOURCE_STREAMER(dev) (IS_HASWELL(dev) || \ INTEL_INFO(dev)->gen >= 8) @@ -2600,8 +2618,8 @@ struct drm_i915_cmd_table { extern const struct drm_ioctl_desc i915_ioctls[]; extern int i915_max_ioctl; -extern int i915_suspend_legacy(struct drm_device *dev, pm_message_t state); -extern int i915_resume_legacy(struct drm_device *dev); +extern int i915_suspend_switcheroo(struct drm_device *dev, pm_message_t state); +extern int i915_resume_switcheroo(struct drm_device *dev); /* i915_params.c */ struct i915_params { @@ -2634,6 +2652,7 @@ struct i915_params { int use_mmio_flip; int mmio_debug; bool verbose_state_checks; + bool nuclear_pageflip; int edp_vswing; }; extern struct i915_params i915 __read_mostly; @@ -2983,13 +3002,11 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, struct dma_buf *i915_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gem_obj, int flags); -unsigned long -i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, - const struct i915_ggtt_view *view); -unsigned long -i915_gem_obj_offset(struct drm_i915_gem_object *o, - struct i915_address_space *vm); -static inline unsigned long +u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, + const struct i915_ggtt_view *view); +u64 i915_gem_obj_offset(struct drm_i915_gem_object *o, + struct i915_address_space *vm); +static inline u64 i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *o) { return i915_gem_obj_ggtt_offset_view(o, &i915_ggtt_view_normal); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 84f91bcc12f7..41263cd4170c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1005,12 +1005,14 @@ out: if (!needs_clflush_after && obj->base.write_domain != I915_GEM_DOMAIN_CPU) { if (i915_gem_clflush_object(obj, obj->pin_display)) - i915_gem_chipset_flush(dev); + needs_clflush_after = true; } } if (needs_clflush_after) i915_gem_chipset_flush(dev); + else + obj->cache_dirty = true; intel_fb_obj_flush(obj, false, ORIGIN_CPU); return ret; @@ -3355,7 +3357,8 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, { struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; - u32 size, fence_size, fence_alignment, unfenced_alignment; + u32 fence_alignment, unfenced_alignment; + u64 size, fence_size; u64 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; u64 end = @@ -3414,7 +3417,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, * attempt to find space. */ if (size > end) { - DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%u > %s aperture=%llu\n", + DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n", ggtt_view ? ggtt_view->type : 0, size, flags & PIN_MAPPABLE ? "mappable" : "total", @@ -3638,10 +3641,10 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, { struct drm_device *dev = obj->base.dev; struct i915_vma *vma, *next; - int ret; + int ret = 0; if (obj->cache_level == cache_level) - return 0; + goto out; if (i915_gem_obj_is_pinned(obj)) { DRM_DEBUG("can not change the cache level of pinned objects\n"); @@ -3686,6 +3689,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, vma->node.color = cache_level; obj->cache_level = cache_level; +out: if (obj->cache_dirty && obj->base.write_domain != I915_GEM_DOMAIN_CPU && cpu_write_needs_clflush(obj)) { @@ -3738,6 +3742,15 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, level = I915_CACHE_NONE; break; case I915_CACHING_CACHED: + /* + * Due to a HW issue on BXT A stepping, GPU stores via a + * snooped mapping may leave stale data in a corresponding CPU + * cacheline, whereas normally such cachelines would get + * invalidated. + */ + if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) + return -ENODEV; + level = I915_CACHE_LLC; break; case I915_CACHING_DISPLAY: @@ -4011,15 +4024,13 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj, return -EBUSY; if (i915_vma_misplaced(vma, alignment, flags)) { - unsigned long offset; - offset = ggtt_view ? i915_gem_obj_ggtt_offset_view(obj, ggtt_view) : - i915_gem_obj_offset(obj, vm); WARN(vma->pin_count, "bo is already pinned in %s with incorrect alignment:" - " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d," + " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d," " obj->map_and_fenceable=%d\n", ggtt_view ? "ggtt" : "ppgtt", - offset, + upper_32_bits(vma->node.start), + lower_32_bits(vma->node.start), alignment, !!(flags & PIN_MAPPABLE), obj->map_and_fenceable); @@ -4679,6 +4690,22 @@ i915_gem_init_hw(struct drm_device *dev) goto out; } + /* We can't enable contexts until all firmware is loaded */ + ret = intel_guc_ucode_load(dev); + if (ret) { + /* + * If we got an error and GuC submission is enabled, map + * the error to -EIO so the GPU will be declared wedged. + * OTOH, if we didn't intend to use the GuC anyway, just + * discard the error and carry on. + */ + DRM_ERROR("Failed to initialize GuC, error %d%s\n", ret, + i915.enable_guc_submission ? "" : " (ignored)"); + ret = i915.enable_guc_submission ? -EIO : 0; + if (ret) + goto out; + } + /* Now it is safe to go back round and do everything else: */ for_each_ring(ring, dev_priv, i) { struct drm_i915_gem_request *req; @@ -4974,9 +5001,8 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, } /* All the new VM stuff */ -unsigned long -i915_gem_obj_offset(struct drm_i915_gem_object *o, - struct i915_address_space *vm) +u64 i915_gem_obj_offset(struct drm_i915_gem_object *o, + struct i915_address_space *vm) { struct drm_i915_private *dev_priv = o->base.dev->dev_private; struct i915_vma *vma; @@ -4996,9 +5022,8 @@ i915_gem_obj_offset(struct drm_i915_gem_object *o, return -1; } -unsigned long -i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, - const struct i915_ggtt_view *view) +u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, + const struct i915_ggtt_view *view) { struct i915_address_space *ggtt = i915_obj_to_ggtt(o); struct i915_vma *vma; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 8e893b354bcc..74aa0c9929ba 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -332,6 +332,13 @@ int i915_gem_context_init(struct drm_device *dev) if (WARN_ON(dev_priv->ring[RCS].default_context)) return 0; + if (intel_vgpu_active(dev) && HAS_LOGICAL_RING_CONTEXTS(dev)) { + if (!i915.enable_execlists) { + DRM_INFO("Only EXECLIST mode is supported in vgpu.\n"); + return -EINVAL; + } + } + if (i915.enable_execlists) { /* NB: intentionally left blank. We will allocate our own * backing objects as we need them, thank you very much */ diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c index af1f8c461060..6077dffb318a 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/i915_gem_fence.c @@ -128,7 +128,7 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg, WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) || (size & -size) != size || (i915_gem_obj_ggtt_offset(obj) & (size - 1)), - "object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n", + "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n", i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size); if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) @@ -171,7 +171,7 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg, WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) || (size & -size) != size || (i915_gem_obj_ggtt_offset(obj) & (size - 1)), - "object 0x%08lx not 512K or pot-size 0x%08x aligned\n", + "object 0x%08llx not 512K or pot-size 0x%08x aligned\n", i915_gem_obj_ggtt_offset(obj), size); pitch_val = obj->stride / 128; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 96054a560f4f..bdb7adc7359e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -204,6 +204,9 @@ static gen8_pde_t gen8_pde_encode(const dma_addr_t addr, return pde; } +#define gen8_pdpe_encode gen8_pde_encode +#define gen8_pml4e_encode gen8_pde_encode + static gen6_pte_t snb_pte_encode(dma_addr_t addr, enum i915_cache_level level, bool valid, u32 unused) @@ -522,6 +525,127 @@ static void gen8_initialize_pd(struct i915_address_space *vm, fill_px(vm->dev, pd, scratch_pde); } +static int __pdp_init(struct drm_device *dev, + struct i915_page_directory_pointer *pdp) +{ + size_t pdpes = I915_PDPES_PER_PDP(dev); + + pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes), + sizeof(unsigned long), + GFP_KERNEL); + if (!pdp->used_pdpes) + return -ENOMEM; + + pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory), + GFP_KERNEL); + if (!pdp->page_directory) { + kfree(pdp->used_pdpes); + /* the PDP might be the statically allocated top level. Keep it + * as clean as possible */ + pdp->used_pdpes = NULL; + return -ENOMEM; + } + + return 0; +} + +static void __pdp_fini(struct i915_page_directory_pointer *pdp) +{ + kfree(pdp->used_pdpes); + kfree(pdp->page_directory); + pdp->page_directory = NULL; +} + +static struct +i915_page_directory_pointer *alloc_pdp(struct drm_device *dev) +{ + struct i915_page_directory_pointer *pdp; + int ret = -ENOMEM; + + WARN_ON(!USES_FULL_48BIT_PPGTT(dev)); + + pdp = kzalloc(sizeof(*pdp), GFP_KERNEL); + if (!pdp) + return ERR_PTR(-ENOMEM); + + ret = __pdp_init(dev, pdp); + if (ret) + goto fail_bitmap; + + ret = setup_px(dev, pdp); + if (ret) + goto fail_page_m; + + return pdp; + +fail_page_m: + __pdp_fini(pdp); +fail_bitmap: + kfree(pdp); + + return ERR_PTR(ret); +} + +static void free_pdp(struct drm_device *dev, + struct i915_page_directory_pointer *pdp) +{ + __pdp_fini(pdp); + if (USES_FULL_48BIT_PPGTT(dev)) { + cleanup_px(dev, pdp); + kfree(pdp); + } +} + +static void gen8_initialize_pdp(struct i915_address_space *vm, + struct i915_page_directory_pointer *pdp) +{ + gen8_ppgtt_pdpe_t scratch_pdpe; + + scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC); + + fill_px(vm->dev, pdp, scratch_pdpe); +} + +static void gen8_initialize_pml4(struct i915_address_space *vm, + struct i915_pml4 *pml4) +{ + gen8_ppgtt_pml4e_t scratch_pml4e; + + scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp), + I915_CACHE_LLC); + + fill_px(vm->dev, pml4, scratch_pml4e); +} + +static void +gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt, + struct i915_page_directory_pointer *pdp, + struct i915_page_directory *pd, + int index) +{ + gen8_ppgtt_pdpe_t *page_directorypo; + + if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) + return; + + page_directorypo = kmap_px(pdp); + page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); + kunmap_px(ppgtt, page_directorypo); +} + +static void +gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt, + struct i915_pml4 *pml4, + struct i915_page_directory_pointer *pdp, + int index) +{ + gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4); + + WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)); + pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); + kunmap_px(ppgtt, pagemap); +} + /* Broadwell Page Directory Pointer Descriptors */ static int gen8_write_pdp(struct drm_i915_gem_request *req, unsigned entry, @@ -547,8 +671,8 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req, return 0; } -static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req) +static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt, + struct drm_i915_gem_request *req) { int i, ret; @@ -563,31 +687,38 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt, return 0; } -static void gen8_ppgtt_clear_range(struct i915_address_space *vm, - uint64_t start, - uint64_t length, - bool use_scratch) +static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt, + struct drm_i915_gem_request *req) +{ + return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4)); +} + +static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm, + struct i915_page_directory_pointer *pdp, + uint64_t start, + uint64_t length, + gen8_pte_t scratch_pte) { struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); - gen8_pte_t *pt_vaddr, scratch_pte; - unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK; - unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK; - unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK; + gen8_pte_t *pt_vaddr; + unsigned pdpe = gen8_pdpe_index(start); + unsigned pde = gen8_pde_index(start); + unsigned pte = gen8_pte_index(start); unsigned num_entries = length >> PAGE_SHIFT; unsigned last_pte, i; - scratch_pte = gen8_pte_encode(px_dma(ppgtt->base.scratch_page), - I915_CACHE_LLC, use_scratch); + if (WARN_ON(!pdp)) + return; while (num_entries) { struct i915_page_directory *pd; struct i915_page_table *pt; - if (WARN_ON(!ppgtt->pdp.page_directory[pdpe])) + if (WARN_ON(!pdp->page_directory[pdpe])) break; - pd = ppgtt->pdp.page_directory[pdpe]; + pd = pdp->page_directory[pdpe]; if (WARN_ON(!pd->page_table[pde])) break; @@ -612,45 +743,69 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, pte = 0; if (++pde == I915_PDES) { - pdpe++; + if (++pdpe == I915_PDPES_PER_PDP(vm->dev)) + break; pde = 0; } } } -static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, - struct sg_table *pages, - uint64_t start, - enum i915_cache_level cache_level, u32 unused) +static void gen8_ppgtt_clear_range(struct i915_address_space *vm, + uint64_t start, + uint64_t length, + bool use_scratch) +{ + struct i915_hw_ppgtt *ppgtt = + container_of(vm, struct i915_hw_ppgtt, base); + gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), + I915_CACHE_LLC, use_scratch); + + if (!USES_FULL_48BIT_PPGTT(vm->dev)) { + gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length, + scratch_pte); + } else { + uint64_t templ4, pml4e; + struct i915_page_directory_pointer *pdp; + + gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4, pml4e) { + gen8_ppgtt_clear_pte_range(vm, pdp, start, length, + scratch_pte); + } + } +} + +static void +gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm, + struct i915_page_directory_pointer *pdp, + struct sg_page_iter *sg_iter, + uint64_t start, + enum i915_cache_level cache_level) { struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); gen8_pte_t *pt_vaddr; - unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK; - unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK; - unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK; - struct sg_page_iter sg_iter; + unsigned pdpe = gen8_pdpe_index(start); + unsigned pde = gen8_pde_index(start); + unsigned pte = gen8_pte_index(start); pt_vaddr = NULL; - for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) { - if (WARN_ON(pdpe >= GEN8_LEGACY_PDPES)) - break; - + while (__sg_page_iter_next(sg_iter)) { if (pt_vaddr == NULL) { - struct i915_page_directory *pd = ppgtt->pdp.page_directory[pdpe]; + struct i915_page_directory *pd = pdp->page_directory[pdpe]; struct i915_page_table *pt = pd->page_table[pde]; pt_vaddr = kmap_px(pt); } pt_vaddr[pte] = - gen8_pte_encode(sg_page_iter_dma_address(&sg_iter), + gen8_pte_encode(sg_page_iter_dma_address(sg_iter), cache_level, true); if (++pte == GEN8_PTES) { kunmap_px(ppgtt, pt_vaddr); pt_vaddr = NULL; if (++pde == I915_PDES) { - pdpe++; + if (++pdpe == I915_PDPES_PER_PDP(vm->dev)) + break; pde = 0; } pte = 0; @@ -661,6 +816,33 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, kunmap_px(ppgtt, pt_vaddr); } +static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, + struct sg_table *pages, + uint64_t start, + enum i915_cache_level cache_level, + u32 unused) +{ + struct i915_hw_ppgtt *ppgtt = + container_of(vm, struct i915_hw_ppgtt, base); + struct sg_page_iter sg_iter; + + __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0); + + if (!USES_FULL_48BIT_PPGTT(vm->dev)) { + gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start, + cache_level); + } else { + struct i915_page_directory_pointer *pdp; + uint64_t templ4, pml4e; + uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT; + + gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4, pml4e) { + gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter, + start, cache_level); + } + } +} + static void gen8_free_page_tables(struct drm_device *dev, struct i915_page_directory *pd) { @@ -699,8 +881,55 @@ static int gen8_init_scratch(struct i915_address_space *vm) return PTR_ERR(vm->scratch_pd); } + if (USES_FULL_48BIT_PPGTT(dev)) { + vm->scratch_pdp = alloc_pdp(dev); + if (IS_ERR(vm->scratch_pdp)) { + free_pd(dev, vm->scratch_pd); + free_pt(dev, vm->scratch_pt); + free_scratch_page(dev, vm->scratch_page); + return PTR_ERR(vm->scratch_pdp); + } + } + gen8_initialize_pt(vm, vm->scratch_pt); gen8_initialize_pd(vm, vm->scratch_pd); + if (USES_FULL_48BIT_PPGTT(dev)) + gen8_initialize_pdp(vm, vm->scratch_pdp); + + return 0; +} + +static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) +{ + enum vgt_g2v_type msg; + struct drm_device *dev = ppgtt->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned int offset = vgtif_reg(pdp0_lo); + int i; + + if (USES_FULL_48BIT_PPGTT(dev)) { + u64 daddr = px_dma(&ppgtt->pml4); + + I915_WRITE(offset, lower_32_bits(daddr)); + I915_WRITE(offset + 4, upper_32_bits(daddr)); + + msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : + VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY); + } else { + for (i = 0; i < GEN8_LEGACY_PDPES; i++) { + u64 daddr = i915_page_dir_dma_addr(ppgtt, i); + + I915_WRITE(offset, lower_32_bits(daddr)); + I915_WRITE(offset + 4, upper_32_bits(daddr)); + + offset += 8; + } + + msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE : + VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY); + } + + I915_WRITE(vgtif_reg(g2v_notify), msg); return 0; } @@ -709,35 +938,65 @@ static void gen8_free_scratch(struct i915_address_space *vm) { struct drm_device *dev = vm->dev; + if (USES_FULL_48BIT_PPGTT(dev)) + free_pdp(dev, vm->scratch_pdp); free_pd(dev, vm->scratch_pd); free_pt(dev, vm->scratch_pt); free_scratch_page(dev, vm->scratch_page); } -static void gen8_ppgtt_cleanup(struct i915_address_space *vm) +static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev, + struct i915_page_directory_pointer *pdp) { - struct i915_hw_ppgtt *ppgtt = - container_of(vm, struct i915_hw_ppgtt, base); int i; - for_each_set_bit(i, ppgtt->pdp.used_pdpes, GEN8_LEGACY_PDPES) { - if (WARN_ON(!ppgtt->pdp.page_directory[i])) + for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) { + if (WARN_ON(!pdp->page_directory[i])) continue; - gen8_free_page_tables(ppgtt->base.dev, - ppgtt->pdp.page_directory[i]); - free_pd(ppgtt->base.dev, ppgtt->pdp.page_directory[i]); + gen8_free_page_tables(dev, pdp->page_directory[i]); + free_pd(dev, pdp->page_directory[i]); } + free_pdp(dev, pdp); +} + +static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt) +{ + int i; + + for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { + if (WARN_ON(!ppgtt->pml4.pdps[i])) + continue; + + gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]); + } + + cleanup_px(ppgtt->base.dev, &ppgtt->pml4); +} + +static void gen8_ppgtt_cleanup(struct i915_address_space *vm) +{ + struct i915_hw_ppgtt *ppgtt = + container_of(vm, struct i915_hw_ppgtt, base); + + if (intel_vgpu_active(vm->dev)) + gen8_ppgtt_notify_vgt(ppgtt, false); + + if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) + gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp); + else + gen8_ppgtt_cleanup_4lvl(ppgtt); + gen8_free_scratch(vm); } /** * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range. - * @ppgtt: Master ppgtt structure. - * @pd: Page directory for this address range. + * @vm: Master vm structure. + * @pd: Page directory for this address range. * @start: Starting virtual address to begin allocations. - * @length Size of the allocations. + * @length: Size of the allocations. * @new_pts: Bitmap set by function with new allocations. Likely used by the * caller to free on error. * @@ -750,22 +1009,22 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) * * Return: 0 if success; negative error code otherwise. */ -static int gen8_ppgtt_alloc_pagetabs(struct i915_hw_ppgtt *ppgtt, +static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm, struct i915_page_directory *pd, uint64_t start, uint64_t length, unsigned long *new_pts) { - struct drm_device *dev = ppgtt->base.dev; + struct drm_device *dev = vm->dev; struct i915_page_table *pt; uint64_t temp; uint32_t pde; gen8_for_each_pde(pt, pd, start, length, temp, pde) { /* Don't reallocate page tables */ - if (pt) { + if (test_bit(pde, pd->used_pdes)) { /* Scratch is never allocated this way */ - WARN_ON(pt == ppgtt->base.scratch_pt); + WARN_ON(pt == vm->scratch_pt); continue; } @@ -773,9 +1032,10 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_hw_ppgtt *ppgtt, if (IS_ERR(pt)) goto unwind_out; - gen8_initialize_pt(&ppgtt->base, pt); + gen8_initialize_pt(vm, pt); pd->page_table[pde] = pt; __set_bit(pde, new_pts); + trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT); } return 0; @@ -789,11 +1049,11 @@ unwind_out: /** * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range. - * @ppgtt: Master ppgtt structure. + * @vm: Master vm structure. * @pdp: Page directory pointer for this address range. * @start: Starting virtual address to begin allocations. - * @length Size of the allocations. - * @new_pds Bitmap set by function with new allocations. Likely used by the + * @length: Size of the allocations. + * @new_pds: Bitmap set by function with new allocations. Likely used by the * caller to free on error. * * Allocate the required number of page directories starting at the pde index of @@ -810,47 +1070,106 @@ unwind_out: * * Return: 0 if success; negative error code otherwise. */ -static int gen8_ppgtt_alloc_page_directories(struct i915_hw_ppgtt *ppgtt, - struct i915_page_directory_pointer *pdp, - uint64_t start, - uint64_t length, - unsigned long *new_pds) +static int +gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm, + struct i915_page_directory_pointer *pdp, + uint64_t start, + uint64_t length, + unsigned long *new_pds) { - struct drm_device *dev = ppgtt->base.dev; + struct drm_device *dev = vm->dev; struct i915_page_directory *pd; uint64_t temp; uint32_t pdpe; + uint32_t pdpes = I915_PDPES_PER_PDP(dev); - WARN_ON(!bitmap_empty(new_pds, GEN8_LEGACY_PDPES)); + WARN_ON(!bitmap_empty(new_pds, pdpes)); gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) { - if (pd) + if (test_bit(pdpe, pdp->used_pdpes)) continue; pd = alloc_pd(dev); if (IS_ERR(pd)) goto unwind_out; - gen8_initialize_pd(&ppgtt->base, pd); + gen8_initialize_pd(vm, pd); pdp->page_directory[pdpe] = pd; __set_bit(pdpe, new_pds); + trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT); } return 0; unwind_out: - for_each_set_bit(pdpe, new_pds, GEN8_LEGACY_PDPES) + for_each_set_bit(pdpe, new_pds, pdpes) free_pd(dev, pdp->page_directory[pdpe]); return -ENOMEM; } +/** + * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range. + * @vm: Master vm structure. + * @pml4: Page map level 4 for this address range. + * @start: Starting virtual address to begin allocations. + * @length: Size of the allocations. + * @new_pdps: Bitmap set by function with new allocations. Likely used by the + * caller to free on error. + * + * Allocate the required number of page directory pointers. Extremely similar to + * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs(). + * The main difference is here we are limited by the pml4 boundary (instead of + * the page directory pointer). + * + * Return: 0 if success; negative error code otherwise. + */ +static int +gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, + struct i915_pml4 *pml4, + uint64_t start, + uint64_t length, + unsigned long *new_pdps) +{ + struct drm_device *dev = vm->dev; + struct i915_page_directory_pointer *pdp; + uint64_t temp; + uint32_t pml4e; + + WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4)); + + gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) { + if (!test_bit(pml4e, pml4->used_pml4es)) { + pdp = alloc_pdp(dev); + if (IS_ERR(pdp)) + goto unwind_out; + + gen8_initialize_pdp(vm, pdp); + pml4->pdps[pml4e] = pdp; + __set_bit(pml4e, new_pdps); + trace_i915_page_directory_pointer_entry_alloc(vm, + pml4e, + start, + GEN8_PML4E_SHIFT); + } + } + + return 0; + +unwind_out: + for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) + free_pdp(dev, pml4->pdps[pml4e]); + + return -ENOMEM; +} + static void -free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts) +free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts, + uint32_t pdpes) { int i; - for (i = 0; i < GEN8_LEGACY_PDPES; i++) + for (i = 0; i < pdpes; i++) kfree(new_pts[i]); kfree(new_pts); kfree(new_pds); @@ -861,23 +1180,24 @@ free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts) */ static int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, - unsigned long ***new_pts) + unsigned long ***new_pts, + uint32_t pdpes) { int i; unsigned long *pds; unsigned long **pts; - pds = kcalloc(BITS_TO_LONGS(GEN8_LEGACY_PDPES), sizeof(unsigned long), GFP_KERNEL); + pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_KERNEL); if (!pds) return -ENOMEM; - pts = kcalloc(GEN8_LEGACY_PDPES, sizeof(unsigned long *), GFP_KERNEL); + pts = kcalloc(pdpes, sizeof(unsigned long *), GFP_KERNEL); if (!pts) { kfree(pds); return -ENOMEM; } - for (i = 0; i < GEN8_LEGACY_PDPES; i++) { + for (i = 0; i < pdpes; i++) { pts[i] = kcalloc(BITS_TO_LONGS(I915_PDES), sizeof(unsigned long), GFP_KERNEL); if (!pts[i]) @@ -890,7 +1210,7 @@ int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, return 0; err_out: - free_gen8_temp_bitmaps(pds, pts); + free_gen8_temp_bitmaps(pds, pts, pdpes); return -ENOMEM; } @@ -904,18 +1224,21 @@ static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask; } -static int gen8_alloc_va_range(struct i915_address_space *vm, - uint64_t start, - uint64_t length) +static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, + struct i915_page_directory_pointer *pdp, + uint64_t start, + uint64_t length) { struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); unsigned long *new_page_dirs, **new_page_tables; + struct drm_device *dev = vm->dev; struct i915_page_directory *pd; const uint64_t orig_start = start; const uint64_t orig_length = length; uint64_t temp; uint32_t pdpe; + uint32_t pdpes = I915_PDPES_PER_PDP(dev); int ret; /* Wrap is never okay since we can only represent 48b, and we don't @@ -924,24 +1247,24 @@ static int gen8_alloc_va_range(struct i915_address_space *vm, if (WARN_ON(start + length < start)) return -ENODEV; - if (WARN_ON(start + length > ppgtt->base.total)) + if (WARN_ON(start + length > vm->total)) return -ENODEV; - ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables); + ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); if (ret) return ret; /* Do the allocations first so we can easily bail out */ - ret = gen8_ppgtt_alloc_page_directories(ppgtt, &ppgtt->pdp, start, length, - new_page_dirs); + ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length, + new_page_dirs); if (ret) { - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); + free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes); return ret; } /* For every page directory referenced, allocate page tables */ - gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) { - ret = gen8_ppgtt_alloc_pagetabs(ppgtt, pd, start, length, + gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) { + ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length, new_page_tables[pdpe]); if (ret) goto err_out; @@ -952,10 +1275,10 @@ static int gen8_alloc_va_range(struct i915_address_space *vm, /* Allocations have completed successfully, so set the bitmaps, and do * the mappings. */ - gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) { + gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) { gen8_pde_t *const page_directory = kmap_px(pd); struct i915_page_table *pt; - uint64_t pd_len = gen8_clamp_pd(start, length); + uint64_t pd_len = length; uint64_t pd_start = start; uint32_t pde; @@ -979,34 +1302,210 @@ static int gen8_alloc_va_range(struct i915_address_space *vm, /* Map the PDE to the page table */ page_directory[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC); + trace_i915_page_table_entry_map(&ppgtt->base, pde, pt, + gen8_pte_index(start), + gen8_pte_count(start, length), + GEN8_PTES); /* NB: We haven't yet mapped ptes to pages. At this * point we're still relying on insert_entries() */ } kunmap_px(ppgtt, page_directory); - - __set_bit(pdpe, ppgtt->pdp.used_pdpes); + __set_bit(pdpe, pdp->used_pdpes); + gen8_setup_page_directory(ppgtt, pdp, pd, pdpe); } - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); + free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes); mark_tlbs_dirty(ppgtt); return 0; err_out: while (pdpe--) { for_each_set_bit(temp, new_page_tables[pdpe], I915_PDES) - free_pt(vm->dev, ppgtt->pdp.page_directory[pdpe]->page_table[temp]); + free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]); } - for_each_set_bit(pdpe, new_page_dirs, GEN8_LEGACY_PDPES) - free_pd(vm->dev, ppgtt->pdp.page_directory[pdpe]); + for_each_set_bit(pdpe, new_page_dirs, pdpes) + free_pd(dev, pdp->page_directory[pdpe]); - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); + free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes); mark_tlbs_dirty(ppgtt); return ret; } +static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, + struct i915_pml4 *pml4, + uint64_t start, + uint64_t length) +{ + DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4); + struct i915_hw_ppgtt *ppgtt = + container_of(vm, struct i915_hw_ppgtt, base); + struct i915_page_directory_pointer *pdp; + uint64_t temp, pml4e; + int ret = 0; + + /* Do the pml4 allocations first, so we don't need to track the newly + * allocated tables below the pdp */ + bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4); + + /* The pagedirectory and pagetable allocations are done in the shared 3 + * and 4 level code. Just allocate the pdps. + */ + ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length, + new_pdps); + if (ret) + return ret; + + WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2, + "The allocation has spanned more than 512GB. " + "It is highly likely this is incorrect."); + + gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) { + WARN_ON(!pdp); + + ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length); + if (ret) + goto err_out; + + gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e); + } + + bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, + GEN8_PML4ES_PER_PML4); + + return 0; + +err_out: + for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) + gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]); + + return ret; +} + +static int gen8_alloc_va_range(struct i915_address_space *vm, + uint64_t start, uint64_t length) +{ + struct i915_hw_ppgtt *ppgtt = + container_of(vm, struct i915_hw_ppgtt, base); + + if (USES_FULL_48BIT_PPGTT(vm->dev)) + return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length); + else + return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); +} + +static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, + uint64_t start, uint64_t length, + gen8_pte_t scratch_pte, + struct seq_file *m) +{ + struct i915_page_directory *pd; + uint64_t temp; + uint32_t pdpe; + + gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) { + struct i915_page_table *pt; + uint64_t pd_len = length; + uint64_t pd_start = start; + uint32_t pde; + + if (!test_bit(pdpe, pdp->used_pdpes)) + continue; + + seq_printf(m, "\tPDPE #%d\n", pdpe); + gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) { + uint32_t pte; + gen8_pte_t *pt_vaddr; + + if (!test_bit(pde, pd->used_pdes)) + continue; + + pt_vaddr = kmap_px(pt); + for (pte = 0; pte < GEN8_PTES; pte += 4) { + uint64_t va = + (pdpe << GEN8_PDPE_SHIFT) | + (pde << GEN8_PDE_SHIFT) | + (pte << GEN8_PTE_SHIFT); + int i; + bool found = false; + + for (i = 0; i < 4; i++) + if (pt_vaddr[pte + i] != scratch_pte) + found = true; + if (!found) + continue; + + seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte); + for (i = 0; i < 4; i++) { + if (pt_vaddr[pte + i] != scratch_pte) + seq_printf(m, " %llx", pt_vaddr[pte + i]); + else + seq_puts(m, " SCRATCH "); + } + seq_puts(m, "\n"); + } + /* don't use kunmap_px, it could trigger + * an unnecessary flush. + */ + kunmap_atomic(pt_vaddr); + } + } +} + +static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) +{ + struct i915_address_space *vm = &ppgtt->base; + uint64_t start = ppgtt->base.start; + uint64_t length = ppgtt->base.total; + gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), + I915_CACHE_LLC, true); + + if (!USES_FULL_48BIT_PPGTT(vm->dev)) { + gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m); + } else { + uint64_t templ4, pml4e; + struct i915_pml4 *pml4 = &ppgtt->pml4; + struct i915_page_directory_pointer *pdp; + + gen8_for_each_pml4e(pdp, pml4, start, length, templ4, pml4e) { + if (!test_bit(pml4e, pml4->used_pml4es)) + continue; + + seq_printf(m, " PML4E #%llu\n", pml4e); + gen8_dump_pdp(pdp, start, length, scratch_pte, m); + } + } +} + +static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) +{ + unsigned long *new_page_dirs, **new_page_tables; + uint32_t pdpes = I915_PDPES_PER_PDP(dev); + int ret; + + /* We allocate temp bitmap for page tables for no gain + * but as this is for init only, lets keep the things simple + */ + ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); + if (ret) + return ret; + + /* Allocate for all pdps regardless of how the ppgtt + * was defined. + */ + ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp, + 0, 1ULL << 32, + new_page_dirs); + if (!ret) + *ppgtt->pdp.used_pdpes = *new_page_dirs; + + free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes); + + return ret; +} + /* * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers * with a net effect resembling a 2-level page table in normal x86 terms. Each @@ -1023,24 +1522,49 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) return ret; ppgtt->base.start = 0; - ppgtt->base.total = 1ULL << 32; - if (IS_ENABLED(CONFIG_X86_32)) - /* While we have a proliferation of size_t variables - * we cannot represent the full ppgtt size on 32bit, - * so limit it to the same size as the GGTT (currently - * 2GiB). - */ - ppgtt->base.total = to_i915(ppgtt->base.dev)->gtt.base.total; ppgtt->base.cleanup = gen8_ppgtt_cleanup; ppgtt->base.allocate_va_range = gen8_alloc_va_range; ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; ppgtt->base.clear_range = gen8_ppgtt_clear_range; ppgtt->base.unbind_vma = ppgtt_unbind_vma; ppgtt->base.bind_vma = ppgtt_bind_vma; + ppgtt->debug_dump = gen8_dump_ppgtt; + + if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { + ret = setup_px(ppgtt->base.dev, &ppgtt->pml4); + if (ret) + goto free_scratch; - ppgtt->switch_mm = gen8_mm_switch; + gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4); + + ppgtt->base.total = 1ULL << 48; + ppgtt->switch_mm = gen8_48b_mm_switch; + } else { + ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp); + if (ret) + goto free_scratch; + + ppgtt->base.total = 1ULL << 32; + ppgtt->switch_mm = gen8_legacy_mm_switch; + trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, + 0, 0, + GEN8_PML4E_SHIFT); + + if (intel_vgpu_active(ppgtt->base.dev)) { + ret = gen8_preallocate_top_level_pdps(ppgtt); + if (ret) + goto free_scratch; + } + } + + if (intel_vgpu_active(ppgtt->base.dev)) + gen8_ppgtt_notify_vgt(ppgtt, true); return 0; + +free_scratch: + gen8_free_scratch(&ppgtt->base); + return ret; } static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) @@ -1228,8 +1752,9 @@ static void gen8_ppgtt_enable(struct drm_device *dev) int j; for_each_ring(ring, dev_priv, j) { + u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0; I915_WRITE(RING_MODE_GEN7(ring), - _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); + _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level)); } } @@ -2084,9 +2609,9 @@ static void i915_gtt_color_adjust(struct drm_mm_node *node, } static int i915_gem_setup_global_gtt(struct drm_device *dev, - unsigned long start, - unsigned long mappable_end, - unsigned long end) + u64 start, + u64 mappable_end, + u64 end) { /* Let GEM Manage all of the aperture. * @@ -2125,7 +2650,7 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev, list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm); - DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n", + DRM_DEBUG_KMS("reserving preallocated space: %llx + %zx\n", i915_gem_obj_ggtt_offset(obj), obj->base.size); WARN_ON(i915_gem_obj_ggtt_bound(obj)); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index e1cfa292f9ad..82750073d5b3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -39,6 +39,8 @@ struct drm_i915_file_private; typedef uint32_t gen6_pte_t; typedef uint64_t gen8_pte_t; typedef uint64_t gen8_pde_t; +typedef uint64_t gen8_ppgtt_pdpe_t; +typedef uint64_t gen8_ppgtt_pml4e_t; #define gtt_total_entries(gtt) ((gtt).base.total >> PAGE_SHIFT) @@ -88,9 +90,18 @@ typedef uint64_t gen8_pde_t; * PDPE | PDE | PTE | offset * The difference as compared to normal x86 3 level page table is the PDPEs are * programmed via register. + * + * GEN8 48b legacy style address is defined as a 4 level page table: + * 47:39 | 38:30 | 29:21 | 20:12 | 11:0 + * PML4E | PDPE | PDE | PTE | offset */ +#define GEN8_PML4ES_PER_PML4 512 +#define GEN8_PML4E_SHIFT 39 +#define GEN8_PML4E_MASK (GEN8_PML4ES_PER_PML4 - 1) #define GEN8_PDPE_SHIFT 30 -#define GEN8_PDPE_MASK 0x3 +/* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page + * tables */ +#define GEN8_PDPE_MASK 0x1ff #define GEN8_PDE_SHIFT 21 #define GEN8_PDE_MASK 0x1ff #define GEN8_PTE_SHIFT 12 @@ -98,6 +109,9 @@ typedef uint64_t gen8_pde_t; #define GEN8_LEGACY_PDPES 4 #define GEN8_PTES I915_PTES(sizeof(gen8_pte_t)) +#define I915_PDPES_PER_PDP(dev) (USES_FULL_48BIT_PPGTT(dev) ?\ + GEN8_PML4ES_PER_PML4 : GEN8_LEGACY_PDPES) + #define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD) #define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */ #define PPAT_CACHED_INDEX _PAGE_PAT /* WB LLCeLLC */ @@ -135,7 +149,7 @@ struct i915_ggtt_view { union { struct { - unsigned long offset; + u64 offset; unsigned int size; } partial; } params; @@ -241,9 +255,17 @@ struct i915_page_directory { }; struct i915_page_directory_pointer { - /* struct page *page; */ - DECLARE_BITMAP(used_pdpes, GEN8_LEGACY_PDPES); - struct i915_page_directory *page_directory[GEN8_LEGACY_PDPES]; + struct i915_page_dma base; + + unsigned long *used_pdpes; + struct i915_page_directory **page_directory; +}; + +struct i915_pml4 { + struct i915_page_dma base; + + DECLARE_BITMAP(used_pml4es, GEN8_PML4ES_PER_PML4); + struct i915_page_directory_pointer *pdps[GEN8_PML4ES_PER_PML4]; }; struct i915_address_space { @@ -256,6 +278,7 @@ struct i915_address_space { struct i915_page_scratch *scratch_page; struct i915_page_table *scratch_pt; struct i915_page_directory *scratch_pd; + struct i915_page_directory_pointer *scratch_pdp; /* GEN8+ & 48b PPGTT */ /** * List of objects currently involved in rendering. @@ -341,8 +364,9 @@ struct i915_hw_ppgtt { struct drm_mm_node node; unsigned long pd_dirty_rings; union { - struct i915_page_directory_pointer pdp; - struct i915_page_directory pd; + struct i915_pml4 pml4; /* GEN8+ & 48b PPGTT */ + struct i915_page_directory_pointer pdp; /* GEN8+ */ + struct i915_page_directory pd; /* GEN6-7 */ }; struct drm_i915_file_private *file_priv; @@ -436,24 +460,23 @@ static inline uint32_t gen6_pde_index(uint32_t addr) temp = min(temp, length), \ start += temp, length -= temp) -#define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter) \ - for (iter = gen8_pdpe_index(start); \ - pd = (pdp)->page_directory[iter], length > 0 && iter < GEN8_LEGACY_PDPES; \ +#define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter) \ + for (iter = gen8_pdpe_index(start); \ + pd = (pdp)->page_directory[iter], \ + length > 0 && (iter < I915_PDPES_PER_PDP(dev)); \ iter++, \ temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT) - start, \ temp = min(temp, length), \ start += temp, length -= temp) -/* Clamp length to the next page_directory boundary */ -static inline uint64_t gen8_clamp_pd(uint64_t start, uint64_t length) -{ - uint64_t next_pd = ALIGN(start + 1, 1 << GEN8_PDPE_SHIFT); - - if (next_pd > (start + length)) - return length; - - return next_pd - start; -} +#define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter) \ + for (iter = gen8_pml4e_index(start); \ + pdp = (pml4)->pdps[iter], \ + length > 0 && iter < GEN8_PML4ES_PER_PML4; \ + iter++, \ + temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start, \ + temp = min(temp, length), \ + start += temp, length -= temp) static inline uint32_t gen8_pte_index(uint64_t address) { @@ -472,8 +495,7 @@ static inline uint32_t gen8_pdpe_index(uint64_t address) static inline uint32_t gen8_pml4e_index(uint64_t address) { - WARN_ON(1); /* For 64B */ - return 0; + return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK; } static inline size_t gen8_pte_count(uint64_t address, uint64_t length) diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 8fd431bcdfd3..d11901d590ac 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -813,7 +813,6 @@ static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { int i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_userptr *args = data; struct drm_i915_gem_object *obj; int ret; @@ -826,9 +825,6 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file if (offset_in_page(args->user_ptr | args->user_size)) return -EINVAL; - if (args->user_size > dev_priv->gtt.base.total) - return -E2BIG; - if (!access_ok(args->flags & I915_USERPTR_READ_ONLY ? VERIFY_READ : VERIFY_WRITE, (char __user *)(unsigned long)args->user_ptr, args->user_size)) return -EFAULT; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 41d0739e6fdf..3379f9c1ef88 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -30,11 +30,6 @@ #include <generated/utsrelease.h> #include "i915_drv.h" -static const char *yesno(int v) -{ - return v ? "yes" : "no"; -} - static const char *ring_str(int ring) { switch (ring) { @@ -197,8 +192,9 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, err_printf(m, " %s [%d]:\n", name, count); while (count--) { - err_printf(m, " %08x %8u %02x %02x [ ", - err->gtt_offset, + err_printf(m, " %08x_%08x %8u %02x %02x [ ", + upper_32_bits(err->gtt_offset), + lower_32_bits(err->gtt_offset), err->size, err->read_domains, err->write_domain); @@ -427,15 +423,17 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, err_printf(m, " (submitted by %s [%d])", error->ring[i].comm, error->ring[i].pid); - err_printf(m, " --- gtt_offset = 0x%08x\n", - obj->gtt_offset); + err_printf(m, " --- gtt_offset = 0x%08x %08x\n", + upper_32_bits(obj->gtt_offset), + lower_32_bits(obj->gtt_offset)); print_error_obj(m, obj); } obj = error->ring[i].wa_batchbuffer; if (obj) { err_printf(m, "%s (w/a) --- gtt_offset = 0x%08x\n", - dev_priv->ring[i].name, obj->gtt_offset); + dev_priv->ring[i].name, + lower_32_bits(obj->gtt_offset)); print_error_obj(m, obj); } @@ -454,22 +452,22 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, if ((obj = error->ring[i].ringbuffer)) { err_printf(m, "%s --- ringbuffer = 0x%08x\n", dev_priv->ring[i].name, - obj->gtt_offset); + lower_32_bits(obj->gtt_offset)); print_error_obj(m, obj); } if ((obj = error->ring[i].hws_page)) { - err_printf(m, "%s --- HW Status = 0x%08x\n", - dev_priv->ring[i].name, - obj->gtt_offset); + err_printf(m, "%s --- HW Status = 0x%08llx\n", + dev_priv->ring[i].name, + obj->gtt_offset + LRC_PPHWSP_PN * PAGE_SIZE); offset = 0; for (elt = 0; elt < PAGE_SIZE/16; elt += 4) { err_printf(m, "[%04x] %08x %08x %08x %08x\n", offset, - obj->pages[0][elt], - obj->pages[0][elt+1], - obj->pages[0][elt+2], - obj->pages[0][elt+3]); + obj->pages[LRC_PPHWSP_PN][elt], + obj->pages[LRC_PPHWSP_PN][elt+1], + obj->pages[LRC_PPHWSP_PN][elt+2], + obj->pages[LRC_PPHWSP_PN][elt+3]); offset += 16; } } @@ -477,13 +475,14 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, if ((obj = error->ring[i].ctx)) { err_printf(m, "%s --- HW Context = 0x%08x\n", dev_priv->ring[i].name, - obj->gtt_offset); + lower_32_bits(obj->gtt_offset)); print_error_obj(m, obj); } } if ((obj = error->semaphore_obj)) { - err_printf(m, "Semaphore page = 0x%08x\n", obj->gtt_offset); + err_printf(m, "Semaphore page = 0x%08x\n", + lower_32_bits(obj->gtt_offset)); for (elt = 0; elt < PAGE_SIZE/16; elt += 4) { err_printf(m, "[%04x] %08x %08x %08x %08x\n", elt * 4, @@ -591,7 +590,7 @@ i915_error_object_create(struct drm_i915_private *dev_priv, int num_pages; bool use_ggtt; int i = 0; - u32 reloc_offset; + u64 reloc_offset; if (src == NULL || src->pages == NULL) return NULL; diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h b/drivers/gpu/drm/i915/i915_guc_reg.h index ccdc6c8ac20b..8c8e574e63ba 100644 --- a/drivers/gpu/drm/i915/i915_guc_reg.h +++ b/drivers/gpu/drm/i915/i915_guc_reg.h @@ -38,10 +38,6 @@ #define GS_MIA_SHIFT 16 #define GS_MIA_MASK (0x07 << GS_MIA_SHIFT) -#define GUC_WOPCM_SIZE 0xc050 -#define GUC_WOPCM_SIZE_VALUE (0x80 << 12) /* 512KB */ -#define GUC_WOPCM_OFFSET 0x80000 /* 512KB */ - #define SOFT_SCRATCH(n) (0xc180 + ((n) * 4)) #define UOS_RSA_SCRATCH_0 0xc200 @@ -56,10 +52,18 @@ #define UOS_MOVE (1<<4) #define START_DMA (1<<0) #define DMA_GUC_WOPCM_OFFSET 0xc340 +#define GUC_WOPCM_OFFSET_VALUE 0x80000 /* 512KB */ + +#define GUC_WOPCM_SIZE 0xc050 +#define GUC_WOPCM_SIZE_VALUE (0x80 << 12) /* 512KB */ + +/* GuC addresses below GUC_WOPCM_TOP don't map through the GTT */ +#define GUC_WOPCM_TOP (GUC_WOPCM_SIZE_VALUE) #define GEN8_GT_PM_CONFIG 0x138140 +#define GEN9LP_GT_PM_CONFIG 0x138140 #define GEN9_GT_PM_CONFIG 0x13816c -#define GEN8_GT_DOORBELL_ENABLE (1<<0) +#define GT_DOORBELL_ENABLE (1<<0) #define GEN8_GTCR 0x4274 #define GEN8_GTCR_INVALIDATE (1<<0) @@ -80,7 +84,8 @@ GUC_ENABLE_READ_CACHE_LOGIC | \ GUC_ENABLE_MIA_CACHING | \ GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA | \ - GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA) + GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA | \ + GUC_ENABLE_MIA_CLOCK_GATING) #define HOST2GUC_INTERRUPT 0xc4c8 #define HOST2GUC_TRIGGER (1<<0) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c new file mode 100644 index 000000000000..792d0b958a2c --- /dev/null +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -0,0 +1,916 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ +#include <linux/firmware.h> +#include <linux/circ_buf.h> +#include "i915_drv.h" +#include "intel_guc.h" + +/** + * DOC: GuC Client + * + * i915_guc_client: + * We use the term client to avoid confusion with contexts. A i915_guc_client is + * equivalent to GuC object guc_context_desc. This context descriptor is + * allocated from a pool of 1024 entries. Kernel driver will allocate doorbell + * and workqueue for it. Also the process descriptor (guc_process_desc), which + * is mapped to client space. So the client can write Work Item then ring the + * doorbell. + * + * To simplify the implementation, we allocate one gem object that contains all + * pages for doorbell, process descriptor and workqueue. + * + * The Scratch registers: + * There are 16 MMIO-based registers start from 0xC180. The kernel driver writes + * a value to the action register (SOFT_SCRATCH_0) along with any data. It then + * triggers an interrupt on the GuC via another register write (0xC4C8). + * Firmware writes a success/fail code back to the action register after + * processes the request. The kernel driver polls waiting for this update and + * then proceeds. + * See host2guc_action() + * + * Doorbells: + * Doorbells are interrupts to uKernel. A doorbell is a single cache line (QW) + * mapped into process space. + * + * Work Items: + * There are several types of work items that the host may place into a + * workqueue, each with its own requirements and limitations. Currently only + * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which + * represents in-order queue. The kernel driver packs ring tail pointer and an + * ELSP context descriptor dword into Work Item. + * See guc_add_workqueue_item() + * + */ + +/* + * Read GuC command/status register (SOFT_SCRATCH_0) + * Return true if it contains a response rather than a command + */ +static inline bool host2guc_action_response(struct drm_i915_private *dev_priv, + u32 *status) +{ + u32 val = I915_READ(SOFT_SCRATCH(0)); + *status = val; + return GUC2HOST_IS_RESPONSE(val); +} + +static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + u32 status; + int i; + int ret; + + if (WARN_ON(len < 1 || len > 15)) + return -EINVAL; + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + spin_lock(&dev_priv->guc.host2guc_lock); + + dev_priv->guc.action_count += 1; + dev_priv->guc.action_cmd = data[0]; + + for (i = 0; i < len; i++) + I915_WRITE(SOFT_SCRATCH(i), data[i]); + + POSTING_READ(SOFT_SCRATCH(i - 1)); + + I915_WRITE(HOST2GUC_INTERRUPT, HOST2GUC_TRIGGER); + + /* No HOST2GUC command should take longer than 10ms */ + ret = wait_for_atomic(host2guc_action_response(dev_priv, &status), 10); + if (status != GUC2HOST_STATUS_SUCCESS) { + /* + * Either the GuC explicitly returned an error (which + * we convert to -EIO here) or no response at all was + * received within the timeout limit (-ETIMEDOUT) + */ + if (ret != -ETIMEDOUT) + ret = -EIO; + + DRM_ERROR("GUC: host2guc action 0x%X failed. ret=%d " + "status=0x%08X response=0x%08X\n", + data[0], ret, status, + I915_READ(SOFT_SCRATCH(15))); + + dev_priv->guc.action_fail += 1; + dev_priv->guc.action_err = ret; + } + dev_priv->guc.action_status = status; + + spin_unlock(&dev_priv->guc.host2guc_lock); + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + + return ret; +} + +/* + * Tell the GuC to allocate or deallocate a specific doorbell + */ + +static int host2guc_allocate_doorbell(struct intel_guc *guc, + struct i915_guc_client *client) +{ + u32 data[2]; + + data[0] = HOST2GUC_ACTION_ALLOCATE_DOORBELL; + data[1] = client->ctx_index; + + return host2guc_action(guc, data, 2); +} + +static int host2guc_release_doorbell(struct intel_guc *guc, + struct i915_guc_client *client) +{ + u32 data[2]; + + data[0] = HOST2GUC_ACTION_DEALLOCATE_DOORBELL; + data[1] = client->ctx_index; + + return host2guc_action(guc, data, 2); +} + +static int host2guc_sample_forcewake(struct intel_guc *guc, + struct i915_guc_client *client) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + u32 data[2]; + + data[0] = HOST2GUC_ACTION_SAMPLE_FORCEWAKE; + data[1] = (intel_enable_rc6(dev_priv->dev)) ? 1 : 0; + + return host2guc_action(guc, data, 2); +} + +/* + * Initialise, update, or clear doorbell data shared with the GuC + * + * These functions modify shared data and so need access to the mapped + * client object which contains the page being used for the doorbell + */ + +static void guc_init_doorbell(struct intel_guc *guc, + struct i915_guc_client *client) +{ + struct guc_doorbell_info *doorbell; + void *base; + + base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0)); + doorbell = base + client->doorbell_offset; + + doorbell->db_status = 1; + doorbell->cookie = 0; + + kunmap_atomic(base); +} + +static int guc_ring_doorbell(struct i915_guc_client *gc) +{ + struct guc_process_desc *desc; + union guc_doorbell_qw db_cmp, db_exc, db_ret; + union guc_doorbell_qw *db; + void *base; + int attempt = 2, ret = -EAGAIN; + + base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0)); + desc = base + gc->proc_desc_offset; + + /* Update the tail so it is visible to GuC */ + desc->tail = gc->wq_tail; + + /* current cookie */ + db_cmp.db_status = GUC_DOORBELL_ENABLED; + db_cmp.cookie = gc->cookie; + + /* cookie to be updated */ + db_exc.db_status = GUC_DOORBELL_ENABLED; + db_exc.cookie = gc->cookie + 1; + if (db_exc.cookie == 0) + db_exc.cookie = 1; + + /* pointer of current doorbell cacheline */ + db = base + gc->doorbell_offset; + + while (attempt--) { + /* lets ring the doorbell */ + db_ret.value_qw = atomic64_cmpxchg((atomic64_t *)db, + db_cmp.value_qw, db_exc.value_qw); + + /* if the exchange was successfully executed */ + if (db_ret.value_qw == db_cmp.value_qw) { + /* db was successfully rung */ + gc->cookie = db_exc.cookie; + ret = 0; + break; + } + + /* XXX: doorbell was lost and need to acquire it again */ + if (db_ret.db_status == GUC_DOORBELL_DISABLED) + break; + + DRM_ERROR("Cookie mismatch. Expected %d, returned %d\n", + db_cmp.cookie, db_ret.cookie); + + /* update the cookie to newly read cookie from GuC */ + db_cmp.cookie = db_ret.cookie; + db_exc.cookie = db_ret.cookie + 1; + if (db_exc.cookie == 0) + db_exc.cookie = 1; + } + + kunmap_atomic(base); + return ret; +} + +static void guc_disable_doorbell(struct intel_guc *guc, + struct i915_guc_client *client) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct guc_doorbell_info *doorbell; + void *base; + int drbreg = GEN8_DRBREGL(client->doorbell_id); + int value; + + base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0)); + doorbell = base + client->doorbell_offset; + + doorbell->db_status = 0; + + kunmap_atomic(base); + + I915_WRITE(drbreg, I915_READ(drbreg) & ~GEN8_DRB_VALID); + + value = I915_READ(drbreg); + WARN_ON((value & GEN8_DRB_VALID) != 0); + + I915_WRITE(GEN8_DRBREGU(client->doorbell_id), 0); + I915_WRITE(drbreg, 0); + + /* XXX: wait for any interrupts */ + /* XXX: wait for workqueue to drain */ +} + +/* + * Select, assign and relase doorbell cachelines + * + * These functions track which doorbell cachelines are in use. + * The data they manipulate is protected by the host2guc lock. + */ + +static uint32_t select_doorbell_cacheline(struct intel_guc *guc) +{ + const uint32_t cacheline_size = cache_line_size(); + uint32_t offset; + + spin_lock(&guc->host2guc_lock); + + /* Doorbell uses a single cache line within a page */ + offset = offset_in_page(guc->db_cacheline); + + /* Moving to next cache line to reduce contention */ + guc->db_cacheline += cacheline_size; + + spin_unlock(&guc->host2guc_lock); + + DRM_DEBUG_DRIVER("selected doorbell cacheline 0x%x, next 0x%x, linesize %u\n", + offset, guc->db_cacheline, cacheline_size); + + return offset; +} + +static uint16_t assign_doorbell(struct intel_guc *guc, uint32_t priority) +{ + /* + * The bitmap is split into two halves; the first half is used for + * normal priority contexts, the second half for high-priority ones. + * Note that logically higher priorities are numerically less than + * normal ones, so the test below means "is it high-priority?" + */ + const bool hi_pri = (priority <= GUC_CTX_PRIORITY_HIGH); + const uint16_t half = GUC_MAX_DOORBELLS / 2; + const uint16_t start = hi_pri ? half : 0; + const uint16_t end = start + half; + uint16_t id; + + spin_lock(&guc->host2guc_lock); + id = find_next_zero_bit(guc->doorbell_bitmap, end, start); + if (id == end) + id = GUC_INVALID_DOORBELL_ID; + else + bitmap_set(guc->doorbell_bitmap, id, 1); + spin_unlock(&guc->host2guc_lock); + + DRM_DEBUG_DRIVER("assigned %s priority doorbell id 0x%x\n", + hi_pri ? "high" : "normal", id); + + return id; +} + +static void release_doorbell(struct intel_guc *guc, uint16_t id) +{ + spin_lock(&guc->host2guc_lock); + bitmap_clear(guc->doorbell_bitmap, id, 1); + spin_unlock(&guc->host2guc_lock); +} + +/* + * Initialise the process descriptor shared with the GuC firmware. + */ +static void guc_init_proc_desc(struct intel_guc *guc, + struct i915_guc_client *client) +{ + struct guc_process_desc *desc; + void *base; + + base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0)); + desc = base + client->proc_desc_offset; + + memset(desc, 0, sizeof(*desc)); + + /* + * XXX: pDoorbell and WQVBaseAddress are pointers in process address + * space for ring3 clients (set them as in mmap_ioctl) or kernel + * space for kernel clients (map on demand instead? May make debug + * easier to have it mapped). + */ + desc->wq_base_addr = 0; + desc->db_base_addr = 0; + + desc->context_id = client->ctx_index; + desc->wq_size_bytes = client->wq_size; + desc->wq_status = WQ_STATUS_ACTIVE; + desc->priority = client->priority; + + kunmap_atomic(base); +} + +/* + * Initialise/clear the context descriptor shared with the GuC firmware. + * + * This descriptor tells the GuC where (in GGTT space) to find the important + * data structures relating to this client (doorbell, process descriptor, + * write queue, etc). + */ + +static void guc_init_ctx_desc(struct intel_guc *guc, + struct i915_guc_client *client) +{ + struct intel_context *ctx = client->owner; + struct guc_context_desc desc; + struct sg_table *sg; + int i; + + memset(&desc, 0, sizeof(desc)); + + desc.attribute = GUC_CTX_DESC_ATTR_ACTIVE | GUC_CTX_DESC_ATTR_KERNEL; + desc.context_id = client->ctx_index; + desc.priority = client->priority; + desc.db_id = client->doorbell_id; + + for (i = 0; i < I915_NUM_RINGS; i++) { + struct guc_execlist_context *lrc = &desc.lrc[i]; + struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf; + struct intel_engine_cs *ring; + struct drm_i915_gem_object *obj; + uint64_t ctx_desc; + + /* TODO: We have a design issue to be solved here. Only when we + * receive the first batch, we know which engine is used by the + * user. But here GuC expects the lrc and ring to be pinned. It + * is not an issue for default context, which is the only one + * for now who owns a GuC client. But for future owner of GuC + * client, need to make sure lrc is pinned prior to enter here. + */ + obj = ctx->engine[i].state; + if (!obj) + break; /* XXX: continue? */ + + ring = ringbuf->ring; + ctx_desc = intel_lr_context_descriptor(ctx, ring); + lrc->context_desc = (u32)ctx_desc; + + /* The state page is after PPHWSP */ + lrc->ring_lcra = i915_gem_obj_ggtt_offset(obj) + + LRC_STATE_PN * PAGE_SIZE; + lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) | + (ring->id << GUC_ELC_ENGINE_OFFSET); + + obj = ringbuf->obj; + + lrc->ring_begin = i915_gem_obj_ggtt_offset(obj); + lrc->ring_end = lrc->ring_begin + obj->base.size - 1; + lrc->ring_next_free_location = lrc->ring_begin; + lrc->ring_current_tail_pointer_value = 0; + + desc.engines_used |= (1 << ring->id); + } + + WARN_ON(desc.engines_used == 0); + + /* + * The CPU address is only needed at certain points, so kmap_atomic on + * demand instead of storing it in the ctx descriptor. + * XXX: May make debug easier to have it mapped + */ + desc.db_trigger_cpu = 0; + desc.db_trigger_uk = client->doorbell_offset + + i915_gem_obj_ggtt_offset(client->client_obj); + desc.db_trigger_phy = client->doorbell_offset + + sg_dma_address(client->client_obj->pages->sgl); + + desc.process_desc = client->proc_desc_offset + + i915_gem_obj_ggtt_offset(client->client_obj); + + desc.wq_addr = client->wq_offset + + i915_gem_obj_ggtt_offset(client->client_obj); + + desc.wq_size = client->wq_size; + + /* + * XXX: Take LRCs from an existing intel_context if this is not an + * IsKMDCreatedContext client + */ + desc.desc_private = (uintptr_t)client; + + /* Pool context is pinned already */ + sg = guc->ctx_pool_obj->pages; + sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), + sizeof(desc) * client->ctx_index); +} + +static void guc_fini_ctx_desc(struct intel_guc *guc, + struct i915_guc_client *client) +{ + struct guc_context_desc desc; + struct sg_table *sg; + + memset(&desc, 0, sizeof(desc)); + + sg = guc->ctx_pool_obj->pages; + sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), + sizeof(desc) * client->ctx_index); +} + +/* Get valid workqueue item and return it back to offset */ +static int guc_get_workqueue_space(struct i915_guc_client *gc, u32 *offset) +{ + struct guc_process_desc *desc; + void *base; + u32 size = sizeof(struct guc_wq_item); + int ret = 0, timeout_counter = 200; + + base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0)); + desc = base + gc->proc_desc_offset; + + while (timeout_counter-- > 0) { + ret = wait_for_atomic(CIRC_SPACE(gc->wq_tail, desc->head, + gc->wq_size) >= size, 1); + + if (!ret) { + *offset = gc->wq_tail; + + /* advance the tail for next workqueue item */ + gc->wq_tail += size; + gc->wq_tail &= gc->wq_size - 1; + + /* this will break the loop */ + timeout_counter = 0; + } + }; + + kunmap_atomic(base); + + return ret; +} + +static int guc_add_workqueue_item(struct i915_guc_client *gc, + struct drm_i915_gem_request *rq) +{ + enum intel_ring_id ring_id = rq->ring->id; + struct guc_wq_item *wqi; + void *base; + u32 tail, wq_len, wq_off = 0; + int ret; + + ret = guc_get_workqueue_space(gc, &wq_off); + if (ret) + return ret; + + /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we + * should not have the case where structure wqi is across page, neither + * wrapped to the beginning. This simplifies the implementation below. + * + * XXX: if not the case, we need save data to a temp wqi and copy it to + * workqueue buffer dw by dw. + */ + WARN_ON(sizeof(struct guc_wq_item) != 16); + WARN_ON(wq_off & 3); + + /* wq starts from the page after doorbell / process_desc */ + base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, + (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT)); + wq_off &= PAGE_SIZE - 1; + wqi = (struct guc_wq_item *)((char *)base + wq_off); + + /* len does not include the header */ + wq_len = sizeof(struct guc_wq_item) / sizeof(u32) - 1; + wqi->header = WQ_TYPE_INORDER | + (wq_len << WQ_LEN_SHIFT) | + (ring_id << WQ_TARGET_SHIFT) | + WQ_NO_WCFLUSH_WAIT; + + /* The GuC wants only the low-order word of the context descriptor */ + wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, rq->ring); + + /* The GuC firmware wants the tail index in QWords, not bytes */ + tail = rq->ringbuf->tail >> 3; + wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT; + wqi->fence_id = 0; /*XXX: what fence to be here */ + + kunmap_atomic(base); + + return 0; +} + +#define CTX_RING_BUFFER_START 0x08 + +/* Update the ringbuffer pointer in a saved context image */ +static void lr_context_update(struct drm_i915_gem_request *rq) +{ + enum intel_ring_id ring_id = rq->ring->id; + struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring_id].state; + struct drm_i915_gem_object *rb_obj = rq->ringbuf->obj; + struct page *page; + uint32_t *reg_state; + + BUG_ON(!ctx_obj); + WARN_ON(!i915_gem_obj_is_pinned(ctx_obj)); + WARN_ON(!i915_gem_obj_is_pinned(rb_obj)); + + page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); + reg_state = kmap_atomic(page); + + reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj); + + kunmap_atomic(reg_state); +} + +/** + * i915_guc_submit() - Submit commands through GuC + * @client: the guc client where commands will go through + * @ctx: LRC where commands come from + * @ring: HW engine that will excute the commands + * + * Return: 0 if succeed + */ +int i915_guc_submit(struct i915_guc_client *client, + struct drm_i915_gem_request *rq) +{ + struct intel_guc *guc = client->guc; + enum intel_ring_id ring_id = rq->ring->id; + unsigned long flags; + int q_ret, b_ret; + + /* Need this because of the deferred pin ctx and ring */ + /* Shall we move this right after ring is pinned? */ + lr_context_update(rq); + + spin_lock_irqsave(&client->wq_lock, flags); + + q_ret = guc_add_workqueue_item(client, rq); + if (q_ret == 0) + b_ret = guc_ring_doorbell(client); + + client->submissions[ring_id] += 1; + if (q_ret) { + client->q_fail += 1; + client->retcode = q_ret; + } else if (b_ret) { + client->b_fail += 1; + client->retcode = q_ret = b_ret; + } else { + client->retcode = 0; + } + spin_unlock_irqrestore(&client->wq_lock, flags); + + spin_lock(&guc->host2guc_lock); + guc->submissions[ring_id] += 1; + guc->last_seqno[ring_id] = rq->seqno; + spin_unlock(&guc->host2guc_lock); + + return q_ret; +} + +/* + * Everything below here is concerned with setup & teardown, and is + * therefore not part of the somewhat time-critical batch-submission + * path of i915_guc_submit() above. + */ + +/** + * gem_allocate_guc_obj() - Allocate gem object for GuC usage + * @dev: drm device + * @size: size of object + * + * This is a wrapper to create a gem obj. In order to use it inside GuC, the + * object needs to be pinned lifetime. Also we must pin it to gtt space other + * than [0, GUC_WOPCM_TOP) because this range is reserved inside GuC. + * + * Return: A drm_i915_gem_object if successful, otherwise NULL. + */ +static struct drm_i915_gem_object *gem_allocate_guc_obj(struct drm_device *dev, + u32 size) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_gem_object *obj; + + obj = i915_gem_alloc_object(dev, size); + if (!obj) + return NULL; + + if (i915_gem_object_get_pages(obj)) { + drm_gem_object_unreference(&obj->base); + return NULL; + } + + if (i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, + PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) { + drm_gem_object_unreference(&obj->base); + return NULL; + } + + /* Invalidate GuC TLB to let GuC take the latest updates to GTT. */ + I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); + + return obj; +} + +/** + * gem_release_guc_obj() - Release gem object allocated for GuC usage + * @obj: gem obj to be released + */ +static void gem_release_guc_obj(struct drm_i915_gem_object *obj) +{ + if (!obj) + return; + + if (i915_gem_obj_is_pinned(obj)) + i915_gem_object_ggtt_unpin(obj); + + drm_gem_object_unreference(&obj->base); +} + +static void guc_client_free(struct drm_device *dev, + struct i915_guc_client *client) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_guc *guc = &dev_priv->guc; + + if (!client) + return; + + if (client->doorbell_id != GUC_INVALID_DOORBELL_ID) { + /* + * First disable the doorbell, then tell the GuC we've + * finished with it, finally deallocate it in our bitmap + */ + guc_disable_doorbell(guc, client); + host2guc_release_doorbell(guc, client); + release_doorbell(guc, client->doorbell_id); + } + + /* + * XXX: wait for any outstanding submissions before freeing memory. + * Be sure to drop any locks + */ + + gem_release_guc_obj(client->client_obj); + + if (client->ctx_index != GUC_INVALID_CTX_ID) { + guc_fini_ctx_desc(guc, client); + ida_simple_remove(&guc->ctx_ids, client->ctx_index); + } + + kfree(client); +} + +/** + * guc_client_alloc() - Allocate an i915_guc_client + * @dev: drm device + * @priority: four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW + * The kernel client to replace ExecList submission is created with + * NORMAL priority. Priority of a client for scheduler can be HIGH, + * while a preemption context can use CRITICAL. + * @ctx the context to own the client (we use the default render context) + * + * Return: An i915_guc_client object if success. + */ +static struct i915_guc_client *guc_client_alloc(struct drm_device *dev, + uint32_t priority, + struct intel_context *ctx) +{ + struct i915_guc_client *client; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_guc *guc = &dev_priv->guc; + struct drm_i915_gem_object *obj; + + client = kzalloc(sizeof(*client), GFP_KERNEL); + if (!client) + return NULL; + + client->doorbell_id = GUC_INVALID_DOORBELL_ID; + client->priority = priority; + client->owner = ctx; + client->guc = guc; + + client->ctx_index = (uint32_t)ida_simple_get(&guc->ctx_ids, 0, + GUC_MAX_GPU_CONTEXTS, GFP_KERNEL); + if (client->ctx_index >= GUC_MAX_GPU_CONTEXTS) { + client->ctx_index = GUC_INVALID_CTX_ID; + goto err; + } + + /* The first page is doorbell/proc_desc. Two followed pages are wq. */ + obj = gem_allocate_guc_obj(dev, GUC_DB_SIZE + GUC_WQ_SIZE); + if (!obj) + goto err; + + client->client_obj = obj; + client->wq_offset = GUC_DB_SIZE; + client->wq_size = GUC_WQ_SIZE; + spin_lock_init(&client->wq_lock); + + client->doorbell_offset = select_doorbell_cacheline(guc); + + /* + * Since the doorbell only requires a single cacheline, we can save + * space by putting the application process descriptor in the same + * page. Use the half of the page that doesn't include the doorbell. + */ + if (client->doorbell_offset >= (GUC_DB_SIZE / 2)) + client->proc_desc_offset = 0; + else + client->proc_desc_offset = (GUC_DB_SIZE / 2); + + client->doorbell_id = assign_doorbell(guc, client->priority); + if (client->doorbell_id == GUC_INVALID_DOORBELL_ID) + /* XXX: evict a doorbell instead */ + goto err; + + guc_init_proc_desc(guc, client); + guc_init_ctx_desc(guc, client); + guc_init_doorbell(guc, client); + + /* XXX: Any cache flushes needed? General domain mgmt calls? */ + + if (host2guc_allocate_doorbell(guc, client)) + goto err; + + DRM_DEBUG_DRIVER("new priority %u client %p: ctx_index %u db_id %u\n", + priority, client, client->ctx_index, client->doorbell_id); + + return client; + +err: + DRM_ERROR("FAILED to create priority %u GuC client!\n", priority); + + guc_client_free(dev, client); + return NULL; +} + +static void guc_create_log(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct drm_i915_gem_object *obj; + unsigned long offset; + uint32_t size, flags; + + if (i915.guc_log_level < GUC_LOG_VERBOSITY_MIN) + return; + + if (i915.guc_log_level > GUC_LOG_VERBOSITY_MAX) + i915.guc_log_level = GUC_LOG_VERBOSITY_MAX; + + /* The first page is to save log buffer state. Allocate one + * extra page for others in case for overlap */ + size = (1 + GUC_LOG_DPC_PAGES + 1 + + GUC_LOG_ISR_PAGES + 1 + + GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT; + + obj = guc->log_obj; + if (!obj) { + obj = gem_allocate_guc_obj(dev_priv->dev, size); + if (!obj) { + /* logging will be off */ + i915.guc_log_level = -1; + return; + } + + guc->log_obj = obj; + } + + /* each allocated unit is a page */ + flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL | + (GUC_LOG_DPC_PAGES << GUC_LOG_DPC_SHIFT) | + (GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) | + (GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT); + + offset = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT; /* in pages */ + guc->log_flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags; +} + +/* + * Set up the memory resources to be shared with the GuC. At this point, + * we require just one object that can be mapped through the GGTT. + */ +int i915_guc_submission_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + const size_t ctxsize = sizeof(struct guc_context_desc); + const size_t poolsize = GUC_MAX_GPU_CONTEXTS * ctxsize; + const size_t gemsize = round_up(poolsize, PAGE_SIZE); + struct intel_guc *guc = &dev_priv->guc; + + if (!i915.enable_guc_submission) + return 0; /* not enabled */ + + if (guc->ctx_pool_obj) + return 0; /* already allocated */ + + guc->ctx_pool_obj = gem_allocate_guc_obj(dev_priv->dev, gemsize); + if (!guc->ctx_pool_obj) + return -ENOMEM; + + spin_lock_init(&dev_priv->guc.host2guc_lock); + + ida_init(&guc->ctx_ids); + + guc_create_log(guc); + + return 0; +} + +int i915_guc_submission_enable(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_guc *guc = &dev_priv->guc; + struct intel_context *ctx = dev_priv->ring[RCS].default_context; + struct i915_guc_client *client; + + /* client for execbuf submission */ + client = guc_client_alloc(dev, GUC_CTX_PRIORITY_KMD_NORMAL, ctx); + if (!client) { + DRM_ERROR("Failed to create execbuf guc_client\n"); + return -ENOMEM; + } + + guc->execbuf_client = client; + + host2guc_sample_forcewake(guc, client); + + return 0; +} + +void i915_guc_submission_disable(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_guc *guc = &dev_priv->guc; + + guc_client_free(dev, guc->execbuf_client); + guc->execbuf_client = NULL; +} + +void i915_guc_submission_fini(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_guc *guc = &dev_priv->guc; + + gem_release_guc_obj(dev_priv->guc.log_obj); + guc->log_obj = NULL; + + if (guc->ctx_pool_obj) + ida_destroy(&guc->ctx_ids); + gem_release_guc_obj(guc->ctx_pool_obj); + guc->ctx_pool_obj = NULL; +} diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index d94c92d842fb..8485bea966cc 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -97,6 +97,7 @@ static const u32 hpd_status_i915[HPD_NUM_PINS] = { /* BXT hpd list */ static const u32 hpd_bxt[HPD_NUM_PINS] = { + [HPD_PORT_A] = BXT_DE_PORT_HP_DDIA, [HPD_PORT_B] = BXT_DE_PORT_HP_DDIB, [HPD_PORT_C] = BXT_DE_PORT_HP_DDIC }; @@ -3054,30 +3055,25 @@ static void bxt_hpd_irq_setup(struct drm_device *dev) u32 hotplug_port = 0; u32 hotplug_ctrl; - /* Now, enable HPD */ for_each_intel_encoder(dev, intel_encoder) { if (dev_priv->hotplug.stats[intel_encoder->hpd_pin].state == HPD_ENABLED) hotplug_port |= hpd_bxt[intel_encoder->hpd_pin]; } - /* Mask all HPD control bits */ hotplug_ctrl = I915_READ(BXT_HOTPLUG_CTL) & ~BXT_HOTPLUG_CTL_MASK; - /* Enable requested port in hotplug control */ - /* TODO: implement (short) HPD support on port A */ - WARN_ON_ONCE(hotplug_port & BXT_DE_PORT_HP_DDIA); + if (hotplug_port & BXT_DE_PORT_HP_DDIA) + hotplug_ctrl |= BXT_DDIA_HPD_ENABLE; if (hotplug_port & BXT_DE_PORT_HP_DDIB) hotplug_ctrl |= BXT_DDIB_HPD_ENABLE; if (hotplug_port & BXT_DE_PORT_HP_DDIC) hotplug_ctrl |= BXT_DDIC_HPD_ENABLE; I915_WRITE(BXT_HOTPLUG_CTL, hotplug_ctrl); - /* Unmask DDI hotplug in IMR */ hotplug_ctrl = I915_READ(GEN8_DE_PORT_IMR) & ~hotplug_port; I915_WRITE(GEN8_DE_PORT_IMR, hotplug_ctrl); - /* Enable DDI hotplug in IER */ hotplug_ctrl = I915_READ(GEN8_DE_PORT_IER) | hotplug_port; I915_WRITE(GEN8_DE_PORT_IER, hotplug_ctrl); POSTING_READ(GEN8_DE_PORT_IER); diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 5ae4b0aba564..05053e2e9ff0 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -51,6 +51,7 @@ struct i915_params i915 __read_mostly = { .use_mmio_flip = 0, .mmio_debug = 0, .verbose_state_checks = 1, + .nuclear_pageflip = 0, .edp_vswing = 0, .enable_guc_submission = false, .guc_log_level = -1, @@ -177,6 +178,10 @@ module_param_named(verbose_state_checks, i915.verbose_state_checks, bool, 0600); MODULE_PARM_DESC(verbose_state_checks, "Enable verbose logs (ie. WARN_ON()) in case of unexpected hw state conditions."); +module_param_named_unsafe(nuclear_pageflip, i915.nuclear_pageflip, bool, 0600); +MODULE_PARM_DESC(nuclear_pageflip, + "Force atomic modeset functionality; asynchronous mode is not yet supported. (default: false)."); + /* WA to get away with the default setting in VBT for early platforms.Will be removed */ module_param_named_unsafe(edp_vswing, i915.edp_vswing, int, 0400); MODULE_PARM_DESC(edp_vswing, diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 83a0888756d6..e7c9dc8e24fe 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -352,8 +352,8 @@ */ #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) #define MI_LRI_FORCE_POSTED (1<<12) -#define MI_STORE_REGISTER_MEM(x) MI_INSTR(0x24, 2*(x)-1) -#define MI_STORE_REGISTER_MEM_GEN8(x) MI_INSTR(0x24, 3*(x)-1) +#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1) +#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2) #define MI_SRM_LRM_GLOBAL_GTT (1<<22) #define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */ #define MI_FLUSH_DW_STORE_INDEX (1<<21) @@ -364,8 +364,8 @@ #define MI_INVALIDATE_BSD (1<<7) #define MI_FLUSH_DW_USE_GTT (1<<2) #define MI_FLUSH_DW_USE_PPGTT (0<<2) -#define MI_LOAD_REGISTER_MEM(x) MI_INSTR(0x29, 2*(x)-1) -#define MI_LOAD_REGISTER_MEM_GEN8(x) MI_INSTR(0x29, 3*(x)-1) +#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1) +#define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2) #define MI_BATCH_BUFFER MI_INSTR(0x30, 1) #define MI_BATCH_NON_SECURE (1) /* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */ @@ -1099,6 +1099,12 @@ enum skl_disp_power_wells { #define DPIO_CHV_INT_LOCK_THRESHOLD_SEL_COARSE 1 /* 1: coarse & 0 : fine */ #define CHV_PLL_DW9(ch) _PIPE(ch, _CHV_PLL_DW9_CH0, _CHV_PLL_DW9_CH1) +#define _CHV_CMN_DW0_CH0 0x8100 +#define DPIO_ALLDL_POWERDOWN_SHIFT_CH0 19 +#define DPIO_ANYDL_POWERDOWN_SHIFT_CH0 18 +#define DPIO_ALLDL_POWERDOWN (1 << 1) +#define DPIO_ANYDL_POWERDOWN (1 << 0) + #define _CHV_CMN_DW5_CH0 0x8114 #define CHV_BUFRIGHTENA1_DISABLE (0 << 20) #define CHV_BUFRIGHTENA1_NORMAL (1 << 20) @@ -1135,10 +1141,23 @@ enum skl_disp_power_wells { #define _CHV_CMN_DW19_CH0 0x814c #define _CHV_CMN_DW6_CH1 0x8098 +#define DPIO_ALLDL_POWERDOWN_SHIFT_CH1 30 /* CL2 DW6 only */ +#define DPIO_ANYDL_POWERDOWN_SHIFT_CH1 29 /* CL2 DW6 only */ +#define DPIO_DYNPWRDOWNEN_CH1 (1 << 28) /* CL2 DW6 only */ #define CHV_CMN_USEDCLKCHANNEL (1 << 13) + #define CHV_CMN_DW19(ch) _PIPE(ch, _CHV_CMN_DW19_CH0, _CHV_CMN_DW6_CH1) +#define CHV_CMN_DW28 0x8170 +#define DPIO_CL1POWERDOWNEN (1 << 23) +#define DPIO_DYNPWRDOWNEN_CH0 (1 << 22) +#define DPIO_SUS_CLK_CONFIG_ON (0 << 0) +#define DPIO_SUS_CLK_CONFIG_CLKREQ (1 << 0) +#define DPIO_SUS_CLK_CONFIG_GATE (2 << 0) +#define DPIO_SUS_CLK_CONFIG_GATE_CLKREQ (3 << 0) + #define CHV_CMN_DW30 0x8178 +#define DPIO_CL2_LDOFUSE_PWRENB (1 << 6) #define DPIO_LRC_BYPASS (1 << 3) #define _TXLANE(ch, lane, offset) ((ch ? 0x2400 : 0) + \ @@ -1674,11 +1693,18 @@ enum skl_disp_power_wells { #define GFX_MODE_GEN7 0x0229c #define RING_MODE_GEN7(ring) ((ring)->mmio_base+0x29c) #define GFX_RUN_LIST_ENABLE (1<<15) +#define GFX_INTERRUPT_STEERING (1<<14) #define GFX_TLB_INVALIDATE_EXPLICIT (1<<13) #define GFX_SURFACE_FAULT_ENABLE (1<<12) #define GFX_REPLAY_MODE (1<<11) #define GFX_PSMI_GRANULARITY (1<<10) #define GFX_PPGTT_ENABLE (1<<9) +#define GEN8_GFX_PPGTT_48B (1<<7) + +#define GFX_FORWARD_VBLANK_MASK (3<<5) +#define GFX_FORWARD_VBLANK_NEVER (0<<5) +#define GFX_FORWARD_VBLANK_ALWAYS (1<<5) +#define GFX_FORWARD_VBLANK_COND (2<<5) #define VLV_DISPLAY_BASE 0x180000 #define VLV_MIPI_BASE VLV_DISPLAY_BASE @@ -2185,16 +2211,20 @@ enum skl_disp_power_wells { #define DPIO_PHY_STATUS (VLV_DISPLAY_BASE + 0x6240) #define DPLL_PORTD_READY_MASK (0xf) #define DISPLAY_PHY_CONTROL (VLV_DISPLAY_BASE + 0x60100) +#define PHY_CH_POWER_DOWN_OVRD_EN(phy, ch) (1 << (2*(phy)+(ch)+27)) #define PHY_LDO_DELAY_0NS 0x0 #define PHY_LDO_DELAY_200NS 0x1 #define PHY_LDO_DELAY_600NS 0x2 #define PHY_LDO_SEQ_DELAY(delay, phy) ((delay) << (2*(phy)+23)) +#define PHY_CH_POWER_DOWN_OVRD(mask, phy, ch) ((mask) << (8*(phy)+4*(ch)+11)) #define PHY_CH_SU_PSR 0x1 #define PHY_CH_DEEP_PSR 0x7 #define PHY_CH_POWER_MODE(mode, phy, ch) ((mode) << (6*(phy)+3*(ch)+2)) #define PHY_COM_LANE_RESET_DEASSERT(phy) (1 << (phy)) #define DISPLAY_PHY_STATUS (VLV_DISPLAY_BASE + 0x60104) #define PHY_POWERGOOD(phy) (((phy) == DPIO_PHY0) ? (1<<31) : (1<<30)) +#define PHY_STATUS_CMN_LDO(phy, ch) (1 << (6-(6*(phy)+3*(ch)))) +#define PHY_STATUS_SPLINE_LDO(phy, ch, spline) (1 << (8-(6*(phy)+3*(ch)+(spline)))) /* * The i830 generation, in LVDS mode, defines P1 as the bit number set within @@ -4107,6 +4137,7 @@ enum skl_disp_power_wells { /* How many wires to use. I guess 3 was too hard */ #define DP_PORT_WIDTH(width) (((width) - 1) << 19) #define DP_PORT_WIDTH_MASK (7 << 19) +#define DP_PORT_WIDTH_SHIFT 19 /* Mystic DPCD version 1.1 special mode */ #define DP_ENHANCED_FRAMING (1 << 18) @@ -5693,11 +5724,12 @@ enum skl_disp_power_wells { #define GEN8_GT_IIR(which) (0x44308 + (0x10 * (which))) #define GEN8_GT_IER(which) (0x4430c + (0x10 * (which))) -#define GEN8_BCS_IRQ_SHIFT 16 #define GEN8_RCS_IRQ_SHIFT 0 -#define GEN8_VCS2_IRQ_SHIFT 16 +#define GEN8_BCS_IRQ_SHIFT 16 #define GEN8_VCS1_IRQ_SHIFT 0 +#define GEN8_VCS2_IRQ_SHIFT 16 #define GEN8_VECS_IRQ_SHIFT 0 +#define GEN8_WD_IRQ_SHIFT 16 #define GEN8_DE_PIPE_ISR(pipe) (0x44400 + (0x10 * (pipe))) #define GEN8_DE_PIPE_IMR(pipe) (0x44404 + (0x10 * (pipe))) @@ -6870,7 +6902,9 @@ enum skl_disp_power_wells { #define GEN9_PGCTL_SSB_EU311_ACK (1 << 14) #define GEN7_MISCCPCTL (0x9424) -#define GEN7_DOP_CLOCK_GATE_ENABLE (1<<0) +#define GEN7_DOP_CLOCK_GATE_ENABLE (1<<0) +#define GEN8_DOP_CLOCK_GATE_CFCLK_ENABLE (1<<2) +#define GEN8_DOP_CLOCK_GATE_GUC_ENABLE (1<<4) #define GEN8_GARBCNTL 0xB004 #define GEN9_GAPS_TSV_CREDIT_DISABLE (1<<7) @@ -7159,6 +7193,8 @@ enum skl_disp_power_wells { #define DDI_BUF_IS_IDLE (1<<7) #define DDI_A_4_LANES (1<<4) #define DDI_PORT_WIDTH(width) (((width) - 1) << 1) +#define DDI_PORT_WIDTH_MASK (7 << 1) +#define DDI_PORT_WIDTH_SHIFT 1 #define DDI_INIT_DISPLAY_DETECTED (1<<0) /* DDI Buffer Translations */ diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 2f34c47bd4bf..e6b5c7470ba0 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -186,33 +186,49 @@ DEFINE_EVENT(i915_va, i915_va_alloc, TP_ARGS(vm, start, length, name) ); -DECLARE_EVENT_CLASS(i915_page_table_entry, - TP_PROTO(struct i915_address_space *vm, u32 pde, u64 start, u64 pde_shift), - TP_ARGS(vm, pde, start, pde_shift), +DECLARE_EVENT_CLASS(i915_px_entry, + TP_PROTO(struct i915_address_space *vm, u32 px, u64 start, u64 px_shift), + TP_ARGS(vm, px, start, px_shift), TP_STRUCT__entry( __field(struct i915_address_space *, vm) - __field(u32, pde) + __field(u32, px) __field(u64, start) __field(u64, end) ), TP_fast_assign( __entry->vm = vm; - __entry->pde = pde; + __entry->px = px; __entry->start = start; - __entry->end = ((start + (1ULL << pde_shift)) & ~((1ULL << pde_shift)-1)) - 1; + __entry->end = ((start + (1ULL << px_shift)) & ~((1ULL << px_shift)-1)) - 1; ), TP_printk("vm=%p, pde=%d (0x%llx-0x%llx)", - __entry->vm, __entry->pde, __entry->start, __entry->end) + __entry->vm, __entry->px, __entry->start, __entry->end) ); -DEFINE_EVENT(i915_page_table_entry, i915_page_table_entry_alloc, +DEFINE_EVENT(i915_px_entry, i915_page_table_entry_alloc, TP_PROTO(struct i915_address_space *vm, u32 pde, u64 start, u64 pde_shift), TP_ARGS(vm, pde, start, pde_shift) ); +DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_entry_alloc, + TP_PROTO(struct i915_address_space *vm, u32 pdpe, u64 start, u64 pdpe_shift), + TP_ARGS(vm, pdpe, start, pdpe_shift), + + TP_printk("vm=%p, pdpe=%d (0x%llx-0x%llx)", + __entry->vm, __entry->px, __entry->start, __entry->end) +); + +DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_pointer_entry_alloc, + TP_PROTO(struct i915_address_space *vm, u32 pml4e, u64 start, u64 pml4e_shift), + TP_ARGS(vm, pml4e, start, pml4e_shift), + + TP_printk("vm=%p, pml4e=%d (0x%llx-0x%llx)", + __entry->vm, __entry->px, __entry->start, __entry->end) +); + /* Avoid extra math because we only support two sizes. The format is defined by * bitmap_scnprintf. Each 32 bits is 8 HEX digits followed by comma */ #define TRACE_PT_SIZE(bits) \ diff --git a/drivers/gpu/drm/i915/i915_vgpu.h b/drivers/gpu/drm/i915/i915_vgpu.h index 97a88b5f6a26..21c97f44d637 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.h +++ b/drivers/gpu/drm/i915/i915_vgpu.h @@ -40,6 +40,19 @@ #define INTEL_VGT_IF_VERSION \ INTEL_VGT_IF_VERSION_ENCODE(VGT_VERSION_MAJOR, VGT_VERSION_MINOR) +/* + * notifications from guest to vgpu device model + */ +enum vgt_g2v_type { + VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE = 2, + VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY, + VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE, + VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY, + VGT_G2V_EXECLIST_CONTEXT_CREATE, + VGT_G2V_EXECLIST_CONTEXT_DESTROY, + VGT_G2V_MAX, +}; + struct vgt_if { uint64_t magic; /* VGT_MAGIC */ uint16_t version_major; @@ -70,11 +83,28 @@ struct vgt_if { uint32_t rsv3[0x200 - 24]; /* pad to half page */ /* * The bottom half page is for response from Gfx driver to hypervisor. - * Set to reserved fields temporarily by now. */ uint32_t rsv4; uint32_t display_ready; /* ready for display owner switch */ - uint32_t rsv5[0x200 - 2]; /* pad to one page */ + + uint32_t rsv5[4]; + + uint32_t g2v_notify; + uint32_t rsv6[7]; + + uint32_t pdp0_lo; + uint32_t pdp0_hi; + uint32_t pdp1_lo; + uint32_t pdp1_hi; + uint32_t pdp2_lo; + uint32_t pdp2_hi; + uint32_t pdp3_lo; + uint32_t pdp3_hi; + + uint32_t execlist_context_descriptor_lo; + uint32_t execlist_context_descriptor_hi; + + uint32_t rsv7[0x200 - 24]; /* pad to one page */ } __packed; #define vgtif_reg(x) \ diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c index e2531cf59266..9336e8030980 100644 --- a/drivers/gpu/drm/i915/intel_atomic.c +++ b/drivers/gpu/drm/i915/intel_atomic.c @@ -149,9 +149,6 @@ int intel_atomic_setup_scalers(struct drm_device *dev, int i, j; num_scalers_need = hweight32(scaler_state->scaler_users); - DRM_DEBUG_KMS("crtc_state = %p need = %d avail = %d scaler_users = 0x%x\n", - crtc_state, num_scalers_need, intel_crtc->num_scalers, - scaler_state->scaler_users); /* * High level flow: diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 61575f67a626..4823184258a0 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -707,7 +707,6 @@ void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder) intel_dp->DP = intel_dig_port->saved_port_bits | DDI_BUF_CTL_ENABLE | DDI_BUF_TRANS_SELECT(0); intel_dp->DP |= DDI_PORT_WIDTH(intel_dp->lane_count); - } static struct intel_encoder * @@ -1242,9 +1241,10 @@ hsw_ddi_calculate_wrpll(int clock /* in Hz */, static bool hsw_ddi_pll_select(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state, - struct intel_encoder *intel_encoder, - int clock) + struct intel_encoder *intel_encoder) { + int clock = crtc_state->port_clock; + if (intel_encoder->type == INTEL_OUTPUT_HDMI) { struct intel_shared_dpll *pll; uint32_t val; @@ -1523,11 +1523,11 @@ skip_remaining_dividers: static bool skl_ddi_pll_select(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state, - struct intel_encoder *intel_encoder, - int clock) + struct intel_encoder *intel_encoder) { struct intel_shared_dpll *pll; uint32_t ctrl1, cfgcr1, cfgcr2; + int clock = crtc_state->port_clock; /* * See comment in intel_dpll_hw_state to understand why we always use 0 @@ -1615,14 +1615,14 @@ static const struct bxt_clk_div bxt_dp_clk_val[] = { static bool bxt_ddi_pll_select(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state, - struct intel_encoder *intel_encoder, - int clock) + struct intel_encoder *intel_encoder) { struct intel_shared_dpll *pll; struct bxt_clk_div clk_div = {0}; int vco = 0; uint32_t prop_coef, int_coef, gain_ctl, targ_cnt; uint32_t lanestagger; + int clock = crtc_state->port_clock; if (intel_encoder->type == INTEL_OUTPUT_HDMI) { intel_clock_t best_clock; @@ -1750,17 +1750,16 @@ bool intel_ddi_pll_select(struct intel_crtc *intel_crtc, struct drm_device *dev = intel_crtc->base.dev; struct intel_encoder *intel_encoder = intel_ddi_get_crtc_new_encoder(crtc_state); - int clock = crtc_state->port_clock; if (IS_SKYLAKE(dev)) return skl_ddi_pll_select(intel_crtc, crtc_state, - intel_encoder, clock); + intel_encoder); else if (IS_BROXTON(dev)) return bxt_ddi_pll_select(intel_crtc, crtc_state, - intel_encoder, clock); + intel_encoder); else return hsw_ddi_pll_select(intel_crtc, crtc_state, - intel_encoder, clock); + intel_encoder); } void intel_ddi_set_pipe_settings(struct drm_crtc *crtc) @@ -1893,7 +1892,7 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) } else temp |= TRANS_DDI_MODE_SELECT_DP_SST; - temp |= DDI_PORT_WIDTH(intel_dp->lane_count); + temp |= DDI_PORT_WIDTH(intel_crtc->config->lane_count); } else if (type == INTEL_OUTPUT_DP_MST) { struct intel_dp *intel_dp = &enc_to_mst(encoder)->primary->dp; @@ -1902,7 +1901,7 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) } else temp |= TRANS_DDI_MODE_SELECT_DP_SST; - temp |= DDI_PORT_WIDTH(intel_dp->lane_count); + temp |= DDI_PORT_WIDTH(intel_crtc->config->lane_count); } else { WARN(1, "Invalid encoder type %d for pipe %c\n", intel_encoder->type, pipe_name(pipe)); @@ -2289,6 +2288,8 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder) if (type == INTEL_OUTPUT_DISPLAYPORT || type == INTEL_OUTPUT_EDP) { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + intel_dp_set_link_params(intel_dp, crtc->config); + intel_ddi_init_dp_buf_reg(intel_encoder); intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); @@ -3069,6 +3070,8 @@ void intel_ddi_get_config(struct intel_encoder *encoder, case TRANS_DDI_MODE_SELECT_DP_SST: case TRANS_DDI_MODE_SELECT_DP_MST: pipe_config->has_dp_encoder = true; + pipe_config->lane_count = + ((temp & DDI_PORT_WIDTH_MASK) >> DDI_PORT_WIDTH_SHIFT) + 1; intel_dp_get_m_n(intel_crtc, pipe_config); break; default: @@ -3215,7 +3218,15 @@ void intel_ddi_init(struct drm_device *dev, enum port port) goto err; intel_dig_port->hpd_pulse = intel_dp_hpd_pulse; - dev_priv->hotplug.irq_port[port] = intel_dig_port; + /* + * On BXT A0/A1, sw needs to activate DDIA HPD logic and + * interrupts to check the external panel connection. + */ + if (IS_BROXTON(dev_priv) && (INTEL_REVID(dev) < BXT_REVID_B0) + && port == PORT_B) + dev_priv->hotplug.irq_port[PORT_A] = intel_dig_port; + else + dev_priv->hotplug.irq_port[port] = intel_dig_port; } /* In theory we don't need the encoder->type check, but leave it just in diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ca9278be49f7..deba3330de71 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -135,6 +135,39 @@ intel_pch_rawclk(struct drm_device *dev) return I915_READ(PCH_RAWCLK_FREQ) & RAWCLK_FREQ_MASK; } +/* hrawclock is 1/4 the FSB frequency */ +int intel_hrawclk(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t clkcfg; + + /* There is no CLKCFG reg in Valleyview. VLV hrawclk is 200 MHz */ + if (IS_VALLEYVIEW(dev)) + return 200; + + clkcfg = I915_READ(CLKCFG); + switch (clkcfg & CLKCFG_FSB_MASK) { + case CLKCFG_FSB_400: + return 100; + case CLKCFG_FSB_533: + return 133; + case CLKCFG_FSB_667: + return 166; + case CLKCFG_FSB_800: + return 200; + case CLKCFG_FSB_1067: + return 266; + case CLKCFG_FSB_1333: + return 333; + /* these two are just a guess; one of them might be right */ + case CLKCFG_FSB_1600: + case CLKCFG_FSB_1600_ALT: + return 400; + default: + return 133; + } +} + static inline u32 /* units of 100MHz */ intel_fdi_link_freq(struct drm_device *dev) { @@ -1061,54 +1094,6 @@ static void intel_wait_for_pipe_off(struct intel_crtc *crtc) } } -/* - * ibx_digital_port_connected - is the specified port connected? - * @dev_priv: i915 private structure - * @port: the port to test - * - * Returns true if @port is connected, false otherwise. - */ -bool ibx_digital_port_connected(struct drm_i915_private *dev_priv, - struct intel_digital_port *port) -{ - u32 bit; - - if (HAS_PCH_IBX(dev_priv->dev)) { - switch (port->port) { - case PORT_B: - bit = SDE_PORTB_HOTPLUG; - break; - case PORT_C: - bit = SDE_PORTC_HOTPLUG; - break; - case PORT_D: - bit = SDE_PORTD_HOTPLUG; - break; - default: - return true; - } - } else { - switch (port->port) { - case PORT_B: - bit = SDE_PORTB_HOTPLUG_CPT; - break; - case PORT_C: - bit = SDE_PORTC_HOTPLUG_CPT; - break; - case PORT_D: - bit = SDE_PORTD_HOTPLUG_CPT; - break; - case PORT_E: - bit = SDE_PORTE_HOTPLUG_SPT; - break; - default: - return true; - } - } - - return I915_READ(SDEISR) & bit; -} - static const char *state_string(bool enabled) { return enabled ? "on" : "off"; @@ -1585,26 +1570,6 @@ static void assert_pch_ports_disabled(struct drm_i915_private *dev_priv, assert_pch_hdmi_disabled(dev_priv, pipe, PCH_HDMID); } -static void intel_init_dpio(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - if (!IS_VALLEYVIEW(dev)) - return; - - /* - * IOSF_PORT_DPIO is used for VLV x2 PHY (DP/HDMI B and C), - * CHV x1 PHY (DP/HDMI D) - * IOSF_PORT_DPIO_2 is used for CHV x2 PHY (DP/HDMI B and C) - */ - if (IS_CHERRYVIEW(dev)) { - DPIO_PHY_IOSF_PORT(DPIO_PHY0) = IOSF_PORT_DPIO_2; - DPIO_PHY_IOSF_PORT(DPIO_PHY1) = IOSF_PORT_DPIO; - } else { - DPIO_PHY_IOSF_PORT(DPIO_PHY0) = IOSF_PORT_DPIO; - } -} - static void vlv_enable_pll(struct intel_crtc *crtc, const struct intel_crtc_state *pipe_config) { @@ -1831,17 +1796,6 @@ static void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe) val &= ~DPIO_DCLKP_EN; vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port), val); - /* disable left/right clock distribution */ - if (pipe != PIPE_B) { - val = vlv_dpio_read(dev_priv, pipe, _CHV_CMN_DW5_CH0); - val &= ~(CHV_BUFLEFTENA1_MASK | CHV_BUFRIGHTENA1_MASK); - vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW5_CH0, val); - } else { - val = vlv_dpio_read(dev_priv, pipe, _CHV_CMN_DW1_CH1); - val &= ~(CHV_BUFLEFTENA2_MASK | CHV_BUFRIGHTENA2_MASK); - vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW1_CH1, val); - } - mutex_unlock(&dev_priv->sb_lock); } @@ -2936,8 +2890,6 @@ static void skl_detach_scaler(struct intel_crtc *intel_crtc, int id) I915_WRITE(SKL_PS_CTRL(intel_crtc->pipe, id), 0); I915_WRITE(SKL_PS_WIN_POS(intel_crtc->pipe, id), 0); I915_WRITE(SKL_PS_WIN_SZ(intel_crtc->pipe, id), 0); - DRM_DEBUG_KMS("CRTC:%d Disabled scaler id %u.%u\n", - intel_crtc->base.base.id, intel_crtc->pipe, id); } /* @@ -5277,6 +5229,21 @@ static void modeset_update_crtc_power_domains(struct drm_atomic_state *state) modeset_put_power_domains(dev_priv, put_domains[i]); } +static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) +{ + int max_cdclk_freq = dev_priv->max_cdclk_freq; + + if (INTEL_INFO(dev_priv)->gen >= 9 || + IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + return max_cdclk_freq; + else if (IS_CHERRYVIEW(dev_priv)) + return max_cdclk_freq*95/100; + else if (INTEL_INFO(dev_priv)->gen < 4) + return 2*max_cdclk_freq*90/100; + else + return max_cdclk_freq*90/100; +} + static void intel_update_max_cdclk(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -5316,8 +5283,13 @@ static void intel_update_max_cdclk(struct drm_device *dev) dev_priv->max_cdclk_freq = dev_priv->cdclk_freq; } + dev_priv->max_dotclk_freq = intel_compute_max_dotclk(dev_priv); + DRM_DEBUG_DRIVER("Max CD clock rate: %d kHz\n", dev_priv->max_cdclk_freq); + + DRM_DEBUG_DRIVER("Max dotclock rate: %d kHz\n", + dev_priv->max_dotclk_freq); } static void intel_update_cdclk(struct drm_device *dev) @@ -6035,13 +6007,6 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc) is_dsi = intel_pipe_has_type(intel_crtc, INTEL_OUTPUT_DSI); - if (!is_dsi) { - if (IS_CHERRYVIEW(dev)) - chv_prepare_pll(intel_crtc, intel_crtc->config); - else - vlv_prepare_pll(intel_crtc, intel_crtc->config); - } - if (intel_crtc->config->has_dp_encoder) intel_dp_set_m_n(intel_crtc, M1_N1); @@ -6065,10 +6030,13 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc) encoder->pre_pll_enable(encoder); if (!is_dsi) { - if (IS_CHERRYVIEW(dev)) + if (IS_CHERRYVIEW(dev)) { + chv_prepare_pll(intel_crtc, intel_crtc->config); chv_enable_pll(intel_crtc, intel_crtc->config); - else + } else { + vlv_prepare_pll(intel_crtc, intel_crtc->config); vlv_enable_pll(intel_crtc, intel_crtc->config); + } } for_each_encoder_on_crtc(dev, crtc, encoder) @@ -6196,6 +6164,10 @@ static void i9xx_crtc_disable(struct drm_crtc *crtc) i9xx_disable_pll(intel_crtc); } + for_each_encoder_on_crtc(dev, crtc, encoder) + if (encoder->post_pll_disable) + encoder->post_pll_disable(encoder); + if (!IS_GEN2(dev)) intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); @@ -7377,8 +7349,7 @@ static void chv_prepare_pll(struct intel_crtc *crtc, 1 << DPIO_CHV_N_DIV_SHIFT); /* M2 fraction division */ - if (bestm2_frac) - vlv_dpio_write(dev_priv, pipe, CHV_PLL_DW2(port), bestm2_frac); + vlv_dpio_write(dev_priv, pipe, CHV_PLL_DW2(port), bestm2_frac); /* M2 fraction division enable */ dpio_val = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW3(port)); @@ -8119,6 +8090,14 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc, else i9xx_crtc_clock_get(crtc, pipe_config); + /* + * Normally the dotclock is filled in by the encoder .get_config() + * but in case the pipe is enabled w/o any ports we need a sane + * default. + */ + pipe_config->base.adjusted_mode.crtc_clock = + pipe_config->port_clock / pipe_config->pixel_multiplier; + return true; } @@ -11034,10 +11013,10 @@ static int intel_gen7_queue_flip(struct drm_device *dev, DERRMR_PIPEB_PRI_FLIP_DONE | DERRMR_PIPEC_PRI_FLIP_DONE)); if (IS_GEN8(dev)) - intel_ring_emit(ring, MI_STORE_REGISTER_MEM_GEN8(1) | + intel_ring_emit(ring, MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT); else - intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1) | + intel_ring_emit(ring, MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT); intel_ring_emit(ring, DERRMR); intel_ring_emit(ring, ring->scratch.gtt_offset + 256); @@ -11161,11 +11140,10 @@ static void ilk_do_mmio_flip(struct intel_crtc *intel_crtc) static void intel_do_mmio_flip(struct intel_crtc *intel_crtc) { struct drm_device *dev = intel_crtc->base.dev; - u32 start_vbl_count; intel_mark_page_flip_active(intel_crtc); - intel_pipe_update_start(intel_crtc, &start_vbl_count); + intel_pipe_update_start(intel_crtc); if (INTEL_INFO(dev)->gen >= 9) skl_do_mmio_flip(intel_crtc); @@ -11173,7 +11151,7 @@ static void intel_do_mmio_flip(struct intel_crtc *intel_crtc) /* use_mmio_flip() retricts MMIO flips to ilk+ */ ilk_do_mmio_flip(intel_crtc); - intel_pipe_update_end(intel_crtc, start_vbl_count); + intel_pipe_update_end(intel_crtc); } static void intel_mmio_flip_work_func(struct work_struct *work) @@ -11237,6 +11215,9 @@ static bool __intel_pageflip_stall_check(struct drm_device *dev, if (atomic_read(&work->pending) >= INTEL_FLIP_COMPLETE) return true; + if (atomic_read(&work->pending) < INTEL_FLIP_PENDING) + return false; + if (!work->enable_stall_check) return false; @@ -11627,7 +11608,7 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, intel_crtc->atomic.update_wm_pre = true; } - if (visible) + if (visible || was_visible) intel_crtc->atomic.fb_bits |= to_intel_plane(plane)->frontbuffer_bit; @@ -11900,14 +11881,16 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc, pipe_config->fdi_m_n.gmch_m, pipe_config->fdi_m_n.gmch_n, pipe_config->fdi_m_n.link_m, pipe_config->fdi_m_n.link_n, pipe_config->fdi_m_n.tu); - DRM_DEBUG_KMS("dp: %i, gmch_m: %u, gmch_n: %u, link_m: %u, link_n: %u, tu: %u\n", + DRM_DEBUG_KMS("dp: %i, lanes: %i, gmch_m: %u, gmch_n: %u, link_m: %u, link_n: %u, tu: %u\n", pipe_config->has_dp_encoder, + pipe_config->lane_count, pipe_config->dp_m_n.gmch_m, pipe_config->dp_m_n.gmch_n, pipe_config->dp_m_n.link_m, pipe_config->dp_m_n.link_n, pipe_config->dp_m_n.tu); - DRM_DEBUG_KMS("dp: %i, gmch_m2: %u, gmch_n2: %u, link_m2: %u, link_n2: %u, tu2: %u\n", + DRM_DEBUG_KMS("dp: %i, lanes: %i, gmch_m2: %u, gmch_n2: %u, link_m2: %u, link_n2: %u, tu2: %u\n", pipe_config->has_dp_encoder, + pipe_config->lane_count, pipe_config->dp_m2_n2.gmch_m, pipe_config->dp_m2_n2.gmch_n, pipe_config->dp_m2_n2.link_m, @@ -12414,6 +12397,7 @@ intel_pipe_config_compare(struct drm_device *dev, PIPE_CONF_CHECK_M_N(fdi_m_n); PIPE_CONF_CHECK_I(has_dp_encoder); + PIPE_CONF_CHECK_I(lane_count); if (INTEL_INFO(dev)->gen < 8) { PIPE_CONF_CHECK_M_N(dp_m_n); @@ -13476,7 +13460,7 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc, /* Perform vblank evasion around commit operation */ if (crtc->state->active) - intel_pipe_update_start(intel_crtc, &intel_crtc->start_vbl_count); + intel_pipe_update_start(intel_crtc); if (!needs_modeset(crtc->state) && INTEL_INFO(dev)->gen >= 9) skl_detach_scalers(intel_crtc); @@ -13488,7 +13472,7 @@ static void intel_finish_crtc_commit(struct drm_crtc *crtc, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); if (crtc->state->active) - intel_pipe_update_end(intel_crtc, intel_crtc->start_vbl_count); + intel_pipe_update_end(intel_crtc); } /** @@ -14799,8 +14783,6 @@ void intel_modeset_init(struct drm_device *dev) } } - intel_init_dpio(dev); - intel_shared_dpll_init(dev); /* Just disable it once at startup */ @@ -14882,13 +14864,22 @@ intel_check_plane_mapping(struct intel_crtc *crtc) return true; } +static bool intel_crtc_has_encoders(struct intel_crtc *crtc) +{ + struct drm_device *dev = crtc->base.dev; + struct intel_encoder *encoder; + + for_each_encoder_on_crtc(dev, &crtc->base, encoder) + return true; + + return false; +} + static void intel_sanitize_crtc(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_encoder *encoder; u32 reg; - bool enable; /* Clear any frame start delays used for debugging left by the BIOS */ reg = PIPECONF(crtc->config->cpu_transcoder); @@ -14932,16 +14923,11 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) /* Adjust the state of the output pipe according to whether we * have active connectors/encoders. */ - enable = false; - for_each_encoder_on_crtc(dev, &crtc->base, encoder) { - enable = true; - break; - } - - if (!enable) + if (!intel_crtc_has_encoders(crtc)) intel_crtc_disable_noatomic(&crtc->base); if (crtc->active != crtc->base.state->active) { + struct intel_encoder *encoder; /* This can happen either due to bugs in the get_hw_state * functions or because of calls to intel_crtc_disable_noatomic, diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 0a2e33fbf20d..f8f4d99440c1 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -130,6 +130,11 @@ static void vlv_init_panel_power_sequencer(struct intel_dp *intel_dp); static void vlv_steal_power_sequencer(struct drm_device *dev, enum pipe pipe); +static unsigned int intel_dp_unused_lane_mask(int lane_count) +{ + return ~((1 << lane_count) - 1) & 0xf; +} + static int intel_dp_max_link_bw(struct intel_dp *intel_dp) { @@ -253,40 +258,6 @@ static void intel_dp_unpack_aux(uint32_t src, uint8_t *dst, int dst_bytes) dst[i] = src >> ((3-i) * 8); } -/* hrawclock is 1/4 the FSB frequency */ -static int -intel_hrawclk(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t clkcfg; - - /* There is no CLKCFG reg in Valleyview. VLV hrawclk is 200 MHz */ - if (IS_VALLEYVIEW(dev)) - return 200; - - clkcfg = I915_READ(CLKCFG); - switch (clkcfg & CLKCFG_FSB_MASK) { - case CLKCFG_FSB_400: - return 100; - case CLKCFG_FSB_533: - return 133; - case CLKCFG_FSB_667: - return 166; - case CLKCFG_FSB_800: - return 200; - case CLKCFG_FSB_1067: - return 266; - case CLKCFG_FSB_1333: - return 333; - /* these two are just a guess; one of them might be right */ - case CLKCFG_FSB_1600: - case CLKCFG_FSB_1600_ALT: - return 400; - default: - return 133; - } -} - static void intel_dp_init_panel_power_sequencer(struct drm_device *dev, struct intel_dp *intel_dp); @@ -333,7 +304,9 @@ vlv_power_sequencer_kick(struct intel_dp *intel_dp) struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = dev->dev_private; enum pipe pipe = intel_dp->pps_pipe; - bool pll_enabled; + bool pll_enabled, release_cl_override = false; + enum dpio_phy phy = DPIO_PHY(pipe); + enum dpio_channel ch = vlv_pipe_to_channel(pipe); uint32_t DP; if (WARN(I915_READ(intel_dp->output_reg) & DP_PORT_EN, @@ -363,9 +336,13 @@ vlv_power_sequencer_kick(struct intel_dp *intel_dp) * The DPLL for the pipe must be enabled for this to work. * So enable temporarily it if it's not already enabled. */ - if (!pll_enabled) + if (!pll_enabled) { + release_cl_override = IS_CHERRYVIEW(dev) && + !chv_phy_powergate_ch(dev_priv, phy, ch, true); + vlv_force_pll_on(dev, pipe, IS_CHERRYVIEW(dev) ? &chv_dpll[0].dpll : &vlv_dpll[0].dpll); + } /* * Similar magic as in intel_dp_enable_port(). @@ -382,8 +359,12 @@ vlv_power_sequencer_kick(struct intel_dp *intel_dp) I915_WRITE(intel_dp->output_reg, DP & ~DP_PORT_EN); POSTING_READ(intel_dp->output_reg); - if (!pll_enabled) + if (!pll_enabled) { vlv_force_pll_off(dev, pipe); + + if (release_cl_override) + chv_phy_powergate_ch(dev_priv, phy, ch, false); + } } static enum pipe @@ -1383,6 +1364,19 @@ int intel_dp_rate_select(struct intel_dp *intel_dp, int rate) return rate_to_index(rate, intel_dp->sink_rates); } +static void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock, + uint8_t *link_bw, uint8_t *rate_select) +{ + if (intel_dp->num_sink_rates) { + *link_bw = 0; + *rate_select = + intel_dp_rate_select(intel_dp, port_clock); + } else { + *link_bw = drm_dp_link_rate_to_bw_code(port_clock); + *rate_select = 0; + } +} + bool intel_dp_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) @@ -1404,6 +1398,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, int link_avail, link_clock; int common_rates[DP_MAX_SUPPORTED_RATES] = {}; int common_len; + uint8_t link_bw, rate_select; common_len = intel_dp_common_rates(intel_dp, common_rates); @@ -1499,32 +1494,23 @@ found: * CEA-861-E - 5.1 Default Encoding Parameters * VESA DisplayPort Ver.1.2a - 5.1.1.1 Video Colorimetry */ - if (bpp != 18 && drm_match_cea_mode(adjusted_mode) > 1) - intel_dp->color_range = DP_COLOR_RANGE_16_235; - else - intel_dp->color_range = 0; - } - - if (intel_dp->color_range) - pipe_config->limited_color_range = true; - - intel_dp->lane_count = lane_count; - - if (intel_dp->num_sink_rates) { - intel_dp->link_bw = 0; - intel_dp->rate_select = - intel_dp_rate_select(intel_dp, common_rates[clock]); + pipe_config->limited_color_range = + bpp != 18 && drm_match_cea_mode(adjusted_mode) > 1; } else { - intel_dp->link_bw = - drm_dp_link_rate_to_bw_code(common_rates[clock]); - intel_dp->rate_select = 0; + pipe_config->limited_color_range = + intel_dp->limited_color_range; } + pipe_config->lane_count = lane_count; + pipe_config->pipe_bpp = bpp; pipe_config->port_clock = common_rates[clock]; - DRM_DEBUG_KMS("DP link bw %02x lane count %d clock %d bpp %d\n", - intel_dp->link_bw, intel_dp->lane_count, + intel_dp_compute_rate(intel_dp, pipe_config->port_clock, + &link_bw, &rate_select); + + DRM_DEBUG_KMS("DP link bw %02x rate select %02x lane count %d clock %d bpp %d\n", + link_bw, rate_select, pipe_config->lane_count, pipe_config->port_clock, bpp); DRM_DEBUG_KMS("DP link bw required %i available %i\n", mode_rate, link_avail); @@ -1586,6 +1572,13 @@ static void ironlake_set_pll_cpu_edp(struct intel_dp *intel_dp) udelay(500); } +void intel_dp_set_link_params(struct intel_dp *intel_dp, + const struct intel_crtc_state *pipe_config) +{ + intel_dp->link_rate = pipe_config->port_clock; + intel_dp->lane_count = pipe_config->lane_count; +} + static void intel_dp_prepare(struct intel_encoder *encoder) { struct drm_device *dev = encoder->base.dev; @@ -1595,6 +1588,8 @@ static void intel_dp_prepare(struct intel_encoder *encoder) struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); struct drm_display_mode *adjusted_mode = &crtc->config->base.adjusted_mode; + intel_dp_set_link_params(intel_dp, crtc->config); + /* * There are four kinds of DP registers: * @@ -1619,7 +1614,7 @@ static void intel_dp_prepare(struct intel_encoder *encoder) /* Handle DP bits in common between all three register formats */ intel_dp->DP |= DP_VOLTAGE_0_4 | DP_PRE_EMPHASIS_0; - intel_dp->DP |= DP_PORT_WIDTH(intel_dp->lane_count); + intel_dp->DP |= DP_PORT_WIDTH(crtc->config->lane_count); if (crtc->config->has_audio) intel_dp->DP |= DP_AUDIO_OUTPUT_ENABLE; @@ -1649,8 +1644,9 @@ static void intel_dp_prepare(struct intel_encoder *encoder) trans_dp &= ~TRANS_DP_ENH_FRAMING; I915_WRITE(TRANS_DP_CTL(crtc->pipe), trans_dp); } else { - if (!HAS_PCH_SPLIT(dev) && !IS_VALLEYVIEW(dev)) - intel_dp->DP |= intel_dp->color_range; + if (!HAS_PCH_SPLIT(dev) && !IS_VALLEYVIEW(dev) && + crtc->config->limited_color_range) + intel_dp->DP |= DP_COLOR_RANGE_16_235; if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC) intel_dp->DP |= DP_SYNC_HS_HIGH; @@ -2290,13 +2286,14 @@ static void intel_dp_get_config(struct intel_encoder *encoder, pipe_config->has_audio = tmp & DP_AUDIO_OUTPUT_ENABLE && port != PORT_A; if (HAS_PCH_CPT(dev) && port != PORT_A) { - tmp = I915_READ(TRANS_DP_CTL(crtc->pipe)); - if (tmp & TRANS_DP_HSYNC_ACTIVE_HIGH) + u32 trans_dp = I915_READ(TRANS_DP_CTL(crtc->pipe)); + + if (trans_dp & TRANS_DP_HSYNC_ACTIVE_HIGH) flags |= DRM_MODE_FLAG_PHSYNC; else flags |= DRM_MODE_FLAG_NHSYNC; - if (tmp & TRANS_DP_VSYNC_ACTIVE_HIGH) + if (trans_dp & TRANS_DP_VSYNC_ACTIVE_HIGH) flags |= DRM_MODE_FLAG_PVSYNC; else flags |= DRM_MODE_FLAG_NVSYNC; @@ -2320,6 +2317,9 @@ static void intel_dp_get_config(struct intel_encoder *encoder, pipe_config->has_dp_encoder = true; + pipe_config->lane_count = + ((tmp & DP_PORT_WIDTH_MASK) >> DP_PORT_WIDTH_SHIFT) + 1; + intel_dp_get_m_n(crtc, pipe_config); if (port == PORT_A) { @@ -2399,38 +2399,62 @@ static void vlv_post_disable_dp(struct intel_encoder *encoder) intel_dp_link_down(intel_dp); } -static void chv_post_disable_dp(struct intel_encoder *encoder) +static void chv_data_lane_soft_reset(struct intel_encoder *encoder, + bool reset) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - struct intel_digital_port *dport = dp_to_dig_port(intel_dp); - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_crtc *intel_crtc = - to_intel_crtc(encoder->base.crtc); - enum dpio_channel ch = vlv_dport_to_channel(dport); - enum pipe pipe = intel_crtc->pipe; - u32 val; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(&encoder->base)); + struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + enum pipe pipe = crtc->pipe; + uint32_t val; - intel_dp_link_down(intel_dp); + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW0(ch)); + if (reset) + val &= ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); + else + val |= DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET; + vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW0(ch), val); - mutex_lock(&dev_priv->sb_lock); + if (crtc->config->lane_count > 2) { + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW0(ch)); + if (reset) + val &= ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); + else + val |= DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET; + vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW0(ch), val); + } - /* Propagate soft reset to data lane reset */ val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW1(ch)); val |= CHV_PCS_REQ_SOFTRESET_EN; + if (reset) + val &= ~DPIO_PCS_CLK_SOFT_RESET; + else + val |= DPIO_PCS_CLK_SOFT_RESET; vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW1(ch), val); - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW1(ch)); - val |= CHV_PCS_REQ_SOFTRESET_EN; - vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW1(ch), val); + if (crtc->config->lane_count > 2) { + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW1(ch)); + val |= CHV_PCS_REQ_SOFTRESET_EN; + if (reset) + val &= ~DPIO_PCS_CLK_SOFT_RESET; + else + val |= DPIO_PCS_CLK_SOFT_RESET; + vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW1(ch), val); + } +} - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW0(ch)); - val &= ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); - vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW0(ch), val); +static void chv_post_disable_dp(struct intel_encoder *encoder) +{ + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct drm_device *dev = encoder->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW0(ch)); - val &= ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); - vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW0(ch), val); + intel_dp_link_down(intel_dp); + + mutex_lock(&dev_priv->sb_lock); + + /* Assert data lane reset */ + chv_data_lane_soft_reset(encoder, true); mutex_unlock(&dev_priv->sb_lock); } @@ -2550,7 +2574,6 @@ static void intel_enable_dp(struct intel_encoder *encoder) struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); uint32_t dp_reg = I915_READ(intel_dp->output_reg); - unsigned int lane_mask = 0x0; if (WARN_ON(dp_reg & DP_PORT_EN)) return; @@ -2568,9 +2591,15 @@ static void intel_enable_dp(struct intel_encoder *encoder) pps_unlock(intel_dp); - if (IS_VALLEYVIEW(dev)) + if (IS_VALLEYVIEW(dev)) { + unsigned int lane_mask = 0x0; + + if (IS_CHERRYVIEW(dev)) + lane_mask = intel_dp_unused_lane_mask(crtc->config->lane_count); + vlv_wait_port_ready(dev_priv, dp_to_dig_port(intel_dp), lane_mask); + } intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); intel_dp_start_link_train(intel_dp); @@ -2797,31 +2826,19 @@ static void chv_pre_enable_dp(struct intel_encoder *encoder) val &= ~DPIO_LANEDESKEW_STRAP_OVRD; vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW11(ch), val); - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW11(ch)); - val &= ~DPIO_LANEDESKEW_STRAP_OVRD; - vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW11(ch), val); - - /* Deassert soft data lane reset*/ - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW1(ch)); - val |= CHV_PCS_REQ_SOFTRESET_EN; - vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW1(ch), val); - - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW1(ch)); - val |= CHV_PCS_REQ_SOFTRESET_EN; - vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW1(ch), val); - - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW0(ch)); - val |= (DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); - vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW0(ch), val); - - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW0(ch)); - val |= (DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); - vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW0(ch), val); + if (intel_crtc->config->lane_count > 2) { + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW11(ch)); + val &= ~DPIO_LANEDESKEW_STRAP_OVRD; + vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW11(ch), val); + } /* Program Tx lane latency optimal setting*/ - for (i = 0; i < 4; i++) { + for (i = 0; i < intel_crtc->config->lane_count; i++) { /* Set the upar bit */ - data = (i == 1) ? 0x0 : 0x1; + if (intel_crtc->config->lane_count == 1) + data = 0x0; + else + data = (i == 1) ? 0x0 : 0x1; vlv_dpio_write(dev_priv, pipe, CHV_TX_DW14(ch, i), data << DPIO_UPAR_SHIFT); } @@ -2842,9 +2859,11 @@ static void chv_pre_enable_dp(struct intel_encoder *encoder) val |= DPIO_TX2_STAGGER_MASK(0x1f); vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW11(ch), val); - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW11(ch)); - val |= DPIO_TX2_STAGGER_MASK(0x1f); - vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW11(ch), val); + if (intel_crtc->config->lane_count > 2) { + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW11(ch)); + val |= DPIO_TX2_STAGGER_MASK(0x1f); + vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW11(ch), val); + } vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW12(ch), DPIO_LANESTAGGER_STRAP(stagger) | @@ -2853,16 +2872,27 @@ static void chv_pre_enable_dp(struct intel_encoder *encoder) DPIO_TX1_STAGGER_MULT(6) | DPIO_TX2_STAGGER_MULT(0)); - vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW12(ch), - DPIO_LANESTAGGER_STRAP(stagger) | - DPIO_LANESTAGGER_STRAP_OVRD | - DPIO_TX1_STAGGER_MASK(0x1f) | - DPIO_TX1_STAGGER_MULT(7) | - DPIO_TX2_STAGGER_MULT(5)); + if (intel_crtc->config->lane_count > 2) { + vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW12(ch), + DPIO_LANESTAGGER_STRAP(stagger) | + DPIO_LANESTAGGER_STRAP_OVRD | + DPIO_TX1_STAGGER_MASK(0x1f) | + DPIO_TX1_STAGGER_MULT(7) | + DPIO_TX2_STAGGER_MULT(5)); + } + + /* Deassert data lane reset */ + chv_data_lane_soft_reset(encoder, false); mutex_unlock(&dev_priv->sb_lock); intel_enable_dp(encoder); + + /* Second common lane will stay alive on its own now */ + if (dport->release_cl2_override) { + chv_phy_powergate_ch(dev_priv, DPIO_PHY0, DPIO_CH1, false); + dport->release_cl2_override = false; + } } static void chv_dp_pre_pll_enable(struct intel_encoder *encoder) @@ -2874,12 +2904,27 @@ static void chv_dp_pre_pll_enable(struct intel_encoder *encoder) to_intel_crtc(encoder->base.crtc); enum dpio_channel ch = vlv_dport_to_channel(dport); enum pipe pipe = intel_crtc->pipe; + unsigned int lane_mask = + intel_dp_unused_lane_mask(intel_crtc->config->lane_count); u32 val; intel_dp_prepare(encoder); + /* + * Must trick the second common lane into life. + * Otherwise we can't even access the PLL. + */ + if (ch == DPIO_CH0 && pipe == PIPE_B) + dport->release_cl2_override = + !chv_phy_powergate_ch(dev_priv, DPIO_PHY0, DPIO_CH1, true); + + chv_phy_powergate_lanes(encoder, true, lane_mask); + mutex_lock(&dev_priv->sb_lock); + /* Assert data lane reset */ + chv_data_lane_soft_reset(encoder, true); + /* program left/right clock distribution */ if (pipe != PIPE_B) { val = vlv_dpio_read(dev_priv, pipe, _CHV_CMN_DW5_CH0); @@ -2908,13 +2953,15 @@ static void chv_dp_pre_pll_enable(struct intel_encoder *encoder) val |= CHV_PCS_USEDCLKCHANNEL; vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW8(ch), val); - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW8(ch)); - val |= CHV_PCS_USEDCLKCHANNEL_OVRRIDE; - if (pipe != PIPE_B) - val &= ~CHV_PCS_USEDCLKCHANNEL; - else - val |= CHV_PCS_USEDCLKCHANNEL; - vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW8(ch), val); + if (intel_crtc->config->lane_count > 2) { + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW8(ch)); + val |= CHV_PCS_USEDCLKCHANNEL_OVRRIDE; + if (pipe != PIPE_B) + val &= ~CHV_PCS_USEDCLKCHANNEL; + else + val |= CHV_PCS_USEDCLKCHANNEL; + vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW8(ch), val); + } /* * This a a bit weird since generally CL @@ -2931,6 +2978,39 @@ static void chv_dp_pre_pll_enable(struct intel_encoder *encoder) mutex_unlock(&dev_priv->sb_lock); } +static void chv_dp_post_pll_disable(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum pipe pipe = to_intel_crtc(encoder->base.crtc)->pipe; + u32 val; + + mutex_lock(&dev_priv->sb_lock); + + /* disable left/right clock distribution */ + if (pipe != PIPE_B) { + val = vlv_dpio_read(dev_priv, pipe, _CHV_CMN_DW5_CH0); + val &= ~(CHV_BUFLEFTENA1_MASK | CHV_BUFRIGHTENA1_MASK); + vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW5_CH0, val); + } else { + val = vlv_dpio_read(dev_priv, pipe, _CHV_CMN_DW1_CH1); + val &= ~(CHV_BUFLEFTENA2_MASK | CHV_BUFRIGHTENA2_MASK); + vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW1_CH1, val); + } + + mutex_unlock(&dev_priv->sb_lock); + + /* + * Leave the power down bit cleared for at least one + * lane so that chv_powergate_phy_ch() will power + * on something when the channel is otherwise unused. + * When the port is off and the override is removed + * the lanes power down anyway, so otherwise it doesn't + * really matter what the state of power down bits is + * after this. + */ + chv_phy_powergate_lanes(encoder, false, 0x0); +} + /* * Native read with retry for link status and receiver capability reads for * cases where the sink may still be asleep. @@ -3167,6 +3247,12 @@ static uint32_t vlv_signal_levels(struct intel_dp *intel_dp) return 0; } +static bool chv_need_uniq_trans_scale(uint8_t train_set) +{ + return (train_set & DP_TRAIN_PRE_EMPHASIS_MASK) == DP_TRAIN_PRE_EMPH_LEVEL_0 && + (train_set & DP_TRAIN_VOLTAGE_SWING_MASK) == DP_TRAIN_VOLTAGE_SWING_LEVEL_3; +} + static uint32_t chv_signal_levels(struct intel_dp *intel_dp) { struct drm_device *dev = intel_dp_to_dev(intel_dp); @@ -3258,24 +3344,28 @@ static uint32_t chv_signal_levels(struct intel_dp *intel_dp) val |= DPIO_PCS_TX1DEEMP_9P5 | DPIO_PCS_TX2DEEMP_9P5; vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW10(ch), val); - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW10(ch)); - val &= ~(DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3); - val &= ~(DPIO_PCS_TX1DEEMP_MASK | DPIO_PCS_TX2DEEMP_MASK); - val |= DPIO_PCS_TX1DEEMP_9P5 | DPIO_PCS_TX2DEEMP_9P5; - vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW10(ch), val); + if (intel_crtc->config->lane_count > 2) { + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW10(ch)); + val &= ~(DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3); + val &= ~(DPIO_PCS_TX1DEEMP_MASK | DPIO_PCS_TX2DEEMP_MASK); + val |= DPIO_PCS_TX1DEEMP_9P5 | DPIO_PCS_TX2DEEMP_9P5; + vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW10(ch), val); + } val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW9(ch)); val &= ~(DPIO_PCS_TX1MARGIN_MASK | DPIO_PCS_TX2MARGIN_MASK); val |= DPIO_PCS_TX1MARGIN_000 | DPIO_PCS_TX2MARGIN_000; vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW9(ch), val); - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW9(ch)); - val &= ~(DPIO_PCS_TX1MARGIN_MASK | DPIO_PCS_TX2MARGIN_MASK); - val |= DPIO_PCS_TX1MARGIN_000 | DPIO_PCS_TX2MARGIN_000; - vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW9(ch), val); + if (intel_crtc->config->lane_count > 2) { + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW9(ch)); + val &= ~(DPIO_PCS_TX1MARGIN_MASK | DPIO_PCS_TX2MARGIN_MASK); + val |= DPIO_PCS_TX1MARGIN_000 | DPIO_PCS_TX2MARGIN_000; + vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW9(ch), val); + } /* Program swing deemph */ - for (i = 0; i < 4; i++) { + for (i = 0; i < intel_crtc->config->lane_count; i++) { val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW4(ch, i)); val &= ~DPIO_SWING_DEEMPH9P5_MASK; val |= deemph_reg_value << DPIO_SWING_DEEMPH9P5_SHIFT; @@ -3283,43 +3373,36 @@ static uint32_t chv_signal_levels(struct intel_dp *intel_dp) } /* Program swing margin */ - for (i = 0; i < 4; i++) { + for (i = 0; i < intel_crtc->config->lane_count; i++) { val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW2(ch, i)); + val &= ~DPIO_SWING_MARGIN000_MASK; val |= margin_reg_value << DPIO_SWING_MARGIN000_SHIFT; + + /* + * Supposedly this value shouldn't matter when unique transition + * scale is disabled, but in fact it does matter. Let's just + * always program the same value and hope it's OK. + */ + val &= ~(0xff << DPIO_UNIQ_TRANS_SCALE_SHIFT); + val |= 0x9a << DPIO_UNIQ_TRANS_SCALE_SHIFT; + vlv_dpio_write(dev_priv, pipe, CHV_TX_DW2(ch, i), val); } - /* Disable unique transition scale */ - for (i = 0; i < 4; i++) { + /* + * The document said it needs to set bit 27 for ch0 and bit 26 + * for ch1. Might be a typo in the doc. + * For now, for this unique transition scale selection, set bit + * 27 for ch0 and ch1. + */ + for (i = 0; i < intel_crtc->config->lane_count; i++) { val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW3(ch, i)); - val &= ~DPIO_TX_UNIQ_TRANS_SCALE_EN; - vlv_dpio_write(dev_priv, pipe, CHV_TX_DW3(ch, i), val); - } - - if (((train_set & DP_TRAIN_PRE_EMPHASIS_MASK) - == DP_TRAIN_PRE_EMPH_LEVEL_0) && - ((train_set & DP_TRAIN_VOLTAGE_SWING_MASK) - == DP_TRAIN_VOLTAGE_SWING_LEVEL_3)) { - - /* - * The document said it needs to set bit 27 for ch0 and bit 26 - * for ch1. Might be a typo in the doc. - * For now, for this unique transition scale selection, set bit - * 27 for ch0 and ch1. - */ - for (i = 0; i < 4; i++) { - val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW3(ch, i)); + if (chv_need_uniq_trans_scale(train_set)) val |= DPIO_TX_UNIQ_TRANS_SCALE_EN; - vlv_dpio_write(dev_priv, pipe, CHV_TX_DW3(ch, i), val); - } - - for (i = 0; i < 4; i++) { - val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW2(ch, i)); - val &= ~(0xff << DPIO_UNIQ_TRANS_SCALE_SHIFT); - val |= (0x9a << DPIO_UNIQ_TRANS_SCALE_SHIFT); - vlv_dpio_write(dev_priv, pipe, CHV_TX_DW2(ch, i), val); - } + else + val &= ~DPIO_TX_UNIQ_TRANS_SCALE_EN; + vlv_dpio_write(dev_priv, pipe, CHV_TX_DW3(ch, i), val); } /* Start swing calculation */ @@ -3327,9 +3410,11 @@ static uint32_t chv_signal_levels(struct intel_dp *intel_dp) val |= DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3; vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW10(ch), val); - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW10(ch)); - val |= DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3; - vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW10(ch), val); + if (intel_crtc->config->lane_count > 2) { + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW10(ch)); + val |= DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3; + vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW10(ch), val); + } /* LRC Bypass */ val = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW30); @@ -3520,8 +3605,8 @@ intel_dp_set_link_train(struct intel_dp *intel_dp, uint8_t dp_train_pat) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = + to_i915(intel_dig_port->base.base.dev); uint8_t buf[sizeof(intel_dp->train_set) + 1]; int ret, len; @@ -3562,8 +3647,8 @@ intel_dp_update_link_train(struct intel_dp *intel_dp, uint32_t *DP, const uint8_t link_status[DP_LINK_STATUS_SIZE]) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = + to_i915(intel_dig_port->base.base.dev); int ret; intel_get_adjust_train(intel_dp, link_status); @@ -3620,19 +3705,23 @@ intel_dp_start_link_train(struct intel_dp *intel_dp) int voltage_tries, loop_tries; uint32_t DP = intel_dp->DP; uint8_t link_config[2]; + uint8_t link_bw, rate_select; if (HAS_DDI(dev)) intel_ddi_prepare_link_retrain(encoder); + intel_dp_compute_rate(intel_dp, intel_dp->link_rate, + &link_bw, &rate_select); + /* Write the link configuration data */ - link_config[0] = intel_dp->link_bw; + link_config[0] = link_bw; link_config[1] = intel_dp->lane_count; if (drm_dp_enhanced_frame_cap(intel_dp->dpcd)) link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN; drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, 2); if (intel_dp->num_sink_rates) drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_RATE_SET, - &intel_dp->rate_select, 1); + &rate_select, 1); link_config[0] = 0; link_config[1] = DP_SET_ANSI_8B10B; @@ -3728,8 +3817,8 @@ intel_dp_complete_link_train(struct intel_dp *intel_dp) uint32_t DP = intel_dp->DP; uint32_t training_pattern = DP_TRAINING_PATTERN_2; - /* Training Pattern 3 for HBR2 ot 1.2 devices that support it*/ - if (intel_dp->link_bw == DP_LINK_BW_5_4 || intel_dp->use_tps3) + /* Training Pattern 3 for HBR2 or 1.2 devices that support it*/ + if (intel_dp->link_rate == 540000 || intel_dp->use_tps3) training_pattern = DP_TRAINING_PATTERN_3; /* channel equalization */ @@ -3758,7 +3847,8 @@ intel_dp_complete_link_train(struct intel_dp *intel_dp) } /* Make sure clock is still ok */ - if (!drm_dp_clock_recovery_ok(link_status, intel_dp->lane_count)) { + if (!drm_dp_clock_recovery_ok(link_status, + intel_dp->lane_count)) { intel_dp->train_set_valid = false; intel_dp_start_link_train(intel_dp); intel_dp_set_link_train(intel_dp, &DP, @@ -3768,7 +3858,8 @@ intel_dp_complete_link_train(struct intel_dp *intel_dp) continue; } - if (drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) { + if (drm_dp_channel_eq_ok(link_status, + intel_dp->lane_count)) { channel_eq = true; break; } @@ -3915,8 +4006,7 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) * SKL < B0: due it's WaDisableHBR2 is the only exception where TP3 is * supported but still not enabled. */ - if (intel_dp->dpcd[DP_DPCD_REV] >= 0x12 && - intel_dp->dpcd[DP_MAX_LANE_COUNT] & DP_TPS3_SUPPORTED && + if (drm_dp_tps3_supported(intel_dp->dpcd) && intel_dp_source_supports_hbr2(dev)) { intel_dp->use_tps3 = true; DRM_DEBUG_KMS("Displayport TPS3 supported\n"); @@ -4007,22 +4097,30 @@ intel_dp_probe_mst(struct intel_dp *intel_dp) return intel_dp->is_mst; } -static void intel_dp_sink_crc_stop(struct intel_dp *intel_dp) +static int intel_dp_sink_crc_stop(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct intel_crtc *intel_crtc = to_intel_crtc(dig_port->base.base.crtc); u8 buf; + int ret = 0; if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0) { DRM_DEBUG_KMS("Sink CRC couldn't be stopped properly\n"); - return; + ret = -EIO; + goto out; } if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_SINK, - buf & ~DP_TEST_SINK_START) < 0) + buf & ~DP_TEST_SINK_START) < 0) { DRM_DEBUG_KMS("Sink CRC couldn't be stopped properly\n"); + ret = -EIO; + goto out; + } + intel_dp->sink_crc.started = false; + out: hsw_enable_ips(intel_crtc); + return ret; } static int intel_dp_sink_crc_start(struct intel_dp *intel_dp) @@ -4030,6 +4128,13 @@ static int intel_dp_sink_crc_start(struct intel_dp *intel_dp) struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct intel_crtc *intel_crtc = to_intel_crtc(dig_port->base.base.crtc); u8 buf; + int ret; + + if (intel_dp->sink_crc.started) { + ret = intel_dp_sink_crc_stop(intel_dp); + if (ret) + return ret; + } if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0) return -EIO; @@ -4037,6 +4142,8 @@ static int intel_dp_sink_crc_start(struct intel_dp *intel_dp) if (!(buf & DP_TEST_CRC_SUPPORTED)) return -ENOTTY; + intel_dp->sink_crc.last_count = buf & DP_TEST_COUNT_MASK; + if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0) return -EIO; @@ -4048,6 +4155,7 @@ static int intel_dp_sink_crc_start(struct intel_dp *intel_dp) return -EIO; } + intel_dp->sink_crc.started = true; return 0; } @@ -4057,38 +4165,55 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc) struct drm_device *dev = dig_port->base.base.dev; struct intel_crtc *intel_crtc = to_intel_crtc(dig_port->base.base.crtc); u8 buf; - int test_crc_count; + int count, ret; int attempts = 6; - int ret; + bool old_equal_new; ret = intel_dp_sink_crc_start(intel_dp); if (ret) return ret; - if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0) { - ret = -EIO; - goto stop; - } - - test_crc_count = buf & DP_TEST_COUNT_MASK; - do { + intel_wait_for_vblank(dev, intel_crtc->pipe); + if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0) { ret = -EIO; goto stop; } - intel_wait_for_vblank(dev, intel_crtc->pipe); - } while (--attempts && (buf & DP_TEST_COUNT_MASK) == test_crc_count); + count = buf & DP_TEST_COUNT_MASK; + + /* + * Count might be reset during the loop. In this case + * last known count needs to be reset as well. + */ + if (count == 0) + intel_dp->sink_crc.last_count = 0; + + if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR, crc, 6) < 0) { + ret = -EIO; + goto stop; + } + + old_equal_new = (count == intel_dp->sink_crc.last_count && + !memcmp(intel_dp->sink_crc.last_crc, crc, + 6 * sizeof(u8))); + + } while (--attempts && (count == 0 || old_equal_new)); + + intel_dp->sink_crc.last_count = buf & DP_TEST_COUNT_MASK; + memcpy(intel_dp->sink_crc.last_crc, crc, 6 * sizeof(u8)); if (attempts == 0) { - DRM_DEBUG_KMS("Panel is unable to calculate CRC after 6 vblanks\n"); - ret = -ETIMEDOUT; - goto stop; + if (old_equal_new) { + DRM_DEBUG_KMS("Unreliable Sink CRC counter: Current returned CRC is identical to the previous one\n"); + } else { + DRM_ERROR("Panel is unable to calculate any CRC after 6 vblanks\n"); + ret = -ETIMEDOUT; + goto stop; + } } - if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR, crc, 6) < 0) - ret = -EIO; stop: intel_dp_sink_crc_stop(intel_dp); return ret; @@ -4248,7 +4373,8 @@ go_again: if (bret == true) { /* check link status - esi[10] = 0x200c */ - if (intel_dp->active_mst_links && !drm_dp_channel_eq_ok(&esi[10], intel_dp->lane_count)) { + if (intel_dp->active_mst_links && + !drm_dp_channel_eq_ok(&esi[10], intel_dp->lane_count)) { DRM_DEBUG_KMS("channel EQ not ok, retraining\n"); intel_dp_start_link_train(intel_dp); intel_dp_complete_link_train(intel_dp); @@ -4410,6 +4536,147 @@ edp_detect(struct intel_dp *intel_dp) return status; } +static bool ibx_digital_port_connected(struct drm_i915_private *dev_priv, + struct intel_digital_port *port) +{ + u32 bit; + + switch (port->port) { + case PORT_A: + return true; + case PORT_B: + bit = SDE_PORTB_HOTPLUG; + break; + case PORT_C: + bit = SDE_PORTC_HOTPLUG; + break; + case PORT_D: + bit = SDE_PORTD_HOTPLUG; + break; + default: + MISSING_CASE(port->port); + return false; + } + + return I915_READ(SDEISR) & bit; +} + +static bool cpt_digital_port_connected(struct drm_i915_private *dev_priv, + struct intel_digital_port *port) +{ + u32 bit; + + switch (port->port) { + case PORT_A: + return true; + case PORT_B: + bit = SDE_PORTB_HOTPLUG_CPT; + break; + case PORT_C: + bit = SDE_PORTC_HOTPLUG_CPT; + break; + case PORT_D: + bit = SDE_PORTD_HOTPLUG_CPT; + break; + default: + MISSING_CASE(port->port); + return false; + } + + return I915_READ(SDEISR) & bit; +} + +static bool g4x_digital_port_connected(struct drm_i915_private *dev_priv, + struct intel_digital_port *port) +{ + u32 bit; + + switch (port->port) { + case PORT_B: + bit = PORTB_HOTPLUG_LIVE_STATUS_G4X; + break; + case PORT_C: + bit = PORTC_HOTPLUG_LIVE_STATUS_G4X; + break; + case PORT_D: + bit = PORTD_HOTPLUG_LIVE_STATUS_G4X; + break; + default: + MISSING_CASE(port->port); + return false; + } + + return I915_READ(PORT_HOTPLUG_STAT) & bit; +} + +static bool vlv_digital_port_connected(struct drm_i915_private *dev_priv, + struct intel_digital_port *port) +{ + u32 bit; + + switch (port->port) { + case PORT_B: + bit = PORTB_HOTPLUG_LIVE_STATUS_VLV; + break; + case PORT_C: + bit = PORTC_HOTPLUG_LIVE_STATUS_VLV; + break; + case PORT_D: + bit = PORTD_HOTPLUG_LIVE_STATUS_VLV; + break; + default: + MISSING_CASE(port->port); + return false; + } + + return I915_READ(PORT_HOTPLUG_STAT) & bit; +} + +static bool bxt_digital_port_connected(struct drm_i915_private *dev_priv, + struct intel_digital_port *port) +{ + u32 bit; + + switch (port->port) { + case PORT_A: + bit = BXT_DE_PORT_HP_DDIA; + break; + case PORT_B: + bit = BXT_DE_PORT_HP_DDIB; + break; + case PORT_C: + bit = BXT_DE_PORT_HP_DDIC; + break; + default: + MISSING_CASE(port->port); + return false; + } + + return I915_READ(GEN8_DE_PORT_ISR) & bit; +} + +/* + * intel_digital_port_connected - is the specified port connected? + * @dev_priv: i915 private structure + * @port: the port to test + * + * Return %true if @port is connected, %false otherwise. + */ +static bool intel_digital_port_connected(struct drm_i915_private *dev_priv, + struct intel_digital_port *port) +{ + if (HAS_PCH_IBX(dev_priv)) + return ibx_digital_port_connected(dev_priv, port); + if (HAS_PCH_SPLIT(dev_priv)) + return cpt_digital_port_connected(dev_priv, port); + else if (IS_BROXTON(dev_priv)) + return bxt_digital_port_connected(dev_priv, port); + else if (IS_VALLEYVIEW(dev_priv)) + return vlv_digital_port_connected(dev_priv, port); + else + return g4x_digital_port_connected(dev_priv, port); +} + static enum drm_connector_status ironlake_dp_detect(struct intel_dp *intel_dp) { @@ -4417,59 +4684,17 @@ ironlake_dp_detect(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = dev->dev_private; struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - if (!ibx_digital_port_connected(dev_priv, intel_dig_port)) + if (!intel_digital_port_connected(dev_priv, intel_dig_port)) return connector_status_disconnected; return intel_dp_detect_dpcd(intel_dp); } -static int g4x_digital_port_connected(struct drm_device *dev, - struct intel_digital_port *intel_dig_port) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t bit; - - if (IS_VALLEYVIEW(dev)) { - switch (intel_dig_port->port) { - case PORT_B: - bit = PORTB_HOTPLUG_LIVE_STATUS_VLV; - break; - case PORT_C: - bit = PORTC_HOTPLUG_LIVE_STATUS_VLV; - break; - case PORT_D: - bit = PORTD_HOTPLUG_LIVE_STATUS_VLV; - break; - default: - return -EINVAL; - } - } else { - switch (intel_dig_port->port) { - case PORT_B: - bit = PORTB_HOTPLUG_LIVE_STATUS_G4X; - break; - case PORT_C: - bit = PORTC_HOTPLUG_LIVE_STATUS_G4X; - break; - case PORT_D: - bit = PORTD_HOTPLUG_LIVE_STATUS_G4X; - break; - default: - return -EINVAL; - } - } - - if ((I915_READ(PORT_HOTPLUG_STAT) & bit) == 0) - return 0; - return 1; -} - static enum drm_connector_status g4x_dp_detect(struct intel_dp *intel_dp) { struct drm_device *dev = intel_dp_to_dev(intel_dp); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - int ret; /* Can't disconnect eDP, but you can close the lid... */ if (is_edp(intel_dp)) { @@ -4481,10 +4706,7 @@ g4x_dp_detect(struct intel_dp *intel_dp) return status; } - ret = g4x_digital_port_connected(dev, intel_dig_port); - if (ret == -EINVAL) - return connector_status_unknown; - else if (ret == 0) + if (!intel_digital_port_connected(dev->dev_private, intel_dig_port)) return connector_status_disconnected; return intel_dp_detect_dpcd(intel_dp); @@ -4728,7 +4950,7 @@ intel_dp_set_property(struct drm_connector *connector, if (property == dev_priv->broadcast_rgb_property) { bool old_auto = intel_dp->color_range_auto; - uint32_t old_range = intel_dp->color_range; + bool old_range = intel_dp->limited_color_range; switch (val) { case INTEL_BROADCAST_RGB_AUTO: @@ -4736,18 +4958,18 @@ intel_dp_set_property(struct drm_connector *connector, break; case INTEL_BROADCAST_RGB_FULL: intel_dp->color_range_auto = false; - intel_dp->color_range = 0; + intel_dp->limited_color_range = false; break; case INTEL_BROADCAST_RGB_LIMITED: intel_dp->color_range_auto = false; - intel_dp->color_range = DP_COLOR_RANGE_16_235; + intel_dp->limited_color_range = true; break; default: return -EINVAL; } if (old_auto == intel_dp->color_range_auto && - old_range == intel_dp->color_range) + old_range == intel_dp->limited_color_range) return 0; goto done; @@ -4947,13 +5169,8 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) /* indicate that we need to restart link training */ intel_dp->train_set_valid = false; - if (HAS_PCH_SPLIT(dev)) { - if (!ibx_digital_port_connected(dev_priv, intel_dig_port)) - goto mst_fail; - } else { - if (g4x_digital_port_connected(dev, intel_dig_port) != 1) - goto mst_fail; - } + if (!intel_digital_port_connected(dev_priv, intel_dig_port)) + goto mst_fail; if (!intel_dp_get_dpcd(intel_dp)) { goto mst_fail; @@ -5853,6 +6070,8 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, break; case PORT_B: intel_encoder->hpd_pin = HPD_PORT_B; + if (IS_BROXTON(dev_priv) && (INTEL_REVID(dev) < BXT_REVID_B0)) + intel_encoder->hpd_pin = HPD_PORT_A; break; case PORT_C: intel_encoder->hpd_pin = HPD_PORT_C; @@ -5953,6 +6172,7 @@ intel_dp_init(struct drm_device *dev, int output_reg, enum port port) intel_encoder->pre_enable = chv_pre_enable_dp; intel_encoder->enable = vlv_enable_dp; intel_encoder->post_disable = chv_post_disable_dp; + intel_encoder->post_pll_disable = chv_dp_post_pll_disable; } else if (IS_VALLEYVIEW(dev)) { intel_encoder->pre_pll_enable = vlv_dp_pre_pll_enable; intel_encoder->pre_enable = vlv_pre_enable_dp; diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 983553cf8b74..677d70e4d363 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -39,7 +39,7 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, struct intel_dp *intel_dp = &intel_dig_port->dp; struct drm_atomic_state *state; int bpp, i; - int lane_count, slots, rate; + int lane_count, slots; struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; struct drm_connector *drm_connector; struct intel_connector *connector, *found = NULL; @@ -56,20 +56,11 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, */ lane_count = drm_dp_max_lane_count(intel_dp->dpcd); - rate = intel_dp_max_link_rate(intel_dp); - if (intel_dp->num_sink_rates) { - intel_dp->link_bw = 0; - intel_dp->rate_select = intel_dp_rate_select(intel_dp, rate); - } else { - intel_dp->link_bw = drm_dp_link_rate_to_bw_code(rate); - intel_dp->rate_select = 0; - } - - intel_dp->lane_count = lane_count; + pipe_config->lane_count = lane_count; pipe_config->pipe_bpp = 24; - pipe_config->port_clock = rate; + pipe_config->port_clock = intel_dp_max_link_rate(intel_dp); state = pipe_config->base.state; @@ -179,6 +170,8 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder) if (intel_dp->active_mst_links == 0) { enum port port = intel_ddi_get_encoder_port(encoder); + intel_dp_set_link_params(intel_dp, intel_crtc->config); + /* FIXME: add support for SKL */ if (INTEL_INFO(dev)->gen < 9) I915_WRITE(PORT_CLK_SEL(port), @@ -281,6 +274,10 @@ static void intel_dp_mst_enc_get_config(struct intel_encoder *encoder, break; } pipe_config->base.adjusted_mode.flags |= flags; + + pipe_config->lane_count = + ((temp & DDI_PORT_WIDTH_MASK) >> DDI_PORT_WIDTH_SHIFT) + 1; + intel_dp_get_m_n(crtc, pipe_config); intel_ddi_clock_get(&intel_dig_port->base, pipe_config); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 2b9e6f9775c5..40e825d6a26f 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -142,6 +142,7 @@ struct intel_encoder { void (*mode_set)(struct intel_encoder *intel_encoder); void (*disable)(struct intel_encoder *); void (*post_disable)(struct intel_encoder *); + void (*post_pll_disable)(struct intel_encoder *); /* Read out the current hw state of this connector, returning true if * the encoder is active. If the encoder is enabled it also set the pipe * it is connected to in the pipe parameter. */ @@ -423,6 +424,8 @@ struct intel_crtc_state { /* Used by SDVO (and if we ever fix it, HDMI). */ unsigned pixel_multiplier; + uint8_t lane_count; + /* Panel fitter controls for gen2-gen4 + VLV */ struct { u32 control; @@ -561,6 +564,8 @@ struct intel_crtc { int scanline_offset; unsigned start_vbl_count; + ktime_t start_vbl_time; + struct intel_crtc_atomic_commit atomic; /* scalers available on this crtc */ @@ -657,7 +662,7 @@ struct cxsr_latency { struct intel_hdmi { u32 hdmi_reg; int ddc_bus; - uint32_t color_range; + bool limited_color_range; bool color_range_auto; bool has_hdmi_sink; bool has_audio; @@ -696,23 +701,29 @@ enum link_m_n_set { M2_N2 }; +struct sink_crc { + bool started; + u8 last_crc[6]; + int last_count; +}; + struct intel_dp { uint32_t output_reg; uint32_t aux_ch_ctl_reg; uint32_t DP; + int link_rate; + uint8_t lane_count; bool has_audio; enum hdmi_force_audio force_audio; - uint32_t color_range; + bool limited_color_range; bool color_range_auto; - uint8_t link_bw; - uint8_t rate_select; - uint8_t lane_count; uint8_t dpcd[DP_RECEIVER_CAP_SIZE]; uint8_t psr_dpcd[EDP_PSR_RECEIVER_CAP_SIZE]; uint8_t downstream_ports[DP_MAX_DOWNSTREAM_PORTS]; /* sink rates as reported by DP_SUPPORTED_LINK_RATES */ uint8_t num_sink_rates; int sink_rates[DP_MAX_SUPPORTED_RATES]; + struct sink_crc sink_crc; struct drm_dp_aux aux; uint8_t train_set[4]; int panel_power_up_delay; @@ -770,6 +781,7 @@ struct intel_digital_port { struct intel_dp dp; struct intel_hdmi hdmi; enum irqreturn (*hpd_pulse)(struct intel_digital_port *, bool); + bool release_cl2_override; }; struct intel_dp_mst_encoder { @@ -779,7 +791,7 @@ struct intel_dp_mst_encoder { void *port; /* store this opaque as its illegal to dereference it */ }; -static inline int +static inline enum dpio_channel vlv_dport_to_channel(struct intel_digital_port *dport) { switch (dport->port) { @@ -793,7 +805,21 @@ vlv_dport_to_channel(struct intel_digital_port *dport) } } -static inline int +static inline enum dpio_phy +vlv_dport_to_phy(struct intel_digital_port *dport) +{ + switch (dport->port) { + case PORT_B: + case PORT_C: + return DPIO_PHY0; + case PORT_D: + return DPIO_PHY1; + default: + BUG(); + } +} + +static inline enum dpio_channel vlv_pipe_to_channel(enum pipe pipe) { switch (pipe) { @@ -987,6 +1013,7 @@ void i915_audio_component_cleanup(struct drm_i915_private *dev_priv); extern const struct drm_plane_funcs intel_plane_funcs; bool intel_has_pending_fb_unpin(struct drm_device *dev); int intel_pch_rawclk(struct drm_device *dev); +int intel_hrawclk(struct drm_device *dev); void intel_mark_busy(struct drm_device *dev); void intel_mark_idle(struct drm_device *dev); void intel_crtc_restore_mode(struct drm_crtc *crtc); @@ -995,8 +1022,6 @@ void intel_encoder_destroy(struct drm_encoder *encoder); int intel_connector_init(struct intel_connector *); struct intel_connector *intel_connector_alloc(void); bool intel_connector_get_hw_state(struct intel_connector *connector); -bool ibx_digital_port_connected(struct drm_i915_private *dev_priv, - struct intel_digital_port *port); void intel_connector_attach_encoder(struct intel_connector *connector, struct intel_encoder *encoder); struct drm_encoder *intel_best_encoder(struct drm_connector *connector); @@ -1155,6 +1180,8 @@ void assert_csr_loaded(struct drm_i915_private *dev_priv); void intel_dp_init(struct drm_device *dev, int output_reg, enum port port); bool intel_dp_init_connector(struct intel_digital_port *intel_dig_port, struct intel_connector *intel_connector); +void intel_dp_set_link_params(struct intel_dp *intel_dp, + const struct intel_crtc_state *pipe_config); void intel_dp_start_link_train(struct intel_dp *intel_dp); void intel_dp_complete_link_train(struct intel_dp *intel_dp); void intel_dp_stop_link_train(struct intel_dp *intel_dp); @@ -1339,6 +1366,12 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv); void intel_display_set_init_power(struct drm_i915_private *dev, bool enable); +void chv_phy_powergate_lanes(struct intel_encoder *encoder, + bool override, unsigned int mask); +bool chv_phy_powergate_ch(struct drm_i915_private *dev_priv, enum dpio_phy phy, + enum dpio_channel ch, bool override); + + /* intel_pm.c */ void intel_init_clock_gating(struct drm_device *dev); void intel_suspend_hw(struct drm_device *dev); @@ -1384,9 +1417,8 @@ bool intel_sdvo_init(struct drm_device *dev, uint32_t sdvo_reg, bool is_sdvob); int intel_plane_init(struct drm_device *dev, enum pipe pipe, int plane); int intel_sprite_set_colorkey(struct drm_device *dev, void *data, struct drm_file *file_priv); -void intel_pipe_update_start(struct intel_crtc *crtc, - uint32_t *start_vbl_count); -void intel_pipe_update_end(struct intel_crtc *crtc, u32 start_vbl_count); +void intel_pipe_update_start(struct intel_crtc *crtc); +void intel_pipe_update_end(struct intel_crtc *crtc); /* intel_tv.c */ void intel_tv_init(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 4a601cf90f16..781c267ea97c 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -654,6 +654,7 @@ intel_dsi_mode_valid(struct drm_connector *connector, { struct intel_connector *intel_connector = to_intel_connector(connector); struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode; + int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; DRM_DEBUG_KMS("\n"); @@ -667,6 +668,8 @@ intel_dsi_mode_valid(struct drm_connector *connector, return MODE_PANEL; if (mode->vdisplay > fixed_mode->vdisplay) return MODE_PANEL; + if (fixed_mode->clock > max_dotclk) + return MODE_CLOCK_HIGH; } return MODE_OK; diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index dc532bb61d22..c80fe1f49ede 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -201,6 +201,8 @@ intel_dvo_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct intel_dvo *intel_dvo = intel_attached_dvo(connector); + int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; + int target_clock = mode->clock; if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; @@ -212,8 +214,13 @@ intel_dvo_mode_valid(struct drm_connector *connector, return MODE_PANEL; if (mode->vdisplay > intel_dvo->panel_fixed_mode->vdisplay) return MODE_PANEL; + + target_clock = intel_dvo->panel_fixed_mode->clock; } + if (target_clock > max_dotclk) + return MODE_CLOCK_HIGH; + return intel_dvo->dev.dev_ops->mode_valid(&intel_dvo->dev, mode); } diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 8c6a6fa46005..96476d7d7ed2 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -263,7 +263,7 @@ static int intelfb_create(struct drm_fb_helper *helper, /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */ - DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08lx, bo %p\n", + DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08llx, bo %p\n", fb->width, fb->height, i915_gem_obj_ggtt_offset(obj), obj); diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h new file mode 100644 index 000000000000..4ec2d27a557e --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -0,0 +1,122 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ +#ifndef _INTEL_GUC_H_ +#define _INTEL_GUC_H_ + +#include "intel_guc_fwif.h" +#include "i915_guc_reg.h" + +struct i915_guc_client { + struct drm_i915_gem_object *client_obj; + struct intel_context *owner; + struct intel_guc *guc; + uint32_t priority; + uint32_t ctx_index; + + uint32_t proc_desc_offset; + uint32_t doorbell_offset; + uint32_t cookie; + uint16_t doorbell_id; + uint16_t padding; /* Maintain alignment */ + + uint32_t wq_offset; + uint32_t wq_size; + + spinlock_t wq_lock; /* Protects all data below */ + uint32_t wq_tail; + + /* GuC submission statistics & status */ + uint64_t submissions[I915_NUM_RINGS]; + uint32_t q_fail; + uint32_t b_fail; + int retcode; +}; + +enum intel_guc_fw_status { + GUC_FIRMWARE_FAIL = -1, + GUC_FIRMWARE_NONE = 0, + GUC_FIRMWARE_PENDING, + GUC_FIRMWARE_SUCCESS +}; + +/* + * This structure encapsulates all the data needed during the process + * of fetching, caching, and loading the firmware image into the GuC. + */ +struct intel_guc_fw { + struct drm_device * guc_dev; + const char * guc_fw_path; + size_t guc_fw_size; + struct drm_i915_gem_object * guc_fw_obj; + enum intel_guc_fw_status guc_fw_fetch_status; + enum intel_guc_fw_status guc_fw_load_status; + + uint16_t guc_fw_major_wanted; + uint16_t guc_fw_minor_wanted; + uint16_t guc_fw_major_found; + uint16_t guc_fw_minor_found; +}; + +struct intel_guc { + struct intel_guc_fw guc_fw; + + uint32_t log_flags; + struct drm_i915_gem_object *log_obj; + + struct drm_i915_gem_object *ctx_pool_obj; + struct ida ctx_ids; + + struct i915_guc_client *execbuf_client; + + spinlock_t host2guc_lock; /* Protects all data below */ + + DECLARE_BITMAP(doorbell_bitmap, GUC_MAX_DOORBELLS); + uint32_t db_cacheline; /* Cyclic counter mod pagesize */ + + /* Action status & statistics */ + uint64_t action_count; /* Total commands issued */ + uint32_t action_cmd; /* Last command word */ + uint32_t action_status; /* Last return status */ + uint32_t action_fail; /* Total number of failures */ + int32_t action_err; /* Last error code */ + + uint64_t submissions[I915_NUM_RINGS]; + uint32_t last_seqno[I915_NUM_RINGS]; +}; + +/* intel_guc_loader.c */ +extern void intel_guc_ucode_init(struct drm_device *dev); +extern int intel_guc_ucode_load(struct drm_device *dev); +extern void intel_guc_ucode_fini(struct drm_device *dev); +extern const char *intel_guc_fw_status_repr(enum intel_guc_fw_status status); + +/* i915_guc_submission.c */ +int i915_guc_submission_init(struct drm_device *dev); +int i915_guc_submission_enable(struct drm_device *dev); +int i915_guc_submit(struct i915_guc_client *client, + struct drm_i915_gem_request *rq); +void i915_guc_submission_disable(struct drm_device *dev); +void i915_guc_submission_fini(struct drm_device *dev); + +#endif diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index 18d7f20936c8..e1f47ba2b4b0 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -32,17 +32,16 @@ * EDITING THIS FILE IS THEREFORE NOT RECOMMENDED - YOUR CHANGES MAY BE LOST. */ -#define GFXCORE_FAMILY_GEN8 11 #define GFXCORE_FAMILY_GEN9 12 -#define GFXCORE_FAMILY_FORCE_ULONG 0x7fffffff +#define GFXCORE_FAMILY_UNKNOWN 0x7fffffff -#define GUC_CTX_PRIORITY_CRITICAL 0 +#define GUC_CTX_PRIORITY_KMD_HIGH 0 #define GUC_CTX_PRIORITY_HIGH 1 -#define GUC_CTX_PRIORITY_NORMAL 2 -#define GUC_CTX_PRIORITY_LOW 3 +#define GUC_CTX_PRIORITY_KMD_NORMAL 2 +#define GUC_CTX_PRIORITY_NORMAL 3 #define GUC_MAX_GPU_CONTEXTS 1024 -#define GUC_INVALID_CTX_ID (GUC_MAX_GPU_CONTEXTS + 1) +#define GUC_INVALID_CTX_ID GUC_MAX_GPU_CONTEXTS /* Work queue item header definitions */ #define WQ_STATUS_ACTIVE 1 @@ -76,6 +75,7 @@ #define GUC_CTX_DESC_ATTR_RESET (1 << 4) #define GUC_CTX_DESC_ATTR_WQLOCKED (1 << 5) #define GUC_CTX_DESC_ATTR_PCH (1 << 6) +#define GUC_CTX_DESC_ATTR_TERMINATED (1 << 7) /* The guc control data is 10 DWORDs */ #define GUC_CTL_CTXINFO 0 @@ -108,6 +108,7 @@ #define GUC_CTL_DISABLE_SCHEDULER (1 << 4) #define GUC_CTL_PREEMPTION_LOG (1 << 5) #define GUC_CTL_ENABLE_SLPC (1 << 7) +#define GUC_CTL_RESET_ON_PREMPT_FAILURE (1 << 8) #define GUC_CTL_DEBUG 8 #define GUC_LOG_VERBOSITY_SHIFT 0 #define GUC_LOG_VERBOSITY_LOW (0 << GUC_LOG_VERBOSITY_SHIFT) @@ -117,8 +118,9 @@ /* Verbosity range-check limits, without the shift */ #define GUC_LOG_VERBOSITY_MIN 0 #define GUC_LOG_VERBOSITY_MAX 3 +#define GUC_CTL_RSRVD 9 -#define GUC_CTL_MAX_DWORDS (GUC_CTL_DEBUG + 1) +#define GUC_CTL_MAX_DWORDS (GUC_CTL_RSRVD + 1) struct guc_doorbell_info { u32 db_status; @@ -208,7 +210,9 @@ struct guc_context_desc { u32 engine_presence; - u32 reserved0[1]; + u8 engine_suspended; + + u8 reserved0[3]; u64 reserved1[1]; u64 desc_private; diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c new file mode 100644 index 000000000000..5eafd31fb4a6 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -0,0 +1,606 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Vinit Azad <vinit.azad@intel.com> + * Ben Widawsky <ben@bwidawsk.net> + * Dave Gordon <david.s.gordon@intel.com> + * Alex Dai <yu.dai@intel.com> + */ +#include <linux/firmware.h> +#include "i915_drv.h" +#include "intel_guc.h" + +/** + * DOC: GuC + * + * intel_guc: + * Top level structure of guc. It handles firmware loading and manages client + * pool and doorbells. intel_guc owns a i915_guc_client to replace the legacy + * ExecList submission. + * + * Firmware versioning: + * The firmware build process will generate a version header file with major and + * minor version defined. The versions are built into CSS header of firmware. + * i915 kernel driver set the minimal firmware version required per platform. + * The firmware installation package will install (symbolic link) proper version + * of firmware. + * + * GuC address space: + * GuC does not allow any gfx GGTT address that falls into range [0, WOPCM_TOP), + * which is reserved for Boot ROM, SRAM and WOPCM. Currently this top address is + * 512K. In order to exclude 0-512K address space from GGTT, all gfx objects + * used by GuC is pinned with PIN_OFFSET_BIAS along with size of WOPCM. + * + * Firmware log: + * Firmware log is enabled by setting i915.guc_log_level to non-negative level. + * Log data is printed out via reading debugfs i915_guc_log_dump. Reading from + * i915_guc_load_status will print out firmware loading status and scratch + * registers value. + * + */ + +#define I915_SKL_GUC_UCODE "i915/skl_guc_ver4.bin" +MODULE_FIRMWARE(I915_SKL_GUC_UCODE); + +/* User-friendly representation of an enum */ +const char *intel_guc_fw_status_repr(enum intel_guc_fw_status status) +{ + switch (status) { + case GUC_FIRMWARE_FAIL: + return "FAIL"; + case GUC_FIRMWARE_NONE: + return "NONE"; + case GUC_FIRMWARE_PENDING: + return "PENDING"; + case GUC_FIRMWARE_SUCCESS: + return "SUCCESS"; + default: + return "UNKNOWN!"; + } +}; + +static void direct_interrupts_to_host(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *ring; + int i, irqs; + + /* tell all command streamers NOT to forward interrupts and vblank to GuC */ + irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER); + irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING); + for_each_ring(ring, dev_priv, i) + I915_WRITE(RING_MODE_GEN7(ring), irqs); + + /* tell DE to send nothing to GuC */ + I915_WRITE(DE_GUCRMR, ~0); + + /* route all GT interrupts to the host */ + I915_WRITE(GUC_BCS_RCS_IER, 0); + I915_WRITE(GUC_VCS2_VCS1_IER, 0); + I915_WRITE(GUC_WD_VECS_IER, 0); +} + +static void direct_interrupts_to_guc(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *ring; + int i, irqs; + + /* tell all command streamers to forward interrupts and vblank to GuC */ + irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_ALWAYS); + irqs |= _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING); + for_each_ring(ring, dev_priv, i) + I915_WRITE(RING_MODE_GEN7(ring), irqs); + + /* tell DE to send (all) flip_done to GuC */ + irqs = DERRMR_PIPEA_PRI_FLIP_DONE | DERRMR_PIPEA_SPR_FLIP_DONE | + DERRMR_PIPEB_PRI_FLIP_DONE | DERRMR_PIPEB_SPR_FLIP_DONE | + DERRMR_PIPEC_PRI_FLIP_DONE | DERRMR_PIPEC_SPR_FLIP_DONE; + /* Unmasked bits will cause GuC response message to be sent */ + I915_WRITE(DE_GUCRMR, ~irqs); + + /* route USER_INTERRUPT to Host, all others are sent to GuC. */ + irqs = GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | + GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; + /* These three registers have the same bit definitions */ + I915_WRITE(GUC_BCS_RCS_IER, ~irqs); + I915_WRITE(GUC_VCS2_VCS1_IER, ~irqs); + I915_WRITE(GUC_WD_VECS_IER, ~irqs); +} + +static u32 get_gttype(struct drm_i915_private *dev_priv) +{ + /* XXX: GT type based on PCI device ID? field seems unused by fw */ + return 0; +} + +static u32 get_core_family(struct drm_i915_private *dev_priv) +{ + switch (INTEL_INFO(dev_priv)->gen) { + case 9: + return GFXCORE_FAMILY_GEN9; + + default: + DRM_ERROR("GUC: unsupported core family\n"); + return GFXCORE_FAMILY_UNKNOWN; + } +} + +static void set_guc_init_params(struct drm_i915_private *dev_priv) +{ + struct intel_guc *guc = &dev_priv->guc; + u32 params[GUC_CTL_MAX_DWORDS]; + int i; + + memset(¶ms, 0, sizeof(params)); + + params[GUC_CTL_DEVICE_INFO] |= + (get_gttype(dev_priv) << GUC_CTL_GTTYPE_SHIFT) | + (get_core_family(dev_priv) << GUC_CTL_COREFAMILY_SHIFT); + + /* + * GuC ARAT increment is 10 ns. GuC default scheduler quantum is one + * second. This ARAR is calculated by: + * Scheduler-Quantum-in-ns / ARAT-increment-in-ns = 1000000000 / 10 + */ + params[GUC_CTL_ARAT_HIGH] = 0; + params[GUC_CTL_ARAT_LOW] = 100000000; + + params[GUC_CTL_WA] |= GUC_CTL_WA_UK_BY_DRIVER; + + params[GUC_CTL_FEATURE] |= GUC_CTL_DISABLE_SCHEDULER | + GUC_CTL_VCS2_ENABLED; + + if (i915.guc_log_level >= 0) { + params[GUC_CTL_LOG_PARAMS] = guc->log_flags; + params[GUC_CTL_DEBUG] = + i915.guc_log_level << GUC_LOG_VERBOSITY_SHIFT; + } + + /* If GuC submission is enabled, set up additional parameters here */ + if (i915.enable_guc_submission) { + u32 pgs = i915_gem_obj_ggtt_offset(dev_priv->guc.ctx_pool_obj); + u32 ctx_in_16 = GUC_MAX_GPU_CONTEXTS / 16; + + pgs >>= PAGE_SHIFT; + params[GUC_CTL_CTXINFO] = (pgs << GUC_CTL_BASE_ADDR_SHIFT) | + (ctx_in_16 << GUC_CTL_CTXNUM_IN16_SHIFT); + + params[GUC_CTL_FEATURE] |= GUC_CTL_KERNEL_SUBMISSIONS; + + /* Unmask this bit to enable the GuC's internal scheduler */ + params[GUC_CTL_FEATURE] &= ~GUC_CTL_DISABLE_SCHEDULER; + } + + I915_WRITE(SOFT_SCRATCH(0), 0); + + for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) + I915_WRITE(SOFT_SCRATCH(1 + i), params[i]); +} + +/* + * Read the GuC status register (GUC_STATUS) and store it in the + * specified location; then return a boolean indicating whether + * the value matches either of two values representing completion + * of the GuC boot process. + * + * This is used for polling the GuC status in a wait_for_atomic() + * loop below. + */ +static inline bool guc_ucode_response(struct drm_i915_private *dev_priv, + u32 *status) +{ + u32 val = I915_READ(GUC_STATUS); + *status = val; + return ((val & GS_UKERNEL_MASK) == GS_UKERNEL_READY || + (val & GS_UKERNEL_MASK) == GS_UKERNEL_LAPIC_DONE); +} + +/* + * Transfer the firmware image to RAM for execution by the microcontroller. + * + * GuC Firmware layout: + * +-------------------------------+ ---- + * | CSS header | 128B + * | contains major/minor version | + * +-------------------------------+ ---- + * | uCode | + * +-------------------------------+ ---- + * | RSA signature | 256B + * +-------------------------------+ ---- + * + * Architecturally, the DMA engine is bidirectional, and can potentially even + * transfer between GTT locations. This functionality is left out of the API + * for now as there is no need for it. + * + * Note that GuC needs the CSS header plus uKernel code to be copied by the + * DMA engine in one operation, whereas the RSA signature is loaded via MMIO. + */ + +#define UOS_CSS_HEADER_OFFSET 0 +#define UOS_VER_MINOR_OFFSET 0x44 +#define UOS_VER_MAJOR_OFFSET 0x46 +#define UOS_CSS_HEADER_SIZE 0x80 +#define UOS_RSA_SIG_SIZE 0x100 + +static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv) +{ + struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; + struct drm_i915_gem_object *fw_obj = guc_fw->guc_fw_obj; + unsigned long offset; + struct sg_table *sg = fw_obj->pages; + u32 status, ucode_size, rsa[UOS_RSA_SIG_SIZE / sizeof(u32)]; + int i, ret = 0; + + /* uCode size, also is where RSA signature starts */ + offset = ucode_size = guc_fw->guc_fw_size - UOS_RSA_SIG_SIZE; + I915_WRITE(DMA_COPY_SIZE, ucode_size); + + /* Copy RSA signature from the fw image to HW for verification */ + sg_pcopy_to_buffer(sg->sgl, sg->nents, rsa, UOS_RSA_SIG_SIZE, offset); + for (i = 0; i < UOS_RSA_SIG_SIZE / sizeof(u32); i++) + I915_WRITE(UOS_RSA_SCRATCH_0 + i * sizeof(u32), rsa[i]); + + /* Set the source address for the new blob */ + offset = i915_gem_obj_ggtt_offset(fw_obj); + I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); + I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF); + + /* + * Set the DMA destination. Current uCode expects the code to be + * loaded at 8k; locations below this are used for the stack. + */ + I915_WRITE(DMA_ADDR_1_LOW, 0x2000); + I915_WRITE(DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM); + + /* Finally start the DMA */ + I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(UOS_MOVE | START_DMA)); + + /* + * Spin-wait for the DMA to complete & the GuC to start up. + * NB: Docs recommend not using the interrupt for completion. + * Measurements indicate this should take no more than 20ms, so a + * timeout here indicates that the GuC has failed and is unusable. + * (Higher levels of the driver will attempt to fall back to + * execlist mode if this happens.) + */ + ret = wait_for_atomic(guc_ucode_response(dev_priv, &status), 100); + + DRM_DEBUG_DRIVER("DMA status 0x%x, GuC status 0x%x\n", + I915_READ(DMA_CTRL), status); + + if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { + DRM_ERROR("GuC firmware signature verification failed\n"); + ret = -ENOEXEC; + } + + DRM_DEBUG_DRIVER("returning %d\n", ret); + + return ret; +} + +/* + * Load the GuC firmware blob into the MinuteIA. + */ +static int guc_ucode_xfer(struct drm_i915_private *dev_priv) +{ + struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; + struct drm_device *dev = dev_priv->dev; + int ret; + + ret = i915_gem_object_set_to_gtt_domain(guc_fw->guc_fw_obj, false); + if (ret) { + DRM_DEBUG_DRIVER("set-domain failed %d\n", ret); + return ret; + } + + ret = i915_gem_obj_ggtt_pin(guc_fw->guc_fw_obj, 0, 0); + if (ret) { + DRM_DEBUG_DRIVER("pin failed %d\n", ret); + return ret; + } + + /* Invalidate GuC TLB to let GuC take the latest updates to GTT. */ + I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + /* init WOPCM */ + I915_WRITE(GUC_WOPCM_SIZE, GUC_WOPCM_SIZE_VALUE); + I915_WRITE(DMA_GUC_WOPCM_OFFSET, GUC_WOPCM_OFFSET_VALUE); + + /* Enable MIA caching. GuC clock gating is disabled. */ + I915_WRITE(GUC_SHIM_CONTROL, GUC_SHIM_CONTROL_VALUE); + + /* WaC6DisallowByGfxPause*/ + I915_WRITE(GEN6_GFXPAUSE, 0x30FFF); + + if (IS_BROXTON(dev)) + I915_WRITE(GEN9LP_GT_PM_CONFIG, GT_DOORBELL_ENABLE); + else + I915_WRITE(GEN9_GT_PM_CONFIG, GT_DOORBELL_ENABLE); + + if (IS_GEN9(dev)) { + /* DOP Clock Gating Enable for GuC clocks */ + I915_WRITE(GEN7_MISCCPCTL, (GEN8_DOP_CLOCK_GATE_GUC_ENABLE | + I915_READ(GEN7_MISCCPCTL))); + + /* allows for 5us before GT can go to RC6 */ + I915_WRITE(GUC_ARAT_C6DIS, 0x1FF); + } + + set_guc_init_params(dev_priv); + + ret = guc_ucode_xfer_dma(dev_priv); + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + + /* + * We keep the object pages for reuse during resume. But we can unpin it + * now that DMA has completed, so it doesn't continue to take up space. + */ + i915_gem_object_ggtt_unpin(guc_fw->guc_fw_obj); + + return ret; +} + +/** + * intel_guc_ucode_load() - load GuC uCode into the device + * @dev: drm device + * + * Called from gem_init_hw() during driver loading and also after a GPU reset. + * + * The firmware image should have already been fetched into memory by the + * earlier call to intel_guc_ucode_init(), so here we need only check that + * is succeeded, and then transfer the image to the h/w. + * + * Return: non-zero code on error + */ +int intel_guc_ucode_load(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; + int err = 0; + + DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n", + intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status), + intel_guc_fw_status_repr(guc_fw->guc_fw_load_status)); + + direct_interrupts_to_host(dev_priv); + i915_guc_submission_disable(dev); + + if (guc_fw->guc_fw_fetch_status == GUC_FIRMWARE_NONE) + return 0; + + if (guc_fw->guc_fw_fetch_status == GUC_FIRMWARE_SUCCESS && + guc_fw->guc_fw_load_status == GUC_FIRMWARE_FAIL) + return -ENOEXEC; + + guc_fw->guc_fw_load_status = GUC_FIRMWARE_PENDING; + + DRM_DEBUG_DRIVER("GuC fw fetch status %s\n", + intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status)); + + switch (guc_fw->guc_fw_fetch_status) { + case GUC_FIRMWARE_FAIL: + /* something went wrong :( */ + err = -EIO; + goto fail; + + case GUC_FIRMWARE_NONE: + case GUC_FIRMWARE_PENDING: + default: + /* "can't happen" */ + WARN_ONCE(1, "GuC fw %s invalid guc_fw_fetch_status %s [%d]\n", + guc_fw->guc_fw_path, + intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status), + guc_fw->guc_fw_fetch_status); + err = -ENXIO; + goto fail; + + case GUC_FIRMWARE_SUCCESS: + break; + } + + err = i915_guc_submission_init(dev); + if (err) + goto fail; + + err = guc_ucode_xfer(dev_priv); + if (err) + goto fail; + + guc_fw->guc_fw_load_status = GUC_FIRMWARE_SUCCESS; + + DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n", + intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status), + intel_guc_fw_status_repr(guc_fw->guc_fw_load_status)); + + if (i915.enable_guc_submission) { + err = i915_guc_submission_enable(dev); + if (err) + goto fail; + direct_interrupts_to_guc(dev_priv); + } + + return 0; + +fail: + if (guc_fw->guc_fw_load_status == GUC_FIRMWARE_PENDING) + guc_fw->guc_fw_load_status = GUC_FIRMWARE_FAIL; + + direct_interrupts_to_host(dev_priv); + i915_guc_submission_disable(dev); + + return err; +} + +static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) +{ + struct drm_i915_gem_object *obj; + const struct firmware *fw; + const u8 *css_header; + const size_t minsize = UOS_CSS_HEADER_SIZE + UOS_RSA_SIG_SIZE; + const size_t maxsize = GUC_WOPCM_SIZE_VALUE + UOS_RSA_SIG_SIZE + - 0x8000; /* 32k reserved (8K stack + 24k context) */ + int err; + + DRM_DEBUG_DRIVER("before requesting firmware: GuC fw fetch status %s\n", + intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status)); + + err = request_firmware(&fw, guc_fw->guc_fw_path, &dev->pdev->dev); + if (err) + goto fail; + if (!fw) + goto fail; + + DRM_DEBUG_DRIVER("fetch GuC fw from %s succeeded, fw %p\n", + guc_fw->guc_fw_path, fw); + DRM_DEBUG_DRIVER("firmware file size %zu (minimum %zu, maximum %zu)\n", + fw->size, minsize, maxsize); + + /* Check the size of the blob befoe examining buffer contents */ + if (fw->size < minsize || fw->size > maxsize) + goto fail; + + /* + * The GuC firmware image has the version number embedded at a well-known + * offset within the firmware blob; note that major / minor version are + * TWO bytes each (i.e. u16), although all pointers and offsets are defined + * in terms of bytes (u8). + */ + css_header = fw->data + UOS_CSS_HEADER_OFFSET; + guc_fw->guc_fw_major_found = *(u16 *)(css_header + UOS_VER_MAJOR_OFFSET); + guc_fw->guc_fw_minor_found = *(u16 *)(css_header + UOS_VER_MINOR_OFFSET); + + if (guc_fw->guc_fw_major_found != guc_fw->guc_fw_major_wanted || + guc_fw->guc_fw_minor_found < guc_fw->guc_fw_minor_wanted) { + DRM_ERROR("GuC firmware version %d.%d, required %d.%d\n", + guc_fw->guc_fw_major_found, guc_fw->guc_fw_minor_found, + guc_fw->guc_fw_major_wanted, guc_fw->guc_fw_minor_wanted); + err = -ENOEXEC; + goto fail; + } + + DRM_DEBUG_DRIVER("firmware version %d.%d OK (minimum %d.%d)\n", + guc_fw->guc_fw_major_found, guc_fw->guc_fw_minor_found, + guc_fw->guc_fw_major_wanted, guc_fw->guc_fw_minor_wanted); + + obj = i915_gem_object_create_from_data(dev, fw->data, fw->size); + if (IS_ERR_OR_NULL(obj)) { + err = obj ? PTR_ERR(obj) : -ENOMEM; + goto fail; + } + + guc_fw->guc_fw_obj = obj; + guc_fw->guc_fw_size = fw->size; + + DRM_DEBUG_DRIVER("GuC fw fetch status SUCCESS, obj %p\n", + guc_fw->guc_fw_obj); + + release_firmware(fw); + guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_SUCCESS; + return; + +fail: + DRM_DEBUG_DRIVER("GuC fw fetch status FAIL; err %d, fw %p, obj %p\n", + err, fw, guc_fw->guc_fw_obj); + DRM_ERROR("Failed to fetch GuC firmware from %s (error %d)\n", + guc_fw->guc_fw_path, err); + + obj = guc_fw->guc_fw_obj; + if (obj) + drm_gem_object_unreference(&obj->base); + guc_fw->guc_fw_obj = NULL; + + release_firmware(fw); /* OK even if fw is NULL */ + guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_FAIL; +} + +/** + * intel_guc_ucode_init() - define parameters and fetch firmware + * @dev: drm device + * + * Called early during driver load, but after GEM is initialised. + * The device struct_mutex must be held by the caller, as we're + * going to allocate a GEM object to hold the firmware image. + * + * The firmware will be transferred to the GuC's memory later, + * when intel_guc_ucode_load() is called. + */ +void intel_guc_ucode_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; + const char *fw_path; + + if (!HAS_GUC_SCHED(dev)) + i915.enable_guc_submission = false; + + if (!HAS_GUC_UCODE(dev)) { + fw_path = NULL; + } else if (IS_SKYLAKE(dev)) { + fw_path = I915_SKL_GUC_UCODE; + guc_fw->guc_fw_major_wanted = 4; + guc_fw->guc_fw_minor_wanted = 3; + } else { + i915.enable_guc_submission = false; + fw_path = ""; /* unknown device */ + } + + guc_fw->guc_dev = dev; + guc_fw->guc_fw_path = fw_path; + guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_NONE; + guc_fw->guc_fw_load_status = GUC_FIRMWARE_NONE; + + if (fw_path == NULL) + return; + + if (*fw_path == '\0') { + DRM_ERROR("No GuC firmware known for this platform\n"); + guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_FAIL; + return; + } + + guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_PENDING; + DRM_DEBUG_DRIVER("GuC firmware pending, path %s\n", fw_path); + guc_fw_fetch(dev, guc_fw); + /* status must now be FAIL or SUCCESS */ +} + +/** + * intel_guc_ucode_fini() - clean up all allocated resources + * @dev: drm device + */ +void intel_guc_ucode_fini(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; + + direct_interrupts_to_host(dev_priv); + i915_guc_submission_fini(dev); + + if (guc_fw->guc_fw_obj) + drm_gem_object_unreference(&guc_fw->guc_fw_obj->base); + guc_fw->guc_fw_obj = NULL; + + guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_NONE; +} diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index dcd336bcdfe7..feb31d891482 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -848,8 +848,8 @@ static void intel_hdmi_prepare(struct intel_encoder *encoder) u32 hdmi_val; hdmi_val = SDVO_ENCODING_HDMI; - if (!HAS_PCH_SPLIT(dev)) - hdmi_val |= intel_hdmi->color_range; + if (!HAS_PCH_SPLIT(dev) && crtc->config->limited_color_range) + hdmi_val |= HDMI_COLOR_RANGE_16_235; if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC) hdmi_val |= SDVO_VSYNC_ACTIVE_HIGH; if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC) @@ -1260,11 +1260,12 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, if (intel_hdmi->color_range_auto) { /* See CEA-861-E - 5.1 Default Encoding Parameters */ - if (pipe_config->has_hdmi_sink && - drm_match_cea_mode(adjusted_mode) > 1) - intel_hdmi->color_range = HDMI_COLOR_RANGE_16_235; - else - intel_hdmi->color_range = 0; + pipe_config->limited_color_range = + pipe_config->has_hdmi_sink && + drm_match_cea_mode(adjusted_mode) > 1; + } else { + pipe_config->limited_color_range = + intel_hdmi->limited_color_range; } if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) { @@ -1273,9 +1274,6 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, clock_12bpc *= 2; } - if (intel_hdmi->color_range) - pipe_config->limited_color_range = true; - if (HAS_PCH_SPLIT(dev) && !HAS_DDI(dev)) pipe_config->has_pch_encoder = true; @@ -1470,7 +1468,7 @@ intel_hdmi_set_property(struct drm_connector *connector, if (property == dev_priv->broadcast_rgb_property) { bool old_auto = intel_hdmi->color_range_auto; - uint32_t old_range = intel_hdmi->color_range; + bool old_range = intel_hdmi->limited_color_range; switch (val) { case INTEL_BROADCAST_RGB_AUTO: @@ -1478,18 +1476,18 @@ intel_hdmi_set_property(struct drm_connector *connector, break; case INTEL_BROADCAST_RGB_FULL: intel_hdmi->color_range_auto = false; - intel_hdmi->color_range = 0; + intel_hdmi->limited_color_range = false; break; case INTEL_BROADCAST_RGB_LIMITED: intel_hdmi->color_range_auto = false; - intel_hdmi->color_range = HDMI_COLOR_RANGE_16_235; + intel_hdmi->limited_color_range = true; break; default: return -EINVAL; } if (old_auto == intel_hdmi->color_range_auto && - old_range == intel_hdmi->color_range) + old_range == intel_hdmi->limited_color_range) return 0; goto done; @@ -1617,6 +1615,50 @@ static void vlv_hdmi_pre_pll_enable(struct intel_encoder *encoder) mutex_unlock(&dev_priv->sb_lock); } +static void chv_data_lane_soft_reset(struct intel_encoder *encoder, + bool reset) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(&encoder->base)); + struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + enum pipe pipe = crtc->pipe; + uint32_t val; + + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW0(ch)); + if (reset) + val &= ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); + else + val |= DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET; + vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW0(ch), val); + + if (crtc->config->lane_count > 2) { + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW0(ch)); + if (reset) + val &= ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); + else + val |= DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET; + vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW0(ch), val); + } + + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW1(ch)); + val |= CHV_PCS_REQ_SOFTRESET_EN; + if (reset) + val &= ~DPIO_PCS_CLK_SOFT_RESET; + else + val |= DPIO_PCS_CLK_SOFT_RESET; + vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW1(ch), val); + + if (crtc->config->lane_count > 2) { + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW1(ch)); + val |= CHV_PCS_REQ_SOFTRESET_EN; + if (reset) + val &= ~DPIO_PCS_CLK_SOFT_RESET; + else + val |= DPIO_PCS_CLK_SOFT_RESET; + vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW1(ch), val); + } +} + static void chv_hdmi_pre_pll_enable(struct intel_encoder *encoder) { struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); @@ -1630,8 +1672,21 @@ static void chv_hdmi_pre_pll_enable(struct intel_encoder *encoder) intel_hdmi_prepare(encoder); + /* + * Must trick the second common lane into life. + * Otherwise we can't even access the PLL. + */ + if (ch == DPIO_CH0 && pipe == PIPE_B) + dport->release_cl2_override = + !chv_phy_powergate_ch(dev_priv, DPIO_PHY0, DPIO_CH1, true); + + chv_phy_powergate_lanes(encoder, true, 0x0); + mutex_lock(&dev_priv->sb_lock); + /* Assert data lane reset */ + chv_data_lane_soft_reset(encoder, true); + /* program left/right clock distribution */ if (pipe != PIPE_B) { val = vlv_dpio_read(dev_priv, pipe, _CHV_CMN_DW5_CH0); @@ -1683,6 +1738,39 @@ static void chv_hdmi_pre_pll_enable(struct intel_encoder *encoder) mutex_unlock(&dev_priv->sb_lock); } +static void chv_hdmi_post_pll_disable(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum pipe pipe = to_intel_crtc(encoder->base.crtc)->pipe; + u32 val; + + mutex_lock(&dev_priv->sb_lock); + + /* disable left/right clock distribution */ + if (pipe != PIPE_B) { + val = vlv_dpio_read(dev_priv, pipe, _CHV_CMN_DW5_CH0); + val &= ~(CHV_BUFLEFTENA1_MASK | CHV_BUFRIGHTENA1_MASK); + vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW5_CH0, val); + } else { + val = vlv_dpio_read(dev_priv, pipe, _CHV_CMN_DW1_CH1); + val &= ~(CHV_BUFLEFTENA2_MASK | CHV_BUFRIGHTENA2_MASK); + vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW1_CH1, val); + } + + mutex_unlock(&dev_priv->sb_lock); + + /* + * Leave the power down bit cleared for at least one + * lane so that chv_powergate_phy_ch() will power + * on something when the channel is otherwise unused. + * When the port is off and the override is removed + * the lanes power down anyway, so otherwise it doesn't + * really matter what the state of power down bits is + * after this. + */ + chv_phy_powergate_lanes(encoder, false, 0x0); +} + static void vlv_hdmi_post_disable(struct intel_encoder *encoder) { struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); @@ -1701,33 +1789,13 @@ static void vlv_hdmi_post_disable(struct intel_encoder *encoder) static void chv_hdmi_post_disable(struct intel_encoder *encoder) { - struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_crtc *intel_crtc = - to_intel_crtc(encoder->base.crtc); - enum dpio_channel ch = vlv_dport_to_channel(dport); - enum pipe pipe = intel_crtc->pipe; - u32 val; mutex_lock(&dev_priv->sb_lock); - /* Propagate soft reset to data lane reset */ - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW1(ch)); - val |= CHV_PCS_REQ_SOFTRESET_EN; - vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW1(ch), val); - - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW1(ch)); - val |= CHV_PCS_REQ_SOFTRESET_EN; - vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW1(ch), val); - - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW0(ch)); - val &= ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); - vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW0(ch), val); - - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW0(ch)); - val &= ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); - vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW0(ch), val); + /* Assert data lane reset */ + chv_data_lane_soft_reset(encoder, true); mutex_unlock(&dev_priv->sb_lock); } @@ -1758,23 +1826,6 @@ static void chv_hdmi_pre_enable(struct intel_encoder *encoder) val &= ~DPIO_LANEDESKEW_STRAP_OVRD; vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW11(ch), val); - /* Deassert soft data lane reset*/ - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW1(ch)); - val |= CHV_PCS_REQ_SOFTRESET_EN; - vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW1(ch), val); - - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW1(ch)); - val |= CHV_PCS_REQ_SOFTRESET_EN; - vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW1(ch), val); - - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW0(ch)); - val |= (DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); - vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW0(ch), val); - - val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW0(ch)); - val |= (DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); - vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW0(ch), val); - /* Program Tx latency optimal setting */ for (i = 0; i < 4; i++) { /* Set the upar bit */ @@ -1817,6 +1868,9 @@ static void chv_hdmi_pre_enable(struct intel_encoder *encoder) DPIO_TX1_STAGGER_MULT(7) | DPIO_TX2_STAGGER_MULT(5)); + /* Deassert data lane reset */ + chv_data_lane_soft_reset(encoder, false); + /* Clear calc init */ val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch)); val &= ~(DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3); @@ -1851,31 +1905,33 @@ static void chv_hdmi_pre_enable(struct intel_encoder *encoder) for (i = 0; i < 4; i++) { val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW2(ch, i)); + val &= ~DPIO_SWING_MARGIN000_MASK; val |= 102 << DPIO_SWING_MARGIN000_SHIFT; + + /* + * Supposedly this value shouldn't matter when unique transition + * scale is disabled, but in fact it does matter. Let's just + * always program the same value and hope it's OK. + */ + val &= ~(0xff << DPIO_UNIQ_TRANS_SCALE_SHIFT); + val |= 0x9a << DPIO_UNIQ_TRANS_SCALE_SHIFT; + vlv_dpio_write(dev_priv, pipe, CHV_TX_DW2(ch, i), val); } - /* Disable unique transition scale */ + /* + * The document said it needs to set bit 27 for ch0 and bit 26 + * for ch1. Might be a typo in the doc. + * For now, for this unique transition scale selection, set bit + * 27 for ch0 and ch1. + */ for (i = 0; i < 4; i++) { val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW3(ch, i)); val &= ~DPIO_TX_UNIQ_TRANS_SCALE_EN; vlv_dpio_write(dev_priv, pipe, CHV_TX_DW3(ch, i), val); } - /* Additional steps for 1200mV-0dB */ -#if 0 - val = vlv_dpio_read(dev_priv, pipe, VLV_TX_DW3(ch)); - if (ch) - val |= DPIO_TX_UNIQ_TRANS_SCALE_CH1; - else - val |= DPIO_TX_UNIQ_TRANS_SCALE_CH0; - vlv_dpio_write(dev_priv, pipe, VLV_TX_DW3(ch), val); - - vlv_dpio_write(dev_priv, pipe, VLV_TX_DW2(ch), - vlv_dpio_read(dev_priv, pipe, VLV_TX_DW2(ch)) | - (0x9a << DPIO_UNIQ_TRANS_SCALE_SHIFT)); -#endif /* Start swing calculation */ val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch)); val |= DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3; @@ -1899,6 +1955,12 @@ static void chv_hdmi_pre_enable(struct intel_encoder *encoder) g4x_enable_hdmi(encoder); vlv_wait_port_ready(dev_priv, dport, 0x0); + + /* Second common lane will stay alive on its own now */ + if (dport->release_cl2_override) { + chv_phy_powergate_ch(dev_priv, DPIO_PHY0, DPIO_CH1, false); + dport->release_cl2_override = false; + } } static void intel_hdmi_destroy(struct drm_connector *connector) @@ -1974,7 +2036,14 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, intel_hdmi->ddc_bus = GMBUS_PIN_1_BXT; else intel_hdmi->ddc_bus = GMBUS_PIN_DPB; - intel_encoder->hpd_pin = HPD_PORT_B; + /* + * On BXT A0/A1, sw needs to activate DDIA HPD logic and + * interrupts to check the external panel connection. + */ + if (IS_BROXTON(dev_priv) && (INTEL_REVID(dev) < BXT_REVID_B0)) + intel_encoder->hpd_pin = HPD_PORT_A; + else + intel_encoder->hpd_pin = HPD_PORT_B; break; case PORT_C: if (IS_BROXTON(dev_priv)) @@ -2097,6 +2166,7 @@ void intel_hdmi_init(struct drm_device *dev, int hdmi_reg, enum port port) intel_encoder->pre_enable = chv_hdmi_pre_enable; intel_encoder->enable = vlv_enable_hdmi; intel_encoder->post_disable = chv_hdmi_post_disable; + intel_encoder->post_pll_disable = chv_hdmi_post_pll_disable; } else if (IS_VALLEYVIEW(dev)) { intel_encoder->pre_pll_enable = vlv_hdmi_pre_pll_enable; intel_encoder->pre_enable = vlv_hdmi_pre_enable; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 72e0edd7bbde..40cbba4ea4ba 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -196,13 +196,21 @@ reg_state[CTX_PDP ## n ## _LDW+1] = lower_32_bits(_addr); \ } +#define ASSIGN_CTX_PML4(ppgtt, reg_state) { \ + reg_state[CTX_PDP0_UDW + 1] = upper_32_bits(px_dma(&ppgtt->pml4)); \ + reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \ +} + enum { ADVANCED_CONTEXT = 0, - LEGACY_CONTEXT, + LEGACY_32B_CONTEXT, ADVANCED_AD_CONTEXT, LEGACY_64B_CONTEXT }; -#define GEN8_CTX_MODE_SHIFT 3 +#define GEN8_CTX_ADDRESSING_MODE_SHIFT 3 +#define GEN8_CTX_ADDRESSING_MODE(dev) (USES_FULL_48BIT_PPGTT(dev) ?\ + LEGACY_64B_CONTEXT :\ + LEGACY_32B_CONTEXT) enum { FAULT_AND_HANG = 0, FAULT_AND_HALT, /* Debug only */ @@ -228,6 +236,12 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists { WARN_ON(i915.enable_ppgtt == -1); + /* On platforms with execlist available, vGPU will only + * support execlist mode, no ring buffer mode. + */ + if (HAS_LOGICAL_RING_CONTEXTS(dev) && intel_vgpu_active(dev)) + return 1; + if (INTEL_INFO(dev)->gen >= 9) return 1; @@ -255,25 +269,27 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists */ u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj) { - u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj); + u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj) + + LRC_PPHWSP_PN * PAGE_SIZE; /* LRCA is required to be 4K aligned so the more significant 20 bits * are globally unique */ return lrca >> 12; } -static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_request *rq) +uint64_t intel_lr_context_descriptor(struct intel_context *ctx, + struct intel_engine_cs *ring) { - struct intel_engine_cs *ring = rq->ring; struct drm_device *dev = ring->dev; - struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state; + struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state; uint64_t desc; - uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj); + uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj) + + LRC_PPHWSP_PN * PAGE_SIZE; WARN_ON(lrca & 0xFFFFFFFF00000FFFULL); desc = GEN8_CTX_VALID; - desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT; + desc |= GEN8_CTX_ADDRESSING_MODE(dev) << GEN8_CTX_ADDRESSING_MODE_SHIFT; if (IS_GEN8(ctx_obj->base.dev)) desc |= GEN8_CTX_L3LLC_COHERENT; desc |= GEN8_CTX_PRIVILEGE; @@ -304,13 +320,13 @@ static void execlists_elsp_write(struct drm_i915_gem_request *rq0, uint64_t desc[2]; if (rq1) { - desc[1] = execlists_ctx_descriptor(rq1); + desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->ring); rq1->elsp_submitted++; } else { desc[1] = 0; } - desc[0] = execlists_ctx_descriptor(rq0); + desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->ring); rq0->elsp_submitted++; /* You must always write both descriptors in the order below. */ @@ -342,16 +358,18 @@ static int execlists_update_context(struct drm_i915_gem_request *rq) WARN_ON(!i915_gem_obj_is_pinned(ctx_obj)); WARN_ON(!i915_gem_obj_is_pinned(rb_obj)); - page = i915_gem_object_get_page(ctx_obj, 1); + page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); reg_state = kmap_atomic(page); reg_state[CTX_RING_TAIL+1] = rq->tail; reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj); - /* True PPGTT with dynamic page allocation: update PDP registers and - * point the unallocated PDPs to the scratch page - */ - if (ppgtt) { + if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { + /* True 32b PPGTT with dynamic page allocation: update PDP + * registers and point the unallocated PDPs to scratch page. + * PML4 is allocated during ppgtt init, so this is not needed + * in 48-bit mode. + */ ASSIGN_CTX_PDP(ppgtt, reg_state, 3); ASSIGN_CTX_PDP(ppgtt, reg_state, 2); ASSIGN_CTX_PDP(ppgtt, reg_state, 1); @@ -538,8 +556,6 @@ static int execlists_context_queue(struct drm_i915_gem_request *request) i915_gem_request_reference(request); - request->tail = request->ringbuf->tail; - spin_lock_irq(&ring->execlist_lock); list_for_each_entry(cursor, &ring->execlist_queue, execlist_link) @@ -692,13 +708,19 @@ static void intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) { struct intel_engine_cs *ring = request->ring; + struct drm_i915_private *dev_priv = request->i915; intel_logical_ring_advance(request->ringbuf); + request->tail = request->ringbuf->tail; + if (intel_ring_stopped(ring)) return; - execlists_context_queue(request); + if (dev_priv->guc.execbuf_client) + i915_guc_submit(dev_priv->guc.execbuf_client, request); + else + execlists_context_queue(request); } static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf) @@ -988,6 +1010,7 @@ int logical_ring_flush_all_caches(struct drm_i915_gem_request *req) static int intel_lr_context_pin(struct drm_i915_gem_request *rq) { + struct drm_i915_private *dev_priv = rq->i915; struct intel_engine_cs *ring = rq->ring; struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state; struct intel_ringbuffer *ringbuf = rq->ringbuf; @@ -995,8 +1018,8 @@ static int intel_lr_context_pin(struct drm_i915_gem_request *rq) WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex)); if (rq->ctx->engine[ring->id].pin_count++ == 0) { - ret = i915_gem_obj_ggtt_pin(ctx_obj, - GEN8_LR_CONTEXT_ALIGN, 0); + ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, + PIN_OFFSET_BIAS | GUC_WOPCM_TOP); if (ret) goto reset_pin_count; @@ -1005,6 +1028,10 @@ static int intel_lr_context_pin(struct drm_i915_gem_request *rq) goto unpin_ctx_obj; ctx_obj->dirty = true; + + /* Invalidate GuC TLB. */ + if (i915.enable_guc_submission) + I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); } return ret; @@ -1111,7 +1138,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *ring, if (IS_SKYLAKE(ring->dev) && INTEL_REVID(ring->dev) <= SKL_REVID_E0) l3sqc4_flush |= GEN8_LQSC_RO_PERF_DIS; - wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8(1) | + wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT)); wa_ctx_emit(batch, index, GEN8_L3SQCREG4); wa_ctx_emit(batch, index, ring->scratch.gtt_offset + 256); @@ -1129,7 +1156,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *ring, wa_ctx_emit(batch, index, 0); wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8(1) | + wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT)); wa_ctx_emit(batch, index, GEN8_L3SQCREG4); wa_ctx_emit(batch, index, ring->scratch.gtt_offset + 256); @@ -1517,12 +1544,16 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, * Ideally, we should set Force PD Restore in ctx descriptor, * but we can't. Force Restore would be a second option, but * it is unsafe in case of lite-restore (because the ctx is - * not idle). */ + * not idle). PML4 is allocated during ppgtt init so this is + * not needed in 48-bit.*/ if (req->ctx->ppgtt && (intel_ring_flag(req->ring) & req->ctx->ppgtt->pd_dirty_rings)) { - ret = intel_logical_ring_emit_pdps(req); - if (ret) - return ret; + if (!USES_FULL_48BIT_PPGTT(req->i915) && + !intel_vgpu_active(req->i915->dev)) { + ret = intel_logical_ring_emit_pdps(req); + if (ret) + return ret; + } req->ctx->ppgtt->pd_dirty_rings &= ~intel_ring_flag(req->ring); } @@ -1688,6 +1719,34 @@ static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno) intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno); } +static u32 bxt_a_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency) +{ + + /* + * On BXT A steppings there is a HW coherency issue whereby the + * MI_STORE_DATA_IMM storing the completed request's seqno + * occasionally doesn't invalidate the CPU cache. Work around this by + * clflushing the corresponding cacheline whenever the caller wants + * the coherency to be guaranteed. Note that this cacheline is known + * to be clean at this point, since we only write it in + * bxt_a_set_seqno(), where we also do a clflush after the write. So + * this clflush in practice becomes an invalidate operation. + */ + + if (!lazy_coherency) + intel_flush_status_page(ring, I915_GEM_HWS_INDEX); + + return intel_read_status_page(ring, I915_GEM_HWS_INDEX); +} + +static void bxt_a_set_seqno(struct intel_engine_cs *ring, u32 seqno) +{ + intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno); + + /* See bxt_a_get_seqno() explaining the reason for the clflush. */ + intel_flush_status_page(ring, I915_GEM_HWS_INDEX); +} + static int gen8_emit_request(struct drm_i915_gem_request *request) { struct intel_ringbuffer *ringbuf = request->ringbuf; @@ -1857,8 +1916,13 @@ static int logical_render_ring_init(struct drm_device *dev) ring->init_hw = gen8_init_render_ring; ring->init_context = gen8_init_rcs_context; ring->cleanup = intel_fini_pipe_control; - ring->get_seqno = gen8_get_seqno; - ring->set_seqno = gen8_set_seqno; + if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) { + ring->get_seqno = bxt_a_get_seqno; + ring->set_seqno = bxt_a_set_seqno; + } else { + ring->get_seqno = gen8_get_seqno; + ring->set_seqno = gen8_set_seqno; + } ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush_render; ring->irq_get = gen8_logical_ring_get_irq; @@ -1904,8 +1968,13 @@ static int logical_bsd_ring_init(struct drm_device *dev) GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; ring->init_hw = gen8_init_common_ring; - ring->get_seqno = gen8_get_seqno; - ring->set_seqno = gen8_set_seqno; + if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) { + ring->get_seqno = bxt_a_get_seqno; + ring->set_seqno = bxt_a_set_seqno; + } else { + ring->get_seqno = gen8_get_seqno; + ring->set_seqno = gen8_set_seqno; + } ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; ring->irq_get = gen8_logical_ring_get_irq; @@ -1954,8 +2023,13 @@ static int logical_blt_ring_init(struct drm_device *dev) GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT; ring->init_hw = gen8_init_common_ring; - ring->get_seqno = gen8_get_seqno; - ring->set_seqno = gen8_set_seqno; + if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) { + ring->get_seqno = bxt_a_get_seqno; + ring->set_seqno = bxt_a_set_seqno; + } else { + ring->get_seqno = gen8_get_seqno; + ring->set_seqno = gen8_set_seqno; + } ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; ring->irq_get = gen8_logical_ring_get_irq; @@ -1979,8 +2053,13 @@ static int logical_vebox_ring_init(struct drm_device *dev) GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT; ring->init_hw = gen8_init_common_ring; - ring->get_seqno = gen8_get_seqno; - ring->set_seqno = gen8_set_seqno; + if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) { + ring->get_seqno = bxt_a_get_seqno; + ring->set_seqno = bxt_a_set_seqno; + } else { + ring->get_seqno = gen8_get_seqno; + ring->set_seqno = gen8_set_seqno; + } ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; ring->irq_get = gen8_logical_ring_get_irq; @@ -2126,7 +2205,7 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o /* The second page of the context object contains some fields which must * be set up prior to the first execution. */ - page = i915_gem_object_get_page(ctx_obj, 1); + page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); reg_state = kmap_atomic(page); /* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM @@ -2203,13 +2282,24 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0); reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0); - /* With dynamic page allocation, PDPs may not be allocated at this point, - * Point the unallocated PDPs to the scratch page - */ - ASSIGN_CTX_PDP(ppgtt, reg_state, 3); - ASSIGN_CTX_PDP(ppgtt, reg_state, 2); - ASSIGN_CTX_PDP(ppgtt, reg_state, 1); - ASSIGN_CTX_PDP(ppgtt, reg_state, 0); + if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { + /* 64b PPGTT (48bit canonical) + * PDP0_DESCRIPTOR contains the base address to PML4 and + * other PDP Descriptors are ignored. + */ + ASSIGN_CTX_PML4(ppgtt, reg_state); + } else { + /* 32b PPGTT + * PDP*_DESCRIPTOR contains the base address of space supported. + * With dynamic page allocation, PDPs may not be allocated at + * this point. Point the unallocated PDPs to the scratch page + */ + ASSIGN_CTX_PDP(ppgtt, reg_state, 3); + ASSIGN_CTX_PDP(ppgtt, reg_state, 2); + ASSIGN_CTX_PDP(ppgtt, reg_state, 1); + ASSIGN_CTX_PDP(ppgtt, reg_state, 0); + } + if (ring->id == RCS) { reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); reg_state[CTX_R_PWR_CLK_STATE] = GEN8_R_PWR_CLK_STATE; @@ -2285,12 +2375,13 @@ static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring, struct drm_i915_gem_object *default_ctx_obj) { struct drm_i915_private *dev_priv = ring->dev->dev_private; + struct page *page; - /* The status page is offset 0 from the default context object - * in LRC mode. */ - ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(default_ctx_obj); - ring->status_page.page_addr = - kmap(sg_page(default_ctx_obj->pages->sgl)); + /* The HWSP is part of the default context object in LRC mode. */ + ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(default_ctx_obj) + + LRC_PPHWSP_PN * PAGE_SIZE; + page = i915_gem_object_get_page(default_ctx_obj, LRC_PPHWSP_PN); + ring->status_page.page_addr = kmap(page); ring->status_page.obj = default_ctx_obj; I915_WRITE(RING_HWS_PGA(ring->mmio_base), @@ -2316,6 +2407,7 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, { const bool is_global_default_ctx = (ctx == ring->default_context); struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *ctx_obj; uint32_t context_size; struct intel_ringbuffer *ringbuf; @@ -2326,6 +2418,9 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, context_size = round_up(get_lr_context_size(ring), 4096); + /* One extra page as the sharing data between driver and GuC */ + context_size += PAGE_SIZE * LRC_PPHWSP_PN; + ctx_obj = i915_gem_alloc_object(dev, context_size); if (!ctx_obj) { DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n"); @@ -2333,13 +2428,18 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, } if (is_global_default_ctx) { - ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0); + ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, + PIN_OFFSET_BIAS | GUC_WOPCM_TOP); if (ret) { DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n", ret); drm_gem_object_unreference(&ctx_obj->base); return ret; } + + /* Invalidate GuC TLB. */ + if (i915.enable_guc_submission) + I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); } ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL); @@ -2352,7 +2452,7 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, ringbuf->ring = ring; - ringbuf->size = 32 * PAGE_SIZE; + ringbuf->size = 4 * PAGE_SIZE; ringbuf->effective_size = ringbuf->size; ringbuf->head = 0; ringbuf->tail = 0; @@ -2452,7 +2552,7 @@ void intel_lr_context_reset(struct drm_device *dev, WARN(1, "Failed get_pages for context obj\n"); continue; } - page = i915_gem_object_get_page(ctx_obj, 1); + page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); reg_state = kmap_atomic(page); reg_state[CTX_RING_HEAD+1] = 0; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 64f89f9982a2..4cc54b371afd 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -68,12 +68,20 @@ static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf, } /* Logical Ring Contexts */ + +/* One extra page is added before LRC for GuC as shared data */ +#define LRC_GUCSHR_PN (0) +#define LRC_PPHWSP_PN (LRC_GUCSHR_PN + 1) +#define LRC_STATE_PN (LRC_PPHWSP_PN + 1) + void intel_lr_context_free(struct intel_context *ctx); int intel_lr_context_deferred_create(struct intel_context *ctx, struct intel_engine_cs *ring); void intel_lr_context_unpin(struct drm_i915_gem_request *req); void intel_lr_context_reset(struct drm_device *dev, struct intel_context *ctx); +uint64_t intel_lr_context_descriptor(struct intel_context *ctx, + struct intel_engine_cs *ring); /* Execlists */ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists); diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 881b5d13592e..0794dc84ff01 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -289,11 +289,14 @@ intel_lvds_mode_valid(struct drm_connector *connector, { struct intel_connector *intel_connector = to_intel_connector(connector); struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode; + int max_pixclk = to_i915(connector->dev)->max_dotclk_freq; if (mode->hdisplay > fixed_mode->hdisplay) return MODE_PANEL; if (mode->vdisplay > fixed_mode->vdisplay) return MODE_PANEL; + if (fixed_mode->clock > max_pixclk) + return MODE_CLOCK_HIGH; return MODE_OK; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 2e85fda94963..95b0b4b55fa6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -377,6 +377,13 @@ intel_ring_sync_index(struct intel_engine_cs *ring, return idx; } +static inline void +intel_flush_status_page(struct intel_engine_cs *ring, int reg) +{ + drm_clflush_virt_range(&ring->status_page.page_addr[reg], + sizeof(uint32_t)); +} + static inline u32 intel_read_status_page(struct intel_engine_cs *ring, int reg) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index af7fdb3bd663..3f682a1a08ce 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -855,6 +855,25 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, static void vlv_display_power_well_init(struct drm_i915_private *dev_priv) { + enum pipe pipe; + + /* + * Enable the CRI clock source so we can get at the + * display and the reference clock for VGA + * hotplug / manual detection. Supposedly DSI also + * needs the ref clock up and running. + * + * CHV DPLL B/C have some issues if VGA mode is enabled. + */ + for_each_pipe(dev_priv->dev, pipe) { + u32 val = I915_READ(DPLL(pipe)); + + val |= DPLL_REF_CLK_ENABLE_VLV | DPLL_VGA_MODE_DIS; + if (pipe != PIPE_A) + val |= DPLL_INTEGRATED_CRI_CLK_VLV; + + I915_WRITE(DPLL(pipe), val); + } spin_lock_irq(&dev_priv->irq_lock); valleyview_enable_display_irqs(dev_priv); @@ -906,13 +925,7 @@ static void vlv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, { WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC); - /* - * Enable the CRI clock source so we can get at the - * display and the reference clock for VGA - * hotplug / manual detection. - */ - I915_WRITE(DPLL(PIPE_B), I915_READ(DPLL(PIPE_B)) | DPLL_VGA_MODE_DIS | - DPLL_REF_CLK_ENABLE_VLV | DPLL_INTEGRATED_CRI_CLK_VLV); + /* since ref/cri clock was enabled */ udelay(1); /* >10ns for cmnreset, >0ns for sidereset */ vlv_set_power_well(dev_priv, power_well, true); @@ -947,30 +960,126 @@ static void vlv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, vlv_set_power_well(dev_priv, power_well, false); } +#define POWER_DOMAIN_MASK (BIT(POWER_DOMAIN_NUM) - 1) + +static struct i915_power_well *lookup_power_well(struct drm_i915_private *dev_priv, + int power_well_id) +{ + struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_well *power_well; + int i; + + for_each_power_well(i, power_well, POWER_DOMAIN_MASK, power_domains) { + if (power_well->data == power_well_id) + return power_well; + } + + return NULL; +} + +#define BITS_SET(val, bits) (((val) & (bits)) == (bits)) + +static void assert_chv_phy_status(struct drm_i915_private *dev_priv) +{ + struct i915_power_well *cmn_bc = + lookup_power_well(dev_priv, PUNIT_POWER_WELL_DPIO_CMN_BC); + struct i915_power_well *cmn_d = + lookup_power_well(dev_priv, PUNIT_POWER_WELL_DPIO_CMN_D); + u32 phy_control = dev_priv->chv_phy_control; + u32 phy_status = 0; + u32 tmp; + + if (cmn_bc->ops->is_enabled(dev_priv, cmn_bc)) { + phy_status |= PHY_POWERGOOD(DPIO_PHY0); + + /* this assumes override is only used to enable lanes */ + if ((phy_control & PHY_CH_POWER_DOWN_OVRD_EN(DPIO_PHY0, DPIO_CH0)) == 0) + phy_control |= PHY_CH_POWER_DOWN_OVRD(0xf, DPIO_PHY0, DPIO_CH0); + + if ((phy_control & PHY_CH_POWER_DOWN_OVRD_EN(DPIO_PHY0, DPIO_CH1)) == 0) + phy_control |= PHY_CH_POWER_DOWN_OVRD(0xf, DPIO_PHY0, DPIO_CH1); + + /* CL1 is on whenever anything is on in either channel */ + if (BITS_SET(phy_control, + PHY_CH_POWER_DOWN_OVRD(0xf, DPIO_PHY0, DPIO_CH0) | + PHY_CH_POWER_DOWN_OVRD(0xf, DPIO_PHY0, DPIO_CH1))) + phy_status |= PHY_STATUS_CMN_LDO(DPIO_PHY0, DPIO_CH0); + + /* + * The DPLLB check accounts for the pipe B + port A usage + * with CL2 powered up but all the lanes in the second channel + * powered down. + */ + if (BITS_SET(phy_control, + PHY_CH_POWER_DOWN_OVRD(0xf, DPIO_PHY0, DPIO_CH1)) && + (I915_READ(DPLL(PIPE_B)) & DPLL_VCO_ENABLE) == 0) + phy_status |= PHY_STATUS_CMN_LDO(DPIO_PHY0, DPIO_CH1); + + if (BITS_SET(phy_control, + PHY_CH_POWER_DOWN_OVRD(0x3, DPIO_PHY0, DPIO_CH0))) + phy_status |= PHY_STATUS_SPLINE_LDO(DPIO_PHY0, DPIO_CH0, 0); + if (BITS_SET(phy_control, + PHY_CH_POWER_DOWN_OVRD(0xc, DPIO_PHY0, DPIO_CH0))) + phy_status |= PHY_STATUS_SPLINE_LDO(DPIO_PHY0, DPIO_CH0, 1); + + if (BITS_SET(phy_control, + PHY_CH_POWER_DOWN_OVRD(0x3, DPIO_PHY0, DPIO_CH1))) + phy_status |= PHY_STATUS_SPLINE_LDO(DPIO_PHY0, DPIO_CH1, 0); + if (BITS_SET(phy_control, + PHY_CH_POWER_DOWN_OVRD(0xc, DPIO_PHY0, DPIO_CH1))) + phy_status |= PHY_STATUS_SPLINE_LDO(DPIO_PHY0, DPIO_CH1, 1); + } + + if (cmn_d->ops->is_enabled(dev_priv, cmn_d)) { + phy_status |= PHY_POWERGOOD(DPIO_PHY1); + + /* this assumes override is only used to enable lanes */ + if ((phy_control & PHY_CH_POWER_DOWN_OVRD_EN(DPIO_PHY1, DPIO_CH0)) == 0) + phy_control |= PHY_CH_POWER_DOWN_OVRD(0xf, DPIO_PHY1, DPIO_CH0); + + if (BITS_SET(phy_control, + PHY_CH_POWER_DOWN_OVRD(0xf, DPIO_PHY1, DPIO_CH0))) + phy_status |= PHY_STATUS_CMN_LDO(DPIO_PHY1, DPIO_CH0); + + if (BITS_SET(phy_control, + PHY_CH_POWER_DOWN_OVRD(0x3, DPIO_PHY1, DPIO_CH0))) + phy_status |= PHY_STATUS_SPLINE_LDO(DPIO_PHY1, DPIO_CH0, 0); + if (BITS_SET(phy_control, + PHY_CH_POWER_DOWN_OVRD(0xc, DPIO_PHY1, DPIO_CH0))) + phy_status |= PHY_STATUS_SPLINE_LDO(DPIO_PHY1, DPIO_CH0, 1); + } + + /* + * The PHY may be busy with some initial calibration and whatnot, + * so the power state can take a while to actually change. + */ + if (wait_for((tmp = I915_READ(DISPLAY_PHY_STATUS)) == phy_status, 10)) + WARN(phy_status != tmp, + "Unexpected PHY_STATUS 0x%08x, expected 0x%08x (PHY_CONTROL=0x%08x)\n", + tmp, phy_status, dev_priv->chv_phy_control); +} + +#undef BITS_SET + static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { enum dpio_phy phy; + enum pipe pipe; + uint32_t tmp; WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC && power_well->data != PUNIT_POWER_WELL_DPIO_CMN_D); - /* - * Enable the CRI clock source so we can get at the - * display and the reference clock for VGA - * hotplug / manual detection. - */ if (power_well->data == PUNIT_POWER_WELL_DPIO_CMN_BC) { + pipe = PIPE_A; phy = DPIO_PHY0; - I915_WRITE(DPLL(PIPE_B), I915_READ(DPLL(PIPE_B)) | DPLL_VGA_MODE_DIS | - DPLL_REF_CLK_ENABLE_VLV); - I915_WRITE(DPLL(PIPE_B), I915_READ(DPLL(PIPE_B)) | DPLL_VGA_MODE_DIS | - DPLL_REF_CLK_ENABLE_VLV | DPLL_INTEGRATED_CRI_CLK_VLV); } else { + pipe = PIPE_C; phy = DPIO_PHY1; - I915_WRITE(DPLL(PIPE_C), I915_READ(DPLL(PIPE_C)) | DPLL_VGA_MODE_DIS | - DPLL_REF_CLK_ENABLE_VLV | DPLL_INTEGRATED_CRI_CLK_VLV); } + + /* since ref/cri clock was enabled */ udelay(1); /* >10ns for cmnreset, >0ns for sidereset */ vlv_set_power_well(dev_priv, power_well, true); @@ -978,8 +1087,38 @@ static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, if (wait_for(I915_READ(DISPLAY_PHY_STATUS) & PHY_POWERGOOD(phy), 1)) DRM_ERROR("Display PHY %d is not power up\n", phy); + mutex_lock(&dev_priv->sb_lock); + + /* Enable dynamic power down */ + tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW28); + tmp |= DPIO_DYNPWRDOWNEN_CH0 | DPIO_CL1POWERDOWNEN | + DPIO_SUS_CLK_CONFIG_GATE_CLKREQ; + vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW28, tmp); + + if (power_well->data == PUNIT_POWER_WELL_DPIO_CMN_BC) { + tmp = vlv_dpio_read(dev_priv, pipe, _CHV_CMN_DW6_CH1); + tmp |= DPIO_DYNPWRDOWNEN_CH1; + vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW6_CH1, tmp); + } else { + /* + * Force the non-existing CL2 off. BXT does this + * too, so maybe it saves some power even though + * CL2 doesn't exist? + */ + tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW30); + tmp |= DPIO_CL2_LDOFUSE_PWRENB; + vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW30, tmp); + } + + mutex_unlock(&dev_priv->sb_lock); + dev_priv->chv_phy_control |= PHY_COM_LANE_RESET_DEASSERT(phy); I915_WRITE(DISPLAY_PHY_CONTROL, dev_priv->chv_phy_control); + + DRM_DEBUG_KMS("Enabled DPIO PHY%d (PHY_CONTROL=0x%08x)\n", + phy, dev_priv->chv_phy_control); + + assert_chv_phy_status(dev_priv); } static void chv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, @@ -1003,6 +1142,124 @@ static void chv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, I915_WRITE(DISPLAY_PHY_CONTROL, dev_priv->chv_phy_control); vlv_set_power_well(dev_priv, power_well, false); + + DRM_DEBUG_KMS("Disabled DPIO PHY%d (PHY_CONTROL=0x%08x)\n", + phy, dev_priv->chv_phy_control); + + assert_chv_phy_status(dev_priv); +} + +static void assert_chv_phy_powergate(struct drm_i915_private *dev_priv, enum dpio_phy phy, + enum dpio_channel ch, bool override, unsigned int mask) +{ + enum pipe pipe = phy == DPIO_PHY0 ? PIPE_A : PIPE_C; + u32 reg, val, expected, actual; + + if (ch == DPIO_CH0) + reg = _CHV_CMN_DW0_CH0; + else + reg = _CHV_CMN_DW6_CH1; + + mutex_lock(&dev_priv->sb_lock); + val = vlv_dpio_read(dev_priv, pipe, reg); + mutex_unlock(&dev_priv->sb_lock); + + /* + * This assumes !override is only used when the port is disabled. + * All lanes should power down even without the override when + * the port is disabled. + */ + if (!override || mask == 0xf) { + expected = DPIO_ALLDL_POWERDOWN | DPIO_ANYDL_POWERDOWN; + /* + * If CH1 common lane is not active anymore + * (eg. for pipe B DPLL) the entire channel will + * shut down, which causes the common lane registers + * to read as 0. That means we can't actually check + * the lane power down status bits, but as the entire + * register reads as 0 it's a good indication that the + * channel is indeed entirely powered down. + */ + if (ch == DPIO_CH1 && val == 0) + expected = 0; + } else if (mask != 0x0) { + expected = DPIO_ANYDL_POWERDOWN; + } else { + expected = 0; + } + + if (ch == DPIO_CH0) + actual = val >> DPIO_ANYDL_POWERDOWN_SHIFT_CH0; + else + actual = val >> DPIO_ANYDL_POWERDOWN_SHIFT_CH1; + actual &= DPIO_ALLDL_POWERDOWN | DPIO_ANYDL_POWERDOWN; + + WARN(actual != expected, + "Unexpected DPIO lane power down: all %d, any %d. Expected: all %d, any %d. (0x%x = 0x%08x)\n", + !!(actual & DPIO_ALLDL_POWERDOWN), !!(actual & DPIO_ANYDL_POWERDOWN), + !!(expected & DPIO_ALLDL_POWERDOWN), !!(expected & DPIO_ANYDL_POWERDOWN), + reg, val); +} + +bool chv_phy_powergate_ch(struct drm_i915_private *dev_priv, enum dpio_phy phy, + enum dpio_channel ch, bool override) +{ + struct i915_power_domains *power_domains = &dev_priv->power_domains; + bool was_override; + + mutex_lock(&power_domains->lock); + + was_override = dev_priv->chv_phy_control & PHY_CH_POWER_DOWN_OVRD_EN(phy, ch); + + if (override == was_override) + goto out; + + if (override) + dev_priv->chv_phy_control |= PHY_CH_POWER_DOWN_OVRD_EN(phy, ch); + else + dev_priv->chv_phy_control &= ~PHY_CH_POWER_DOWN_OVRD_EN(phy, ch); + + I915_WRITE(DISPLAY_PHY_CONTROL, dev_priv->chv_phy_control); + + DRM_DEBUG_KMS("Power gating DPIO PHY%d CH%d (DPIO_PHY_CONTROL=0x%08x)\n", + phy, ch, dev_priv->chv_phy_control); + + assert_chv_phy_status(dev_priv); + +out: + mutex_unlock(&power_domains->lock); + + return was_override; +} + +void chv_phy_powergate_lanes(struct intel_encoder *encoder, + bool override, unsigned int mask) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct i915_power_domains *power_domains = &dev_priv->power_domains; + enum dpio_phy phy = vlv_dport_to_phy(enc_to_dig_port(&encoder->base)); + enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(&encoder->base)); + + mutex_lock(&power_domains->lock); + + dev_priv->chv_phy_control &= ~PHY_CH_POWER_DOWN_OVRD(0xf, phy, ch); + dev_priv->chv_phy_control |= PHY_CH_POWER_DOWN_OVRD(mask, phy, ch); + + if (override) + dev_priv->chv_phy_control |= PHY_CH_POWER_DOWN_OVRD_EN(phy, ch); + else + dev_priv->chv_phy_control &= ~PHY_CH_POWER_DOWN_OVRD_EN(phy, ch); + + I915_WRITE(DISPLAY_PHY_CONTROL, dev_priv->chv_phy_control); + + DRM_DEBUG_KMS("Power gating DPIO PHY%d CH%d lanes 0x%x (PHY_CONTROL=0x%08x)\n", + phy, ch, mask, dev_priv->chv_phy_control); + + assert_chv_phy_status(dev_priv); + + assert_chv_phy_powergate(dev_priv, phy, ch, override, mask); + + mutex_unlock(&power_domains->lock); } static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, @@ -1166,8 +1423,6 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, intel_runtime_pm_put(dev_priv); } -#define POWER_DOMAIN_MASK (BIT(POWER_DOMAIN_NUM) - 1) - #define HSW_ALWAYS_ON_POWER_DOMAINS ( \ BIT(POWER_DOMAIN_PIPE_A) | \ BIT(POWER_DOMAIN_TRANSCODER_EDP) | \ @@ -1429,21 +1684,6 @@ static struct i915_power_well chv_power_wells[] = { }, }; -static struct i915_power_well *lookup_power_well(struct drm_i915_private *dev_priv, - int power_well_id) -{ - struct i915_power_domains *power_domains = &dev_priv->power_domains; - struct i915_power_well *power_well; - int i; - - for_each_power_well(i, power_well, POWER_DOMAIN_MASK, power_domains) { - if (power_well->data == power_well_id) - return power_well; - } - - return NULL; -} - bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv, int power_well_id) { @@ -1629,19 +1869,72 @@ static void chv_phy_control_init(struct drm_i915_private *dev_priv) * DISPLAY_PHY_CONTROL can get corrupted if read. As a * workaround never ever read DISPLAY_PHY_CONTROL, and * instead maintain a shadow copy ourselves. Use the actual - * power well state to reconstruct the expected initial - * value. + * power well state and lane status to reconstruct the + * expected initial value. */ dev_priv->chv_phy_control = PHY_LDO_SEQ_DELAY(PHY_LDO_DELAY_600NS, DPIO_PHY0) | PHY_LDO_SEQ_DELAY(PHY_LDO_DELAY_600NS, DPIO_PHY1) | - PHY_CH_POWER_MODE(PHY_CH_SU_PSR, DPIO_PHY0, DPIO_CH0) | - PHY_CH_POWER_MODE(PHY_CH_SU_PSR, DPIO_PHY0, DPIO_CH1) | - PHY_CH_POWER_MODE(PHY_CH_SU_PSR, DPIO_PHY1, DPIO_CH0); - if (cmn_bc->ops->is_enabled(dev_priv, cmn_bc)) + PHY_CH_POWER_MODE(PHY_CH_DEEP_PSR, DPIO_PHY0, DPIO_CH0) | + PHY_CH_POWER_MODE(PHY_CH_DEEP_PSR, DPIO_PHY0, DPIO_CH1) | + PHY_CH_POWER_MODE(PHY_CH_DEEP_PSR, DPIO_PHY1, DPIO_CH0); + + /* + * If all lanes are disabled we leave the override disabled + * with all power down bits cleared to match the state we + * would use after disabling the port. Otherwise enable the + * override and set the lane powerdown bits accding to the + * current lane status. + */ + if (cmn_bc->ops->is_enabled(dev_priv, cmn_bc)) { + uint32_t status = I915_READ(DPLL(PIPE_A)); + unsigned int mask; + + mask = status & DPLL_PORTB_READY_MASK; + if (mask == 0xf) + mask = 0x0; + else + dev_priv->chv_phy_control |= + PHY_CH_POWER_DOWN_OVRD_EN(DPIO_PHY0, DPIO_CH0); + + dev_priv->chv_phy_control |= + PHY_CH_POWER_DOWN_OVRD(mask, DPIO_PHY0, DPIO_CH0); + + mask = (status & DPLL_PORTC_READY_MASK) >> 4; + if (mask == 0xf) + mask = 0x0; + else + dev_priv->chv_phy_control |= + PHY_CH_POWER_DOWN_OVRD_EN(DPIO_PHY0, DPIO_CH1); + + dev_priv->chv_phy_control |= + PHY_CH_POWER_DOWN_OVRD(mask, DPIO_PHY0, DPIO_CH1); + dev_priv->chv_phy_control |= PHY_COM_LANE_RESET_DEASSERT(DPIO_PHY0); - if (cmn_d->ops->is_enabled(dev_priv, cmn_d)) + } + + if (cmn_d->ops->is_enabled(dev_priv, cmn_d)) { + uint32_t status = I915_READ(DPIO_PHY_STATUS); + unsigned int mask; + + mask = status & DPLL_PORTD_READY_MASK; + + if (mask == 0xf) + mask = 0x0; + else + dev_priv->chv_phy_control |= + PHY_CH_POWER_DOWN_OVRD_EN(DPIO_PHY1, DPIO_CH0); + + dev_priv->chv_phy_control |= + PHY_CH_POWER_DOWN_OVRD(mask, DPIO_PHY1, DPIO_CH0); + dev_priv->chv_phy_control |= PHY_COM_LANE_RESET_DEASSERT(DPIO_PHY1); + } + + I915_WRITE(DISPLAY_PHY_CONTROL, dev_priv->chv_phy_control); + + DRM_DEBUG_KMS("Initial PHY_CONTROL=0x%08x\n", + dev_priv->chv_phy_control); } static void vlv_cmnlane_wa(struct drm_i915_private *dev_priv) @@ -1687,7 +1980,9 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv) power_domains->initializing = true; if (IS_CHERRYVIEW(dev)) { + mutex_lock(&power_domains->lock); chv_phy_control_init(dev_priv); + mutex_unlock(&power_domains->lock); } else if (IS_VALLEYVIEW(dev)) { mutex_lock(&power_domains->lock); vlv_cmnlane_wa(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index c98098e884cc..ca3dd7c682bd 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -53,7 +53,7 @@ #define IS_DIGITAL(c) (c->output_flag & (SDVO_TMDS_MASK | SDVO_LVDS_MASK)) -static const char *tv_format_names[] = { +static const char * const tv_format_names[] = { "NTSC_M" , "NTSC_J" , "NTSC_443", "PAL_B" , "PAL_D" , "PAL_G" , "PAL_H" , "PAL_I" , "PAL_M" , @@ -63,7 +63,7 @@ static const char *tv_format_names[] = { "SECAM_60" }; -#define TV_FORMAT_NUM (sizeof(tv_format_names) / sizeof(*tv_format_names)) +#define TV_FORMAT_NUM ARRAY_SIZE(tv_format_names) struct intel_sdvo { struct intel_encoder base; @@ -452,7 +452,7 @@ static void intel_sdvo_debug_write(struct intel_sdvo *intel_sdvo, u8 cmd, DRM_DEBUG_KMS("%s: W: %02X %s\n", SDVO_NAME(intel_sdvo), cmd, buffer); } -static const char *cmd_status_names[] = { +static const char * const cmd_status_names[] = { "Power on", "Success", "Not supported", diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 9d8af2f8a875..ca7e26430e66 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -76,7 +76,7 @@ static int usecs_to_scanlines(const struct drm_display_mode *mode, int usecs) * avoid random delays. The value written to @start_vbl_count should be * supplied to intel_pipe_update_end() for error checking. */ -void intel_pipe_update_start(struct intel_crtc *crtc, uint32_t *start_vbl_count) +void intel_pipe_update_start(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; const struct drm_display_mode *mode = &crtc->config->base.adjusted_mode; @@ -95,7 +95,7 @@ void intel_pipe_update_start(struct intel_crtc *crtc, uint32_t *start_vbl_count) max = vblank_start - 1; local_irq_disable(); - *start_vbl_count = 0; + crtc->start_vbl_count = 0; if (min <= 0 || max <= 0) return; @@ -134,9 +134,11 @@ void intel_pipe_update_start(struct intel_crtc *crtc, uint32_t *start_vbl_count) drm_crtc_vblank_put(&crtc->base); - *start_vbl_count = dev->driver->get_vblank_counter(dev, pipe); + crtc->start_vbl_time = ktime_get(); + crtc->start_vbl_count = dev->driver->get_vblank_counter(dev, pipe); - trace_i915_pipe_update_vblank_evaded(crtc, min, max, *start_vbl_count); + trace_i915_pipe_update_vblank_evaded(crtc, min, max, + crtc->start_vbl_count); } /** @@ -148,19 +150,21 @@ void intel_pipe_update_start(struct intel_crtc *crtc, uint32_t *start_vbl_count) * re-enables interrupts and verifies the update was actually completed * before a vblank using the value of @start_vbl_count. */ -void intel_pipe_update_end(struct intel_crtc *crtc, u32 start_vbl_count) +void intel_pipe_update_end(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; enum pipe pipe = crtc->pipe; u32 end_vbl_count = dev->driver->get_vblank_counter(dev, pipe); + ktime_t end_vbl_time = ktime_get(); trace_i915_pipe_update_end(crtc, end_vbl_count); local_irq_enable(); - if (start_vbl_count && start_vbl_count != end_vbl_count) - DRM_ERROR("Atomic update failure on pipe %c (start=%u end=%u)\n", - pipe_name(pipe), start_vbl_count, end_vbl_count); + if (crtc->start_vbl_count && crtc->start_vbl_count != end_vbl_count) + DRM_ERROR("Atomic update failure on pipe %c (start=%u end=%u) time %lld us\n", + pipe_name(pipe), crtc->start_vbl_count, end_vbl_count, + ktime_us_delta(end_vbl_time, crtc->start_vbl_time)); } static void diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 0568ae6ec9dd..cbe39dcc7bc0 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1291,7 +1291,7 @@ static void intel_tv_find_better_format(struct drm_connector *connector) return; - for (i = 0; i < sizeof(tv_modes) / sizeof(*tv_modes); i++) { + for (i = 0; i < ARRAY_SIZE(tv_modes); i++) { tv_mode = tv_modes + i; if ((intel_tv->type == DRM_MODE_CONNECTOR_Component) == diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 9d3c2e420d2b..dec20d60daf0 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -52,8 +52,7 @@ static const char * const forcewake_domain_names[] = { const char * intel_uncore_forcewake_domain_to_str(const enum forcewake_domain_id id) { - BUILD_BUG_ON((sizeof(forcewake_domain_names)/sizeof(const char *)) != - FW_DOMAIN_ID_COUNT); + BUILD_BUG_ON(ARRAY_SIZE(forcewake_domain_names) != FW_DOMAIN_ID_COUNT); if (id >= 0 && id < FW_DOMAIN_ID_COUNT) return forcewake_domain_names[id]; @@ -770,6 +769,7 @@ static u##x \ gen9_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \ enum forcewake_domains fw_engine; \ GEN6_READ_HEADER(x); \ + hsw_unclaimed_reg_debug(dev_priv, reg, true, true); \ if (!SKL_NEEDS_FORCE_WAKE((dev_priv), (reg))) \ fw_engine = 0; \ else if (FORCEWAKE_GEN9_RENDER_RANGE_OFFSET(reg)) \ @@ -783,6 +783,7 @@ gen9_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \ if (fw_engine) \ __force_wake_get(dev_priv, fw_engine); \ val = __raw_i915_read##x(dev_priv, reg); \ + hsw_unclaimed_reg_debug(dev_priv, reg, true, false); \ GEN6_READ_FOOTER; \ } @@ -983,6 +984,7 @@ gen9_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, \ bool trace) { \ enum forcewake_domains fw_engine; \ GEN6_WRITE_HEADER; \ + hsw_unclaimed_reg_debug(dev_priv, reg, false, true); \ if (!SKL_NEEDS_FORCE_WAKE((dev_priv), (reg)) || \ is_gen9_shadowed(dev_priv, reg)) \ fw_engine = 0; \ @@ -997,6 +999,8 @@ gen9_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, \ if (fw_engine) \ __force_wake_get(dev_priv, fw_engine); \ __raw_i915_write##x(dev_priv, reg, val); \ + hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \ + hsw_unclaimed_reg_detect(dev_priv); \ GEN6_WRITE_FOOTER; \ } diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index 499e9f625aef..8c52d0ef1fc9 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -634,6 +634,13 @@ drm_dp_enhanced_frame_cap(const u8 dpcd[DP_RECEIVER_CAP_SIZE]) (dpcd[DP_MAX_LANE_COUNT] & DP_ENHANCED_FRAME_CAP); } +static inline bool +drm_dp_tps3_supported(const u8 dpcd[DP_RECEIVER_CAP_SIZE]) +{ + return dpcd[DP_DPCD_REV] >= 0x12 && + dpcd[DP_MAX_LANE_COUNT] & DP_TPS3_SUPPORTED; +} + /* * DisplayPort AUX channel */ |