Diffstat (limited to 'drivers/gpu/drm/i915')
87 files changed, 14857 insertions, 12103 deletions
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 684fc1cd08fa..a998c2bce70a 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -3,15 +3,20 @@ # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. subdir-ccflags-$(CONFIG_DRM_I915_WERROR) := -Werror +subdir-ccflags-y += \ + $(call as-instr,movntdqa (%eax)$(comma)%xmm0,-DCONFIG_AS_MOVNTDQA) # Please keep these build lists sorted! # core driver code i915-y := i915_drv.o \ i915_irq.o \ + i915_memcpy.o \ + i915_mm.o \ i915_params.o \ i915_pci.o \ i915_suspend.o \ + i915_sw_fence.o \ i915_sysfs.o \ intel_csr.o \ intel_device_info.o \ @@ -25,7 +30,6 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o i915-y += i915_cmd_parser.o \ i915_gem_batch_pool.o \ i915_gem_context.o \ - i915_gem_debug.o \ i915_gem_dmabuf.o \ i915_gem_evict.o \ i915_gem_execbuffer.o \ @@ -33,6 +37,7 @@ i915-y += i915_cmd_parser.o \ i915_gem_gtt.o \ i915_gem.o \ i915_gem_render_state.o \ + i915_gem_request.o \ i915_gem_shrinker.o \ i915_gem_stolen.o \ i915_gem_tiling.o \ @@ -40,6 +45,7 @@ i915-y += i915_cmd_parser.o \ i915_gpu_error.o \ i915_trace_points.o \ intel_breadcrumbs.o \ + intel_engine_cs.o \ intel_lrc.o \ intel_mocs.o \ intel_ringbuffer.o \ @@ -109,6 +115,6 @@ i915-y += intel_gvt.o include $(src)/gvt/Makefile endif -obj-$(CONFIG_DRM_I915) += i915.o +obj-$(CONFIG_DRM_I915) += i915.o CFLAGS_i915_trace_points.o := -I$(src) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index b0fd6a7b0603..70980f82a15b 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -62,23 +62,23 @@ * The parser always rejects such commands. * * The majority of the problematic commands fall in the MI_* range, with only a - * few specific commands on each ring (e.g. PIPE_CONTROL and MI_FLUSH_DW). + * few specific commands on each engine (e.g. PIPE_CONTROL and MI_FLUSH_DW). * * Implementation: - * Each ring maintains tables of commands and registers which the parser uses in - * scanning batch buffers submitted to that ring. + * Each engine maintains tables of commands and registers which the parser + * uses in scanning batch buffers submitted to that engine. * * Since the set of commands that the parser must check for is significantly * smaller than the number of commands supported, the parser tables contain only * those commands required by the parser. This generally works because command * opcode ranges have standard command length encodings. So for commands that * the parser does not need to check, it can easily skip them. This is - * implemented via a per-ring length decoding vfunc. + * implemented via a per-engine length decoding vfunc. * * Unfortunately, there are a number of commands that do not follow the standard * length encoding for their opcode range, primarily amongst the MI_* commands. * To handle this, the parser provides a way to define explicit "skip" entries - * in the per-ring command tables. + * in the per-engine command tables. * * Other command table entries map fairly directly to high level categories * mentioned above: rejected, master-only, register whitelist. The parser @@ -86,24 +86,25 @@ * general bitmasking mechanism. 
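A note on the CMD() change in the next hunk: the hard-coded STD_*_OPCODE_MASK constants become shifts, and CMD() derives each mask as ~0u << (opm). A minimal standalone sketch (plain C; the hex values are the ones the deleted #defines carried) confirming the shift form reproduces the old masks:

#include <assert.h>
#include <stdio.h>

#define STD_MI_OPCODE_SHIFT (32 - 9)
#define STD_3D_OPCODE_SHIFT (32 - 16)
#define STD_2D_OPCODE_SHIFT (32 - 10)

int main(void)
{
	/* Each shift regenerates the mask the old #define hard-coded. */
	assert((~0u << STD_MI_OPCODE_SHIFT) == 0xFF800000u); /* STD_MI_OPCODE_MASK */
	assert((~0u << STD_3D_OPCODE_SHIFT) == 0xFFFF0000u); /* STD_3D_OPCODE_MASK */
	assert((~0u << STD_2D_OPCODE_SHIFT) == 0xFFC00000u); /* STD_2D_OPCODE_MASK */
	printf("shift-derived masks match the old mask constants\n");
	return 0;
}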
*/ -#define STD_MI_OPCODE_MASK 0xFF800000 -#define STD_3D_OPCODE_MASK 0xFFFF0000 -#define STD_2D_OPCODE_MASK 0xFFC00000 -#define STD_MFX_OPCODE_MASK 0xFFFF0000 +#define STD_MI_OPCODE_SHIFT (32 - 9) +#define STD_3D_OPCODE_SHIFT (32 - 16) +#define STD_2D_OPCODE_SHIFT (32 - 10) +#define STD_MFX_OPCODE_SHIFT (32 - 16) +#define MIN_OPCODE_SHIFT 16 #define CMD(op, opm, f, lm, fl, ...) \ { \ .flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \ - .cmd = { (op), (opm) }, \ + .cmd = { (op), ~0u << (opm) }, \ .length = { (lm) }, \ __VA_ARGS__ \ } /* Convenience macros to compress the tables */ -#define SMI STD_MI_OPCODE_MASK -#define S3D STD_3D_OPCODE_MASK -#define S2D STD_2D_OPCODE_MASK -#define SMFX STD_MFX_OPCODE_MASK +#define SMI STD_MI_OPCODE_SHIFT +#define S3D STD_3D_OPCODE_SHIFT +#define S2D STD_2D_OPCODE_SHIFT +#define SMFX STD_MFX_OPCODE_SHIFT #define F true #define S CMD_DESC_SKIP #define R CMD_DESC_REJECT @@ -350,6 +351,9 @@ static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = { CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), }; +static const struct drm_i915_cmd_descriptor noop_desc = + CMD(MI_NOOP, SMI, F, 1, S); + #undef CMD #undef SMI #undef S3D @@ -458,6 +462,7 @@ static const struct drm_i915_reg_descriptor gen7_render_regs[] = { REG32(GEN7_GPGPU_DISPATCHDIMX), REG32(GEN7_GPGPU_DISPATCHDIMY), REG32(GEN7_GPGPU_DISPATCHDIMZ), + REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE), REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 0), REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 1), REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 2), @@ -473,6 +478,7 @@ static const struct drm_i915_reg_descriptor gen7_render_regs[] = { REG32(GEN7_L3SQCREG1), REG32(GEN7_L3CNTLREG2), REG32(GEN7_L3CNTLREG3), + REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), }; static const struct drm_i915_reg_descriptor hsw_render_regs[] = { @@ -502,7 +508,10 @@ static const struct drm_i915_reg_descriptor hsw_render_regs[] = { }; static const struct drm_i915_reg_descriptor gen7_blt_regs[] = { + REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE), + REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE), REG32(BCS_SWCTRL), + REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), }; static const struct drm_i915_reg_descriptor ivb_master_regs[] = { @@ -603,7 +612,7 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) return 0; } -static bool validate_cmds_sorted(struct intel_engine_cs *engine, +static bool validate_cmds_sorted(const struct intel_engine_cs *engine, const struct drm_i915_cmd_table *cmd_tables, int cmd_table_count) { @@ -624,8 +633,10 @@ static bool validate_cmds_sorted(struct intel_engine_cs *engine, u32 curr = desc->cmd.value & desc->cmd.mask; if (curr < previous) { - DRM_ERROR("CMD: table not sorted ring=%d table=%d entry=%d cmd=0x%08X prev=0x%08X\n", - engine->id, i, j, curr, previous); + DRM_ERROR("CMD: %s [%d] command table not sorted: " + "table=%d entry=%d cmd=0x%08X prev=0x%08X\n", + engine->name, engine->id, + i, j, curr, previous); ret = false; } @@ -636,7 +647,7 @@ static bool validate_cmds_sorted(struct intel_engine_cs *engine, return ret; } -static bool check_sorted(int ring_id, +static bool check_sorted(const struct intel_engine_cs *engine, const struct drm_i915_reg_descriptor *reg_table, int reg_count) { @@ -648,8 +659,10 @@ static bool check_sorted(int ring_id, u32 curr = i915_mmio_reg_offset(reg_table[i].addr); if (curr < previous) { - DRM_ERROR("CMD: table not sorted ring=%d entry=%d reg=0x%08X prev=0x%08X\n", - ring_id, i, curr, previous); + DRM_ERROR("CMD: %s [%d] register table not sorted: " + "entry=%d reg=0x%08X prev=0x%08X\n", + engine->name, engine->id, + i, 
curr, previous); ret = false; } @@ -666,7 +679,7 @@ static bool validate_regs_sorted(struct intel_engine_cs *engine) for (i = 0; i < engine->reg_table_count; i++) { table = &engine->reg_tables[i]; - if (!check_sorted(engine->id, table->regs, table->num_regs)) + if (!check_sorted(engine, table->regs, table->num_regs)) return false; } @@ -687,12 +700,26 @@ struct cmd_node { * non-opcode bits being set. But if we don't include those bits, some 3D * commands may hash to the same bucket due to not including opcode bits that * make the command unique. For now, we will risk hashing to the same bucket. - * - * If we attempt to generate a perfect hash, we should be able to look at bits - * 31:29 of a command from a batch buffer and use the full mask for that - * client. The existing INSTR_CLIENT_MASK/SHIFT defines can be used for this. */ -#define CMD_HASH_MASK STD_MI_OPCODE_MASK +static inline u32 cmd_header_key(u32 x) +{ + u32 shift; + + switch (x >> INSTR_CLIENT_SHIFT) { + default: + case INSTR_MI_CLIENT: + shift = STD_MI_OPCODE_SHIFT; + break; + case INSTR_RC_CLIENT: + shift = STD_3D_OPCODE_SHIFT; + break; + case INSTR_BC_CLIENT: + shift = STD_2D_OPCODE_SHIFT; + break; + } + + return x >> shift; +} static int init_hash_table(struct intel_engine_cs *engine, const struct drm_i915_cmd_table *cmd_tables, @@ -716,7 +743,7 @@ static int init_hash_table(struct intel_engine_cs *engine, desc_node->desc = desc; hash_add(engine->cmd_hash, &desc_node->node, - desc->cmd.value & CMD_HASH_MASK); + cmd_header_key(desc->cmd.value)); } } @@ -736,23 +763,21 @@ static void fini_hash_table(struct intel_engine_cs *engine) } /** - * i915_cmd_parser_init_ring() - set cmd parser related fields for a ringbuffer + * intel_engine_init_cmd_parser() - set cmd parser related fields for an engine * @engine: the engine to initialize * * Optionally initializes fields related to batch buffer command parsing in the * struct intel_engine_cs based on whether the platform requires software * command parsing. 
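cmd_header_key() above replaces the single CMD_HASH_MASK with a hash key chosen per client, so each client's full opcode field contributes to the bucket. A userspace sketch of the same selection logic — the INSTR_* client values (bits 31:29; MI = 0, blit = 2, render = 3) are assumptions taken from i915's instruction definitions, not shown in this diff:

#include <stdio.h>

#define INSTR_CLIENT_SHIFT	29
#define INSTR_MI_CLIENT		0x0	/* assumed i915 client encodings */
#define INSTR_BC_CLIENT		0x2
#define INSTR_RC_CLIENT		0x3

#define STD_MI_OPCODE_SHIFT	(32 - 9)
#define STD_3D_OPCODE_SHIFT	(32 - 16)
#define STD_2D_OPCODE_SHIFT	(32 - 10)

/* Mirror of cmd_header_key(): pick the opcode shift by client field. */
static unsigned int cmd_header_key(unsigned int x)
{
	unsigned int shift;

	switch (x >> INSTR_CLIENT_SHIFT) {
	default:
	case INSTR_MI_CLIENT:
		shift = STD_MI_OPCODE_SHIFT;
		break;
	case INSTR_RC_CLIENT:
		shift = STD_3D_OPCODE_SHIFT;
		break;
	case INSTR_BC_CLIENT:
		shift = STD_2D_OPCODE_SHIFT;
		break;
	}
	return x >> shift;
}

int main(void)
{
	/* MI_NOOP (0x00000000) and MI_BATCH_BUFFER_END (0x05000000) land in
	 * different buckets because the full 9-bit MI opcode is retained. */
	printf("MI_NOOP key: %#x\n", cmd_header_key(0x00000000));
	printf("MI_BATCH_BUFFER_END key: %#x\n", cmd_header_key(0x05000000));
	return 0;
}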
- * - * Return: non-zero if initialization fails */ -int i915_cmd_parser_init_ring(struct intel_engine_cs *engine) +void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) { const struct drm_i915_cmd_table *cmd_tables; int cmd_table_count; int ret; if (!IS_GEN7(engine->i915)) - return 0; + return; switch (engine->id) { case RCS: @@ -806,36 +831,38 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *engine) engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; break; default: - DRM_ERROR("CMD: cmd_parser_init with unknown ring: %d\n", - engine->id); - BUG(); + MISSING_CASE(engine->id); + return; } - BUG_ON(!validate_cmds_sorted(engine, cmd_tables, cmd_table_count)); - BUG_ON(!validate_regs_sorted(engine)); - - WARN_ON(!hash_empty(engine->cmd_hash)); + if (!validate_cmds_sorted(engine, cmd_tables, cmd_table_count)) { + DRM_ERROR("%s: command descriptions are not sorted\n", + engine->name); + return; + } + if (!validate_regs_sorted(engine)) { + DRM_ERROR("%s: registers are not sorted\n", engine->name); + return; + } ret = init_hash_table(engine, cmd_tables, cmd_table_count); if (ret) { - DRM_ERROR("CMD: cmd_parser_init failed!\n"); + DRM_ERROR("%s: initialisation failed!\n", engine->name); fini_hash_table(engine); - return ret; + return; } engine->needs_cmd_parser = true; - - return 0; } /** - * i915_cmd_parser_fini_ring() - clean up cmd parser related fields + * intel_engine_cleanup_cmd_parser() - clean up cmd parser related fields * @engine: the engine to clean up * * Releases any resources related to command parsing that may have been - * initialized for the specified ring. + * initialized for the specified engine. */ -void i915_cmd_parser_fini_ring(struct intel_engine_cs *engine) +void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine) { if (!engine->needs_cmd_parser) return; @@ -850,12 +877,9 @@ find_cmd_in_table(struct intel_engine_cs *engine, struct cmd_node *desc_node; hash_for_each_possible(engine->cmd_hash, desc_node, node, - cmd_header & CMD_HASH_MASK) { + cmd_header_key(cmd_header)) { const struct drm_i915_cmd_descriptor *desc = desc_node->desc; - u32 masked_cmd = desc->cmd.mask & cmd_header; - u32 masked_value = desc->cmd.value & desc->cmd.mask; - - if (masked_cmd == masked_value) + if (((cmd_header ^ desc->cmd.value) & desc->cmd.mask) == 0) return desc; } @@ -866,18 +890,21 @@ find_cmd_in_table(struct intel_engine_cs *engine, * Returns a pointer to a descriptor for the command specified by cmd_header. * * The caller must supply space for a default descriptor via the default_desc - * parameter. If no descriptor for the specified command exists in the ring's + * parameter. If no descriptor for the specified command exists in the engine's * command parser tables, this function fills in default_desc based on the - * ring's default length encoding and returns default_desc. + * engine's default length encoding and returns default_desc.
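find_cmd_in_table() above now tests ((cmd_header ^ desc->cmd.value) & desc->cmd.mask) == 0, a single-AND form of the old two-step masked compare. A quick exhaustive check of the identity over all 8-bit values (plain C, standalone):

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned int hdr, val, mask;

	/* ((hdr ^ val) & mask) == 0  <=>  (hdr & mask) == (val & mask) */
	for (hdr = 0; hdr < 256; hdr++)
		for (val = 0; val < 256; val++)
			for (mask = 0; mask < 256; mask++)
				assert((((hdr ^ val) & mask) == 0) ==
				       ((hdr & mask) == (val & mask)));
	printf("XOR match form is equivalent to the masked compare\n");
	return 0;
}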
*/ static const struct drm_i915_cmd_descriptor* find_cmd(struct intel_engine_cs *engine, u32 cmd_header, + const struct drm_i915_cmd_descriptor *desc, struct drm_i915_cmd_descriptor *default_desc) { - const struct drm_i915_cmd_descriptor *desc; u32 mask; + if (((cmd_header ^ desc->cmd.value) & desc->cmd.mask) == 0) + return desc; + desc = find_cmd_in_table(engine, cmd_header); if (desc) return desc; @@ -886,152 +913,140 @@ find_cmd(struct intel_engine_cs *engine, if (!mask) return NULL; - BUG_ON(!default_desc); - default_desc->flags = CMD_DESC_SKIP; + default_desc->cmd.value = cmd_header; + default_desc->cmd.mask = ~0u << MIN_OPCODE_SHIFT; default_desc->length.mask = mask; - + default_desc->flags = CMD_DESC_SKIP; return default_desc; } static const struct drm_i915_reg_descriptor * -find_reg(const struct drm_i915_reg_descriptor *table, - int count, u32 addr) +__find_reg(const struct drm_i915_reg_descriptor *table, int count, u32 addr) { - int i; - - for (i = 0; i < count; i++) { - if (i915_mmio_reg_offset(table[i].addr) == addr) - return &table[i]; + int start = 0, end = count; + while (start < end) { + int mid = start + (end - start) / 2; + int ret = addr - i915_mmio_reg_offset(table[mid].addr); + if (ret < 0) + end = mid; + else if (ret > 0) + start = mid + 1; + else + return &table[mid]; } - return NULL; } static const struct drm_i915_reg_descriptor * -find_reg_in_tables(const struct drm_i915_reg_table *tables, - int count, bool is_master, u32 addr) +find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr) { - int i; - const struct drm_i915_reg_table *table; - const struct drm_i915_reg_descriptor *reg; + const struct drm_i915_reg_table *table = engine->reg_tables; + int count = engine->reg_table_count; - for (i = 0; i < count; i++) { - table = &tables[i]; + do { if (!table->master || is_master) { - reg = find_reg(table->regs, table->num_regs, - addr); + const struct drm_i915_reg_descriptor *reg; + + reg = __find_reg(table->regs, table->num_regs, addr); if (reg != NULL) return reg; } - } + } while (table++, --count); return NULL; } -static u32 *vmap_batch(struct drm_i915_gem_object *obj, - unsigned start, unsigned len) -{ - int i; - void *addr = NULL; - struct sg_page_iter sg_iter; - int first_page = start >> PAGE_SHIFT; - int last_page = (len + start + 4095) >> PAGE_SHIFT; - int npages = last_page - first_page; - struct page **pages; - - pages = drm_malloc_ab(npages, sizeof(*pages)); - if (pages == NULL) { - DRM_DEBUG_DRIVER("Failed to get space for pages\n"); - goto finish; - } - - i = 0; - for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, first_page) { - pages[i++] = sg_page_iter_page(&sg_iter); - if (i == npages) - break; - } - - addr = vmap(pages, i, 0, PAGE_KERNEL); - if (addr == NULL) { - DRM_DEBUG_DRIVER("Failed to vmap pages\n"); - goto finish; - } - -finish: - if (pages) - drm_free_large(pages); - return (u32*)addr; -} - -/* Returns a vmap'd pointer to dest_obj, which the caller must unmap */ -static u32 *copy_batch(struct drm_i915_gem_object *dest_obj, +/* Returns a vmap'd pointer to dst_obj, which the caller must unmap */ +static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, struct drm_i915_gem_object *src_obj, u32 batch_start_offset, - u32 batch_len) + u32 batch_len, + bool *needs_clflush_after) { - int needs_clflush = 0; - void *src_base, *src; - void *dst = NULL; + unsigned int src_needs_clflush; + unsigned int dst_needs_clflush; + void *dst, *src; int ret; - if (batch_len > dest_obj->base.size || - batch_len + batch_start_offset > 
src_obj->base.size) - return ERR_PTR(-E2BIG); - - if (WARN_ON(dest_obj->pages_pin_count == 0)) - return ERR_PTR(-ENODEV); - - ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush); - if (ret) { - DRM_DEBUG_DRIVER("CMD: failed to prepare shadow batch\n"); + ret = i915_gem_obj_prepare_shmem_read(src_obj, &src_needs_clflush); + if (ret) return ERR_PTR(ret); - } - src_base = vmap_batch(src_obj, batch_start_offset, batch_len); - if (!src_base) { - DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n"); - ret = -ENOMEM; + ret = i915_gem_obj_prepare_shmem_write(dst_obj, &dst_needs_clflush); + if (ret) { + dst = ERR_PTR(ret); goto unpin_src; } - ret = i915_gem_object_set_to_cpu_domain(dest_obj, true); - if (ret) { - DRM_DEBUG_DRIVER("CMD: Failed to set shadow batch to CPU\n"); - goto unmap_src; + dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB); + if (IS_ERR(dst)) + goto unpin_dst; + + src = ERR_PTR(-ENODEV); + if (src_needs_clflush && + i915_memcpy_from_wc((void *)(uintptr_t)batch_start_offset, NULL, 0)) { + src = i915_gem_object_pin_map(src_obj, I915_MAP_WC); + if (!IS_ERR(src)) { + i915_memcpy_from_wc(dst, + src + batch_start_offset, + ALIGN(batch_len, 16)); + i915_gem_object_unpin_map(src_obj); + } } - - dst = vmap_batch(dest_obj, 0, batch_len); - if (!dst) { - DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n"); - ret = -ENOMEM; - goto unmap_src; + if (IS_ERR(src)) { + void *ptr; + int offset, n; + + offset = offset_in_page(batch_start_offset); + + /* We can avoid clflushing partial cachelines before the write + * if we only ever write full cache-lines. Since we know that + * both the source and destination are in multiples of + * PAGE_SIZE, we can simply round up to the next cacheline. + * We don't care about copying too much here as we only + * validate up to the end of the batch. + */ + if (dst_needs_clflush & CLFLUSH_BEFORE) + batch_len = roundup(batch_len, + boot_cpu_data.x86_clflush_size); + + ptr = dst; + for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) { + int len = min_t(int, batch_len, PAGE_SIZE - offset); + + src = kmap_atomic(i915_gem_object_get_page(src_obj, n)); + if (src_needs_clflush) + drm_clflush_virt_range(src + offset, len); + memcpy(ptr, src + offset, len); + kunmap_atomic(src); + + ptr += len; + batch_len -= len; + offset = 0; + } } - src = src_base + offset_in_page(batch_start_offset); - if (needs_clflush) - drm_clflush_virt_range(src, batch_len); - - memcpy(dst, src, batch_len); + /* dst_obj is returned with vmap pinned */ + *needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER; -unmap_src: - vunmap(src_base); +unpin_dst: + i915_gem_obj_finish_shmem_access(dst_obj); unpin_src: - i915_gem_object_unpin_pages(src_obj); - - return ret ? ERR_PTR(ret) : dst; + i915_gem_obj_finish_shmem_access(src_obj); return dst; } /** - * i915_needs_cmd_parser() - should a given ring use software command parsing? + * intel_engine_needs_cmd_parser() - should a given engine use software + * command parsing? * @engine: the engine in question * * Only certain platforms require software batch buffer command parsing, and * only when enabled via module parameter.
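The __find_reg() hunk above replaces the linear register-whitelist scan with a binary search, which is why the sorted-table validation now matters for correctness rather than just tidiness. A minimal standalone version over a plain sorted array of register offsets (the descriptor struct reduced to just the offset; the sample offsets are illustrative, not taken from the tables):

#include <stdio.h>

/* Binary search over a sorted register-offset table, as __find_reg() does. */
static const unsigned int *find_offset(const unsigned int *table, int count,
				       unsigned int addr)
{
	int start = 0, end = count;

	while (start < end) {
		int mid = start + (end - start) / 2;

		if (addr < table[mid])
			end = mid;
		else if (addr > table[mid])
			start = mid + 1;
		else
			return &table[mid];
	}
	return NULL;
}

int main(void)
{
	/* Illustrative offsets only; must be sorted ascending. */
	static const unsigned int regs[] = { 0x2358, 0x235c, 0x22358 };

	printf("0x235c %s\n",
	       find_offset(regs, 3, 0x235c) ? "whitelisted" : "rejected");
	printf("0x9999 %s\n",
	       find_offset(regs, 3, 0x9999) ? "whitelisted" : "rejected");
	return 0;
}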
* - * Return: true if the ring requires software command parsing + * Return: true if the engine requires software command parsing */ -bool i915_needs_cmd_parser(struct intel_engine_cs *engine) +bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine) { if (!engine->needs_cmd_parser) return false; @@ -1048,6 +1063,9 @@ static bool check_cmd(const struct intel_engine_cs *engine, const bool is_master, bool *oacontrol_set) { + if (desc->flags & CMD_DESC_SKIP) + return true; + if (desc->flags & CMD_DESC_REJECT) { DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd); return false; @@ -1072,14 +1090,11 @@ static bool check_cmd(const struct intel_engine_cs *engine, offset += step) { const u32 reg_addr = cmd[offset] & desc->reg.mask; const struct drm_i915_reg_descriptor *reg = - find_reg_in_tables(engine->reg_tables, - engine->reg_table_count, - is_master, - reg_addr); + find_reg(engine, is_master, reg_addr); if (!reg) { - DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (ring=%d)\n", - reg_addr, *cmd, engine->id); + DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (exec_id=%d)\n", + reg_addr, *cmd, engine->exec_id); return false; } @@ -1159,11 +1174,11 @@ static bool check_cmd(const struct intel_engine_cs *engine, desc->bits[i].mask; if (dword != desc->bits[i].expected) { - DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (ring=%d)\n", + DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (exec_id=%d)\n", *cmd, desc->bits[i].mask, desc->bits[i].expected, - dword, engine->id); + dword, engine->exec_id); return false; } } @@ -1189,23 +1204,26 @@ static bool check_cmd(const struct intel_engine_cs *engine, * Return: non-zero if the parser finds violations or otherwise fails; -EACCES * if the batch appears legal but should use hardware parsing */ -int i915_parse_cmds(struct intel_engine_cs *engine, - struct drm_i915_gem_object *batch_obj, - struct drm_i915_gem_object *shadow_batch_obj, - u32 batch_start_offset, - u32 batch_len, - bool is_master) +int intel_engine_cmd_parser(struct intel_engine_cs *engine, + struct drm_i915_gem_object *batch_obj, + struct drm_i915_gem_object *shadow_batch_obj, + u32 batch_start_offset, + u32 batch_len, + bool is_master) { - u32 *cmd, *batch_base, *batch_end; - struct drm_i915_cmd_descriptor default_desc = { 0 }; + u32 *cmd, *batch_end; + struct drm_i915_cmd_descriptor default_desc = noop_desc; + const struct drm_i915_cmd_descriptor *desc = &default_desc; bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */ + bool needs_clflush_after = false; int ret = 0; - batch_base = copy_batch(shadow_batch_obj, batch_obj, - batch_start_offset, batch_len); - if (IS_ERR(batch_base)) { + cmd = copy_batch(shadow_batch_obj, batch_obj, + batch_start_offset, batch_len, + &needs_clflush_after); + if (IS_ERR(cmd)) { DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n"); - return PTR_ERR(batch_base); + return PTR_ERR(cmd); } /* @@ -1213,17 +1231,14 @@ int i915_parse_cmds(struct intel_engine_cs *engine, * large or larger and copy_batch() will write MI_NOPs to the extra * space. Parsing should be faster in some cases this way. 
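The parse loop in this hunk and the next walks the shadow copy one u32 command header at a time, stopping at MI_BATCH_BUFFER_END and advancing by each command's decoded length. A toy scanner with the same shape — the 0x05000000 value for MI_BATCH_BUFFER_END and the (header & 0x3f) + 2 length rule are assumptions about the MI instruction format, standing in for the per-descriptor/per-engine decode the real code uses:

#include <stdio.h>

#define MI_BATCH_BUFFER_END 0x05000000u /* assumed MI encoding */

/* Toy batch scan: stop at BB_END, advance by a simplistic MI length. */
static int scan_batch(const unsigned int *cmd, const unsigned int *batch_end)
{
	while (cmd < batch_end) {
		unsigned int length;

		if (*cmd == MI_BATCH_BUFFER_END)
			return 0;

		/* The real parser asks the descriptor (fixed length, or a
		 * per-engine length mask); assume the common MI rule here. */
		length = (*cmd & 0x3f) + 2;
		if (cmd + length > batch_end)
			return -1; /* command overruns the batch */

		cmd += length;
	}
	return -1; /* no MI_BATCH_BUFFER_END before the end of the batch */
}

int main(void)
{
	unsigned int batch[] = { 0x0, 0x0, MI_BATCH_BUFFER_END, 0x0 };

	printf("scan: %d\n", scan_batch(batch, batch + 4));
	return 0;
}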
*/ - batch_end = batch_base + (batch_len / sizeof(*batch_end)); - - cmd = batch_base; + batch_end = cmd + (batch_len / sizeof(*batch_end)); while (cmd < batch_end) { - const struct drm_i915_cmd_descriptor *desc; u32 length; if (*cmd == MI_BATCH_BUFFER_END) break; - desc = find_cmd(engine, *cmd, &default_desc); + desc = find_cmd(engine, *cmd, desc, &default_desc); if (!desc) { DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n", *cmd); @@ -1274,7 +1289,9 @@ int i915_parse_cmds(struct intel_engine_cs *engine, ret = -EINVAL; } - vunmap(batch_base); + if (ret == 0 && needs_clflush_after) + drm_clflush_virt_range(shadow_batch_obj->mapping, batch_len); + i915_gem_object_unpin_map(shadow_batch_obj); return ret; } @@ -1295,7 +1312,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv) /* If the command parser is not enabled, report 0 - unsupported */ for_each_engine(engine, dev_priv) { - if (i915_needs_cmd_parser(engine)) { + if (intel_engine_needs_cmd_parser(engine)) { active = true; break; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 844fea795bae..27b0e34dadec 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -40,11 +40,10 @@ #include <drm/i915_drm.h> #include "i915_drv.h" -enum { - ACTIVE_LIST, - INACTIVE_LIST, - PINNED_LIST, -}; +static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) +{ + return to_i915(node->minor->dev); +} /* As the drm_debugfs_init() routines are called before dev->dev_private is * allocated we need to hook into the minor for release. */ @@ -63,7 +62,7 @@ drm_add_fake_info_node(struct drm_minor *minor, node->minor = minor; node->dent = ent; - node->info_ent = (void *) key; + node->info_ent = (void *)key; mutex_lock(&minor->debugfs_lock); list_add(&node->list, &minor->debugfs_list); @@ -74,12 +73,11 @@ drm_add_fake_info_node(struct drm_minor *minor, static int i915_capabilities(struct seq_file *m, void *data) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - const struct intel_device_info *info = INTEL_INFO(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + const struct intel_device_info *info = INTEL_INFO(dev_priv); - seq_printf(m, "gen: %d\n", info->gen); - seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(dev)); + seq_printf(m, "gen: %d\n", INTEL_GEN(dev_priv)); + seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(dev_priv)); #define PRINT_FLAG(x) seq_printf(m, #x ": %s\n", yesno(info->x)) #define SEP_SEMICOLON ; DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG, SEP_SEMICOLON); @@ -91,7 +89,7 @@ static int i915_capabilities(struct seq_file *m, void *data) static char get_active_flag(struct drm_i915_gem_object *obj) { - return obj->active ? '*' : ' '; + return i915_gem_object_is_active(obj) ? '*' : ' '; } static char get_pin_flag(struct drm_i915_gem_object *obj) @@ -101,7 +99,7 @@ static char get_pin_flag(struct drm_i915_gem_object *obj) static char get_tiling_flag(struct drm_i915_gem_object *obj) { - switch (obj->tiling_mode) { + switch (i915_gem_object_get_tiling(obj)) { default: case I915_TILING_NONE: return ' '; case I915_TILING_X: return 'X'; @@ -111,7 +109,7 @@ static char get_tiling_flag(struct drm_i915_gem_object *obj) static char get_global_flag(struct drm_i915_gem_object *obj) { - return i915_gem_obj_to_ggtt(obj) ? 'g' : ' '; + return i915_gem_object_to_ggtt(obj, NULL) ? 
'g' : ' '; } static char get_pin_mapped_flag(struct drm_i915_gem_object *obj) @@ -125,7 +123,7 @@ static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj) struct i915_vma *vma; list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (vma->is_ggtt && drm_mm_node_allocated(&vma->node)) + if (i915_vma_is_ggtt(vma) && drm_mm_node_allocated(&vma->node)) size += vma->node.size; } @@ -138,6 +136,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct intel_engine_cs *engine; struct i915_vma *vma; + unsigned int frontbuffer_bits; int pin_count = 0; enum intel_engine_id id; @@ -155,30 +154,36 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) obj->base.write_domain); for_each_engine_id(engine, dev_priv, id) seq_printf(m, "%x ", - i915_gem_request_get_seqno(obj->last_read_req[id])); - seq_printf(m, "] %x %x%s%s%s", - i915_gem_request_get_seqno(obj->last_write_req), - i915_gem_request_get_seqno(obj->last_fenced_req), - i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level), + i915_gem_active_get_seqno(&obj->last_read[id], + &obj->base.dev->struct_mutex)); + seq_printf(m, "] %x %s%s%s", + i915_gem_active_get_seqno(&obj->last_write, + &obj->base.dev->struct_mutex), + i915_cache_level_str(dev_priv, obj->cache_level), obj->dirty ? " dirty" : "", obj->madv == I915_MADV_DONTNEED ? " purgeable" : ""); if (obj->base.name) seq_printf(m, " (name: %d)", obj->base.name); list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (vma->pin_count > 0) + if (i915_vma_is_pinned(vma)) pin_count++; } seq_printf(m, " (pinned x %d)", pin_count); if (obj->pin_display) seq_printf(m, " (display)"); - if (obj->fence_reg != I915_FENCE_REG_NONE) - seq_printf(m, " (fence: %d)", obj->fence_reg); list_for_each_entry(vma, &obj->vma_list, obj_link) { + if (!drm_mm_node_allocated(&vma->node)) + continue; + seq_printf(m, " (%sgtt offset: %08llx, size: %08llx", - vma->is_ggtt ? "g" : "pp", + i915_vma_is_ggtt(vma) ? "g" : "pp", vma->node.start, vma->node.size); - if (vma->is_ggtt) + if (i915_vma_is_ggtt(vma)) seq_printf(m, ", type: %u", vma->ggtt_view.type); + if (vma->fence) + seq_printf(m, " , fence: %d%s", + vma->fence->id, + i915_gem_active_isset(&vma->last_fence) ? 
"*" : ""); seq_puts(m, ")"); } if (obj->stolen) @@ -192,58 +197,15 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) *t = '\0'; seq_printf(m, " (%s mappable)", s); } - if (obj->last_write_req != NULL) - seq_printf(m, " (%s)", - i915_gem_request_get_engine(obj->last_write_req)->name); - if (obj->frontbuffer_bits) - seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits); -} - -static int i915_gem_object_list_info(struct seq_file *m, void *data) -{ - struct drm_info_node *node = m->private; - uintptr_t list = (uintptr_t) node->info_ent->data; - struct list_head *head; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct i915_vma *vma; - u64 total_obj_size, total_gtt_size; - int count, ret; - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; + engine = i915_gem_active_get_engine(&obj->last_write, + &dev_priv->drm.struct_mutex); + if (engine) + seq_printf(m, " (%s)", engine->name); - /* FIXME: the user of this interface might want more than just GGTT */ - switch (list) { - case ACTIVE_LIST: - seq_puts(m, "Active:\n"); - head = &ggtt->base.active_list; - break; - case INACTIVE_LIST: - seq_puts(m, "Inactive:\n"); - head = &ggtt->base.inactive_list; - break; - default: - mutex_unlock(&dev->struct_mutex); - return -EINVAL; - } - - total_obj_size = total_gtt_size = count = 0; - list_for_each_entry(vma, head, vm_link) { - seq_printf(m, " "); - describe_obj(m, vma->obj); - seq_printf(m, "\n"); - total_obj_size += vma->obj->base.size; - total_gtt_size += vma->node.size; - count++; - } - mutex_unlock(&dev->struct_mutex); - - seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n", - count, total_obj_size, total_gtt_size); - return 0; + frontbuffer_bits = atomic_read(&obj->frontbuffer_bits); + if (frontbuffer_bits) + seq_printf(m, " (frontbuffer: 0x%03x)", frontbuffer_bits); } static int obj_rank_by_stolen(void *priv, @@ -263,9 +225,8 @@ static int obj_rank_by_stolen(void *priv, static int i915_gem_stolen_list_info(struct seq_file *m, void *data) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; struct drm_i915_gem_object *obj; u64 total_obj_size, total_gtt_size; LIST_HEAD(stolen); @@ -311,17 +272,6 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) return 0; } -#define count_objects(list, member) do { \ - list_for_each_entry(obj, list, member) { \ - size += i915_gem_obj_total_ggtt_size(obj); \ - ++count; \ - if (obj->map_and_fenceable) { \ - mappable_size += i915_gem_obj_ggtt_size(obj); \ - ++mappable_count; \ - } \ - } \ -} while (0) - struct file_stats { struct drm_i915_file_private *file_priv; unsigned long count; @@ -338,46 +288,29 @@ static int per_file_stats(int id, void *ptr, void *data) stats->count++; stats->total += obj->base.size; - + if (!obj->bind_count) + stats->unbound += obj->base.size; if (obj->base.name || obj->base.dma_buf) stats->shared += obj->base.size; - if (USES_FULL_PPGTT(obj->base.dev)) { - list_for_each_entry(vma, &obj->vma_list, obj_link) { - struct i915_hw_ppgtt *ppgtt; - - if (!drm_mm_node_allocated(&vma->node)) - continue; + list_for_each_entry(vma, &obj->vma_list, obj_link) { + if (!drm_mm_node_allocated(&vma->node)) + continue; - if (vma->is_ggtt) { - stats->global += obj->base.size; - continue; - } + 
if (i915_vma_is_ggtt(vma)) { + stats->global += vma->node.size; + } else { + struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vma->vm); - ppgtt = container_of(vma->vm, struct i915_hw_ppgtt, base); - if (ppgtt->file_priv != stats->file_priv) + if (ppgtt->base.file != stats->file_priv) continue; - - if (obj->active) /* XXX per-vma statistic */ - stats->active += obj->base.size; - else - stats->inactive += obj->base.size; - - return 0; - } - } else { - if (i915_gem_obj_ggtt_bound(obj)) { - stats->global += obj->base.size; - if (obj->active) - stats->active += obj->base.size; - else - stats->inactive += obj->base.size; - return 0; } - } - if (!list_empty(&obj->global_list)) - stats->unbound += obj->base.size; + if (i915_vma_is_active(vma)) + stats->active += vma->node.size; + else + stats->inactive += vma->node.size; + } return 0; } @@ -424,9 +357,9 @@ static int per_file_ctx_stats(int id, void *ptr, void *data) for (n = 0; n < ARRAY_SIZE(ctx->engine); n++) { if (ctx->engine[n].state) - per_file_stats(0, ctx->engine[n].state, data); - if (ctx->engine[n].ringbuf) - per_file_stats(0, ctx->engine[n].ringbuf->obj, data); + per_file_stats(0, ctx->engine[n].state->obj, data); + if (ctx->engine[n].ring) + per_file_stats(0, ctx->engine[n].ring->vma->obj, data); } return 0; @@ -435,48 +368,34 @@ static int per_file_ctx_stats(int id, void *ptr, void *data) static void print_context_stats(struct seq_file *m, struct drm_i915_private *dev_priv) { + struct drm_device *dev = &dev_priv->drm; struct file_stats stats; struct drm_file *file; memset(&stats, 0, sizeof(stats)); - mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&dev->struct_mutex); if (dev_priv->kernel_context) per_file_ctx_stats(0, dev_priv->kernel_context, &stats); - list_for_each_entry(file, &dev_priv->drm.filelist, lhead) { + list_for_each_entry(file, &dev->filelist, lhead) { struct drm_i915_file_private *fpriv = file->driver_priv; idr_for_each(&fpriv->context_idr, per_file_ctx_stats, &stats); } - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&dev->struct_mutex); print_file_stats(m, "[k]contexts", stats); } -#define count_vmas(list, member) do { \ - list_for_each_entry(vma, list, member) { \ - size += i915_gem_obj_total_ggtt_size(vma->obj); \ - ++count; \ - if (vma->obj->map_and_fenceable) { \ - mappable_size += i915_gem_obj_ggtt_size(vma->obj); \ - ++mappable_count; \ - } \ - } \ -} while (0) - -static int i915_gem_object_info(struct seq_file *m, void* data) +static int i915_gem_object_info(struct seq_file *m, void *data) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; struct i915_ggtt *ggtt = &dev_priv->ggtt; - u32 count, mappable_count, purgeable_count; - u64 size, mappable_size, purgeable_size; - unsigned long pin_mapped_count = 0, pin_mapped_purgeable_count = 0; - u64 pin_mapped_size = 0, pin_mapped_purgeable_size = 0; + u32 count, mapped_count, purgeable_count, dpy_count; + u64 size, mapped_size, purgeable_size, dpy_size; struct drm_i915_gem_object *obj; struct drm_file *file; - struct i915_vma *vma; int ret; ret = mutex_lock_interruptible(&dev->struct_mutex); @@ -487,70 +406,53 @@ static int i915_gem_object_info(struct seq_file *m, void* data) dev_priv->mm.object_count, dev_priv->mm.object_memory); - size = count = mappable_size = mappable_count = 0; - count_objects(&dev_priv->mm.bound_list, global_list); - seq_printf(m, "%u 
[%u] objects, %llu [%llu] bytes in gtt\n", - count, mappable_count, size, mappable_size); - - size = count = mappable_size = mappable_count = 0; - count_vmas(&ggtt->base.active_list, vm_link); - seq_printf(m, " %u [%u] active objects, %llu [%llu] bytes\n", - count, mappable_count, size, mappable_size); + size = count = 0; + mapped_size = mapped_count = 0; + purgeable_size = purgeable_count = 0; + list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) { + size += obj->base.size; + ++count; - size = count = mappable_size = mappable_count = 0; - count_vmas(&ggtt->base.inactive_list, vm_link); - seq_printf(m, " %u [%u] inactive objects, %llu [%llu] bytes\n", - count, mappable_count, size, mappable_size); + if (obj->madv == I915_MADV_DONTNEED) { + purgeable_size += obj->base.size; + ++purgeable_count; + } - size = count = purgeable_size = purgeable_count = 0; - list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) { - size += obj->base.size, ++count; - if (obj->madv == I915_MADV_DONTNEED) - purgeable_size += obj->base.size, ++purgeable_count; if (obj->mapping) { - pin_mapped_count++; - pin_mapped_size += obj->base.size; - if (obj->pages_pin_count == 0) { - pin_mapped_purgeable_count++; - pin_mapped_purgeable_size += obj->base.size; - } + mapped_count++; + mapped_size += obj->base.size; } } seq_printf(m, "%u unbound objects, %llu bytes\n", count, size); - size = count = mappable_size = mappable_count = 0; + size = count = dpy_size = dpy_count = 0; list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - if (obj->fault_mappable) { - size += i915_gem_obj_ggtt_size(obj); - ++count; - } + size += obj->base.size; + ++count; + if (obj->pin_display) { - mappable_size += i915_gem_obj_ggtt_size(obj); - ++mappable_count; + dpy_size += obj->base.size; + ++dpy_count; } + if (obj->madv == I915_MADV_DONTNEED) { purgeable_size += obj->base.size; ++purgeable_count; } + if (obj->mapping) { - pin_mapped_count++; - pin_mapped_size += obj->base.size; - if (obj->pages_pin_count == 0) { - pin_mapped_purgeable_count++; - pin_mapped_purgeable_size += obj->base.size; - } + mapped_count++; + mapped_size += obj->base.size; } } + seq_printf(m, "%u bound objects, %llu bytes\n", + count, size); seq_printf(m, "%u purgeable objects, %llu bytes\n", purgeable_count, purgeable_size); - seq_printf(m, "%u pinned mappable objects, %llu bytes\n", - mappable_count, mappable_size); - seq_printf(m, "%u fault mappable objects, %llu bytes\n", - count, size); - seq_printf(m, - "%lu [%lu] pin mapped objects, %llu [%llu] bytes [purgeable]\n", - pin_mapped_count, pin_mapped_purgeable_count, - pin_mapped_size, pin_mapped_purgeable_size); + seq_printf(m, "%u mapped objects, %llu bytes\n", + mapped_count, mapped_size); + seq_printf(m, "%u display objects (pinned), %llu bytes\n", + dpy_count, dpy_size); seq_printf(m, "%llu [%llu] gtt total\n", ggtt->base.total, ggtt->mappable_end - ggtt->base.start); @@ -563,6 +465,8 @@ static int i915_gem_object_info(struct seq_file *m, void* data) print_context_stats(m, dev_priv); list_for_each_entry_reverse(file, &dev->filelist, lhead) { struct file_stats stats; + struct drm_i915_file_private *file_priv = file->driver_priv; + struct drm_i915_gem_request *request; struct task_struct *task; memset(&stats, 0, sizeof(stats)); @@ -576,10 +480,17 @@ static int i915_gem_object_info(struct seq_file *m, void* data) * still alive (e.g. get_pid(current) => fork() => exit()). * Therefore, we need to protect this ->comm access using RCU. 
*/ + mutex_lock(&dev->struct_mutex); + request = list_first_entry_or_null(&file_priv->mm.request_list, + struct drm_i915_gem_request, + client_list); rcu_read_lock(); - task = pid_task(file->pid, PIDTYPE_PID); + task = pid_task(request && request->ctx->pid ? + request->ctx->pid : file->pid, + PIDTYPE_PID); print_file_stats(m, task ? task->comm : "<unknown>", stats); rcu_read_unlock(); + mutex_unlock(&dev->struct_mutex); } mutex_unlock(&dev->filelist_mutex); @@ -589,9 +500,9 @@ static int i915_gem_object_info(struct seq_file *m, void* data) static int i915_gem_gtt_info(struct seq_file *m, void *data) { struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - uintptr_t list = (uintptr_t) node->info_ent->data; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(node); + struct drm_device *dev = &dev_priv->drm; + bool show_pin_display_only = !!node->info_ent->data; struct drm_i915_gem_object *obj; u64 total_obj_size, total_gtt_size; int count, ret; @@ -602,7 +513,7 @@ static int i915_gem_gtt_info(struct seq_file *m, void *data) total_obj_size = total_gtt_size = count = 0; list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - if (list == PINNED_LIST && !i915_gem_obj_is_pinned(obj)) + if (show_pin_display_only && !obj->pin_display) continue; seq_puts(m, " "); @@ -623,9 +534,8 @@ static int i915_gem_gtt_info(struct seq_file *m, void *data) static int i915_gem_pageflip_info(struct seq_file *m, void *data) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; struct intel_crtc *crtc; int ret; @@ -672,7 +582,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) intel_crtc_get_vblank_counter(crtc)); seq_printf(m, "%d prepares\n", atomic_read(&work->pending)); - if (INTEL_INFO(dev)->gen >= 4) + if (INTEL_GEN(dev_priv) >= 4) addr = I915_HI_DISPBASE(I915_READ(DSPSURF(crtc->plane))); else addr = I915_READ(DSPADDR(crtc->plane)); @@ -693,9 +603,8 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) static int i915_gem_batch_pool_info(struct seq_file *m, void *data) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; struct drm_i915_gem_object *obj; struct intel_engine_cs *engine; int total = 0; @@ -738,9 +647,8 @@ static int i915_gem_batch_pool_info(struct seq_file *m, void *data) static int i915_gem_request_info(struct seq_file *m, void *data) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; struct intel_engine_cs *engine; struct drm_i915_gem_request *req; int ret, any; @@ -754,21 +662,20 @@ static int i915_gem_request_info(struct seq_file *m, void *data) int count; count = 0; - list_for_each_entry(req, &engine->request_list, list) + list_for_each_entry(req, &engine->request_list, link) count++; if (count == 0) continue; seq_printf(m, "%s requests: %d\n", engine->name, count); - list_for_each_entry(req, &engine->request_list, list) { + list_for_each_entry(req, &engine->request_list, link) { + 
struct pid *pid = req->ctx->pid; struct task_struct *task; rcu_read_lock(); - task = NULL; - if (req->pid) - task = pid_task(req->pid, PIDTYPE_PID); + task = pid ? pid_task(pid, PIDTYPE_PID) : NULL; seq_printf(m, " %x @ %d: %s [%d]\n", - req->seqno, + req->fence.seqno, (int) (jiffies - req->emitted_jiffies), task ? task->comm : "<unknown>", task ? task->pid : -1); @@ -793,8 +700,6 @@ static void i915_ring_seqno_info(struct seq_file *m, seq_printf(m, "Current sequence (%s): %x\n", engine->name, intel_engine_get_seqno(engine)); - seq_printf(m, "Current user interrupts (%s): %lx\n", - engine->name, READ_ONCE(engine->breadcrumbs.irq_wakeups)); spin_lock(&b->lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { @@ -808,41 +713,25 @@ static void i915_ring_seqno_info(struct seq_file *m, static int i915_gem_seqno_info(struct seq_file *m, void *data) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_engine_cs *engine; - int ret; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - intel_runtime_pm_get(dev_priv); for_each_engine(engine, dev_priv) i915_ring_seqno_info(m, engine); - intel_runtime_pm_put(dev_priv); - mutex_unlock(&dev->struct_mutex); - return 0; } static int i915_interrupt_info(struct seq_file *m, void *data) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_engine_cs *engine; - int ret, i, pipe; + int i, pipe; - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; intel_runtime_pm_get(dev_priv); - if (IS_CHERRYVIEW(dev)) { + if (IS_CHERRYVIEW(dev_priv)) { seq_printf(m, "Master Interrupt Control:\t%08x\n", I915_READ(GEN8_MASTER_IRQ)); @@ -881,7 +770,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data) I915_READ(GEN8_PCU_IIR)); seq_printf(m, "PCU interrupt enable:\t%08x\n", I915_READ(GEN8_PCU_IER)); - } else if (INTEL_INFO(dev)->gen >= 8) { + } else if (INTEL_GEN(dev_priv) >= 8) { seq_printf(m, "Master Interrupt Control:\t%08x\n", I915_READ(GEN8_MASTER_IRQ)); @@ -937,7 +826,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data) I915_READ(GEN8_PCU_IIR)); seq_printf(m, "PCU interrupt enable:\t%08x\n", I915_READ(GEN8_PCU_IER)); - } else if (IS_VALLEYVIEW(dev)) { + } else if (IS_VALLEYVIEW(dev_priv)) { seq_printf(m, "Display IER:\t%08x\n", I915_READ(VLV_IER)); seq_printf(m, "Display IIR:\t%08x\n", @@ -975,7 +864,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data) seq_printf(m, "DPINVGTT:\t%08x\n", I915_READ(DPINVGTT)); - } else if (!HAS_PCH_SPLIT(dev)) { + } else if (!HAS_PCH_SPLIT(dev_priv)) { seq_printf(m, "Interrupt enable: %08x\n", I915_READ(IER)); seq_printf(m, "Interrupt identity: %08x\n", @@ -1007,7 +896,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data) I915_READ(GTIMR)); } for_each_engine(engine, dev_priv) { - if (INTEL_INFO(dev)->gen >= 6) { + if (INTEL_GEN(dev_priv) >= 6) { seq_printf(m, "Graphics Interrupt mask (%s): %08x\n", engine->name, I915_READ_IMR(engine)); @@ -1015,16 +904,14 @@ static int i915_interrupt_info(struct seq_file *m, void *data) i915_ring_seqno_info(m, engine); } intel_runtime_pm_put(dev_priv); - mutex_unlock(&dev->struct_mutex); return 0; } static int i915_gem_fence_regs_info(struct seq_file *m, void 
*data) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; int i, ret; ret = mutex_lock_interruptible(&dev->struct_mutex); @@ -1033,14 +920,14 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data) seq_printf(m, "Total fences = %d\n", dev_priv->num_fence_regs); for (i = 0; i < dev_priv->num_fence_regs; i++) { - struct drm_i915_gem_object *obj = dev_priv->fence_regs[i].obj; + struct i915_vma *vma = dev_priv->fence_regs[i].vma; seq_printf(m, "Fence %d, pin count = %d, object = ", i, dev_priv->fence_regs[i].pin_count); - if (obj == NULL) + if (!vma) seq_puts(m, "unused"); else - describe_obj(m, obj); + describe_obj(m, vma->obj); seq_putc(m, '\n'); } @@ -1051,8 +938,7 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data) static int i915_hws_info(struct seq_file *m, void *data) { struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(node); struct intel_engine_cs *engine; const u32 *hws; int i; @@ -1077,33 +963,25 @@ i915_error_state_write(struct file *filp, loff_t *ppos) { struct i915_error_state_file_priv *error_priv = filp->private_data; - struct drm_device *dev = error_priv->dev; - int ret; DRM_DEBUG_DRIVER("Resetting error state\n"); - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - - i915_destroy_error_state(dev); - mutex_unlock(&dev->struct_mutex); + i915_destroy_error_state(error_priv->dev); return cnt; } static int i915_error_state_open(struct inode *inode, struct file *file) { - struct drm_device *dev = inode->i_private; + struct drm_i915_private *dev_priv = inode->i_private; struct i915_error_state_file_priv *error_priv; error_priv = kzalloc(sizeof(*error_priv), GFP_KERNEL); if (!error_priv) return -ENOMEM; - error_priv->dev = dev; + error_priv->dev = &dev_priv->drm; - i915_error_state_get(dev, error_priv); + i915_error_state_get(&dev_priv->drm, error_priv); file->private_data = error_priv; @@ -1129,7 +1007,8 @@ static ssize_t i915_error_state_read(struct file *file, char __user *userbuf, ssize_t ret_count = 0; int ret; - ret = i915_error_state_buf_init(&error_str, to_i915(error_priv->dev), count, *pos); + ret = i915_error_state_buf_init(&error_str, + to_i915(error_priv->dev), count, *pos); if (ret) return ret; @@ -1162,16 +1041,15 @@ static const struct file_operations i915_error_state_fops = { static int i915_next_seqno_get(void *data, u64 *val) { - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = data; int ret; - ret = mutex_lock_interruptible(&dev->struct_mutex); + ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex); if (ret) return ret; *val = dev_priv->next_seqno; - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&dev_priv->drm.struct_mutex); return 0; } @@ -1179,7 +1057,8 @@ i915_next_seqno_get(void *data, u64 *val) static int i915_next_seqno_set(void *data, u64 val) { - struct drm_device *dev = data; + struct drm_i915_private *dev_priv = data; + struct drm_device *dev = &dev_priv->drm; int ret; ret = mutex_lock_interruptible(&dev->struct_mutex); @@ -1198,16 +1077,13 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops, static int i915_frequency_info(struct seq_file *m, void *unused) { - struct drm_info_node 
*node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; int ret = 0; intel_runtime_pm_get(dev_priv); - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - - if (IS_GEN5(dev)) { + if (IS_GEN5(dev_priv)) { u16 rgvswctl = I915_READ16(MEMSWCTL); u16 rgvstat = I915_READ16(MEMSTAT_ILK); @@ -1217,7 +1093,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) MEMSTAT_VID_SHIFT); seq_printf(m, "Current P-state: %d\n", (rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT); - } else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) { + } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { u32 freq_sts; mutex_lock(&dev_priv->rps.hw_lock); @@ -1244,7 +1120,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) "efficient (RPe) frequency: %d MHz\n", intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq)); mutex_unlock(&dev_priv->rps.hw_lock); - } else if (INTEL_INFO(dev)->gen >= 6) { + } else if (INTEL_GEN(dev_priv) >= 6) { u32 rp_state_limits; u32 gt_perf_status; u32 rp_state_cap; @@ -1256,7 +1132,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) int max_freq; rp_state_limits = I915_READ(GEN6_RP_STATE_LIMITS); - if (IS_BROXTON(dev)) { + if (IS_BROXTON(dev_priv)) { rp_state_cap = I915_READ(BXT_RP_STATE_CAP); gt_perf_status = I915_READ(BXT_GT_PERF_STATUS); } else { @@ -1272,11 +1148,11 @@ static int i915_frequency_info(struct seq_file *m, void *unused) intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); reqf = I915_READ(GEN6_RPNSWREQ); - if (IS_GEN9(dev)) + if (IS_GEN9(dev_priv)) reqf >>= 23; else { reqf &= ~GEN6_TURBO_DISABLE; - if (IS_HASWELL(dev) || IS_BROADWELL(dev)) + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) reqf >>= 24; else reqf >>= 25; @@ -1294,9 +1170,9 @@ static int i915_frequency_info(struct seq_file *m, void *unused) rpdownei = I915_READ(GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; rpcurdown = I915_READ(GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK; rpprevdown = I915_READ(GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK; - if (IS_GEN9(dev)) + if (IS_GEN9(dev_priv)) cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; - else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) + else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; else cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; @@ -1305,7 +1181,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); mutex_unlock(&dev->struct_mutex); - if (IS_GEN6(dev) || IS_GEN7(dev)) { + if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) { pm_ier = I915_READ(GEN6_PMIER); pm_imr = I915_READ(GEN6_PMIMR); pm_isr = I915_READ(GEN6_PMISR); @@ -1323,7 +1199,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "pm_intr_keep: 0x%08x\n", dev_priv->rps.pm_intr_keep); seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); seq_printf(m, "Render p-state ratio: %d\n", - (gt_perf_status & (IS_GEN9(dev) ? 0x1ff00 : 0xff00)) >> 8); + (gt_perf_status & (IS_GEN9(dev_priv) ? 0x1ff00 : 0xff00)) >> 8); seq_printf(m, "Render p-state VID: %d\n", gt_perf_status & 0xff); seq_printf(m, "Render p-state limit: %d\n", @@ -1352,22 +1228,22 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "Down threshold: %d%%\n", dev_priv->rps.down_threshold); - max_freq = (IS_BROXTON(dev) ? 
rp_state_cap >> 0 : + max_freq = (IS_BROXTON(dev_priv) ? rp_state_cap >> 0 : rp_state_cap >> 16) & 0xff; - max_freq *= (IS_SKYLAKE(dev) || IS_KABYLAKE(dev) ? + max_freq *= (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); max_freq = (rp_state_cap & 0xff00) >> 8; - max_freq *= (IS_SKYLAKE(dev) || IS_KABYLAKE(dev) ? + max_freq *= (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); - max_freq = (IS_BROXTON(dev) ? rp_state_cap >> 16 : + max_freq = (IS_BROXTON(dev_priv) ? rp_state_cap >> 16 : rp_state_cap >> 0) & 0xff; - max_freq *= (IS_SKYLAKE(dev) || IS_KABYLAKE(dev) ? + max_freq *= (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); @@ -1381,6 +1257,8 @@ static int i915_frequency_info(struct seq_file *m, void *unused) intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq)); seq_printf(m, "Min freq: %d MHz\n", intel_gpu_freq(dev_priv, dev_priv->rps.min_freq)); + seq_printf(m, "Boost freq: %d MHz\n", + intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq)); seq_printf(m, "Max freq: %d MHz\n", intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); seq_printf(m, @@ -1401,9 +1279,7 @@ out: static int i915_hangcheck_info(struct seq_file *m, void *unused) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_engine_cs *engine; u64 acthd[I915_NUM_ENGINES]; u32 seqno[I915_NUM_ENGINES]; @@ -1411,6 +1287,15 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) enum intel_engine_id id; int j; + if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags)) + seq_printf(m, "Wedged\n"); + if (test_bit(I915_RESET_IN_PROGRESS, &dev_priv->gpu_error.flags)) + seq_printf(m, "Reset in progress\n"); + if (waitqueue_active(&dev_priv->gpu_error.wait_queue)) + seq_printf(m, "Waiter holding struct mutex\n"); + if (waitqueue_active(&dev_priv->gpu_error.reset_queue)) + seq_printf(m, "struct_mutex blocked for reset\n"); + if (!i915.enable_hangcheck) { seq_printf(m, "Hangcheck disabled\n"); return 0; @@ -1419,7 +1304,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) intel_runtime_pm_get(dev_priv); for_each_engine_id(engine, dev_priv, id) { - acthd[id] = intel_ring_get_active_head(engine); + acthd[id] = intel_engine_get_active_head(engine); seqno[id] = intel_engine_get_seqno(engine); } @@ -1440,11 +1325,10 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) engine->hangcheck.seqno, seqno[id], engine->last_submitted_seqno); - seq_printf(m, "\twaiters? %d\n", - intel_engine_has_waiter(engine)); - seq_printf(m, "\tuser interrupts = %lx [current %lx]\n", - engine->hangcheck.user_interrupts, - READ_ONCE(engine->breadcrumbs.irq_wakeups)); + seq_printf(m, "\twaiters? %s, fake irq active? 
%s\n", + yesno(intel_engine_has_waiter(engine)), + yesno(test_bit(engine->id, + &dev_priv->gpu_error.missed_irq_rings))); seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", (long long)engine->hangcheck.acthd, (long long)acthd[id]); @@ -1472,9 +1356,8 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) static int ironlake_drpc_info(struct seq_file *m) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; u32 rgvmodectl, rstdbyctl; u16 crstandvid; int ret; @@ -1540,9 +1423,7 @@ static int ironlake_drpc_info(struct seq_file *m) static int i915_forcewake_domains(struct seq_file *m, void *data) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_uncore_forcewake_domain *fw_domain; spin_lock_irq(&dev_priv->uncore.lock); @@ -1558,9 +1439,7 @@ static int i915_forcewake_domains(struct seq_file *m, void *data) static int vlv_drpc_info(struct seq_file *m) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); u32 rpmodectl1, rcctl1, pw_status; intel_runtime_pm_get(dev_priv); @@ -1598,10 +1477,10 @@ static int vlv_drpc_info(struct seq_file *m) static int gen6_drpc_info(struct seq_file *m) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; u32 rpmodectl1, gt_core_status, rcctl1, rc6vids = 0; + u32 gen9_powergate_enable = 0, gen9_powergate_status = 0; unsigned forcewake_count; int count = 0, ret; @@ -1629,6 +1508,10 @@ static int gen6_drpc_info(struct seq_file *m) rpmodectl1 = I915_READ(GEN6_RP_CONTROL); rcctl1 = I915_READ(GEN6_RC_CONTROL); + if (INTEL_GEN(dev_priv) >= 9) { + gen9_powergate_enable = I915_READ(GEN9_PG_ENABLE); + gen9_powergate_status = I915_READ(GEN9_PWRGT_DOMAIN_STATUS); + } mutex_unlock(&dev->struct_mutex); mutex_lock(&dev_priv->rps.hw_lock); sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); @@ -1647,6 +1530,12 @@ static int gen6_drpc_info(struct seq_file *m) yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE)); seq_printf(m, "RC6 Enabled: %s\n", yesno(rcctl1 & GEN6_RC_CTL_RC6_ENABLE)); + if (INTEL_GEN(dev_priv) >= 9) { + seq_printf(m, "Render Well Gating Enabled: %s\n", + yesno(gen9_powergate_enable & GEN9_RENDER_PG_ENABLE)); + seq_printf(m, "Media Well Gating Enabled: %s\n", + yesno(gen9_powergate_enable & GEN9_MEDIA_PG_ENABLE)); + } seq_printf(m, "Deep RC6 Enabled: %s\n", yesno(rcctl1 & GEN6_RC_CTL_RC6p_ENABLE)); seq_printf(m, "Deepest RC6 Enabled: %s\n", @@ -1675,6 +1564,14 @@ static int gen6_drpc_info(struct seq_file *m) seq_printf(m, "Core Power Down: %s\n", yesno(gt_core_status & GEN6_CORE_CPD_STATE_MASK)); + if (INTEL_GEN(dev_priv) >= 9) { + seq_printf(m, "Render Power Well: %s\n", + (gen9_powergate_status & + GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down"); + seq_printf(m, "Media Power Well: %s\n", + (gen9_powergate_status & + GEN9_PWRGT_MEDIA_STATUS_MASK) ? 
"Up" : "Down"); + } /* Not exactly sure what this is */ seq_printf(m, "RC6 \"Locked to RPn\" residency since boot: %u\n", @@ -1692,17 +1589,16 @@ static int gen6_drpc_info(struct seq_file *m) GEN6_DECODE_RC6_VID(((rc6vids >> 8) & 0xff))); seq_printf(m, "RC6++ voltage: %dmV\n", GEN6_DECODE_RC6_VID(((rc6vids >> 16) & 0xff))); - return 0; + return i915_forcewake_domains(m, NULL); } static int i915_drpc_info(struct seq_file *m, void *unused) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = node_to_i915(m->private); - if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) return vlv_drpc_info(m); - else if (INTEL_INFO(dev)->gen >= 6) + else if (INTEL_GEN(dev_priv) >= 6) return gen6_drpc_info(m); else return ironlake_drpc_info(m); @@ -1710,9 +1606,7 @@ static int i915_drpc_info(struct seq_file *m, void *unused) static int i915_frontbuffer_tracking(struct seq_file *m, void *unused) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); seq_printf(m, "FB tracking busy bits: 0x%08x\n", dev_priv->fb_tracking.busy_bits); @@ -1725,11 +1619,9 @@ static int i915_frontbuffer_tracking(struct seq_file *m, void *unused) static int i915_fbc_status(struct seq_file *m, void *unused) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); - if (!HAS_FBC(dev)) { + if (!HAS_FBC(dev_priv)) { seq_puts(m, "FBC unsupported on this chipset\n"); return 0; } @@ -1743,7 +1635,7 @@ static int i915_fbc_status(struct seq_file *m, void *unused) seq_printf(m, "FBC disabled: %s\n", dev_priv->fbc.no_fbc_reason); - if (INTEL_INFO(dev_priv)->gen >= 7) + if (INTEL_GEN(dev_priv) >= 7) seq_printf(m, "Compressing: %s\n", yesno(I915_READ(FBC_STATUS2) & FBC_COMPRESSION_MASK)); @@ -1756,10 +1648,9 @@ static int i915_fbc_status(struct seq_file *m, void *unused) static int i915_fbc_fc_get(void *data, u64 *val) { - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = data; - if (INTEL_INFO(dev)->gen < 7 || !HAS_FBC(dev)) + if (INTEL_GEN(dev_priv) < 7 || !HAS_FBC(dev_priv)) return -ENODEV; *val = dev_priv->fbc.false_color; @@ -1769,11 +1660,10 @@ static int i915_fbc_fc_get(void *data, u64 *val) static int i915_fbc_fc_set(void *data, u64 val) { - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = data; u32 reg; - if (INTEL_INFO(dev)->gen < 7 || !HAS_FBC(dev)) + if (INTEL_GEN(dev_priv) < 7 || !HAS_FBC(dev_priv)) return -ENODEV; mutex_lock(&dev_priv->fbc.lock); @@ -1795,11 +1685,9 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_fbc_fc_fops, static int i915_ips_status(struct seq_file *m, void *unused) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); - if (!HAS_IPS(dev)) { + if (!HAS_IPS(dev_priv)) { seq_puts(m, "not supported\n"); return 0; } @@ -1809,7 +1697,7 @@ static int i915_ips_status(struct seq_file *m, void *unused) seq_printf(m, "Enabled by kernel parameter: %s\n", yesno(i915.enable_ips)); - if (INTEL_INFO(dev)->gen >= 8) { + if 
(INTEL_GEN(dev_priv) >= 8) { seq_puts(m, "Currently: unknown\n"); } else { if (I915_READ(IPS_CTL) & IPS_ENABLE) @@ -1825,23 +1713,21 @@ static int i915_ips_status(struct seq_file *m, void *unused) static int i915_sr_status(struct seq_file *m, void *unused) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); bool sr_enabled = false; intel_runtime_pm_get(dev_priv); - if (HAS_PCH_SPLIT(dev)) + if (HAS_PCH_SPLIT(dev_priv)) sr_enabled = I915_READ(WM1_LP_ILK) & WM1_LP_SR_EN; - else if (IS_CRESTLINE(dev) || IS_G4X(dev) || - IS_I945G(dev) || IS_I945GM(dev)) + else if (IS_CRESTLINE(dev_priv) || IS_G4X(dev_priv) || + IS_I945G(dev_priv) || IS_I945GM(dev_priv)) sr_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN; - else if (IS_I915GM(dev)) + else if (IS_I915GM(dev_priv)) sr_enabled = I915_READ(INSTPM) & INSTPM_SELF_EN; - else if (IS_PINEVIEW(dev)) + else if (IS_PINEVIEW(dev_priv)) sr_enabled = I915_READ(DSPFW3) & PINEVIEW_SELF_REFRESH_EN; - else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) + else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) sr_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN; intel_runtime_pm_put(dev_priv); @@ -1854,13 +1740,12 @@ static int i915_sr_status(struct seq_file *m, void *unused) static int i915_emon_status(struct seq_file *m, void *unused) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; unsigned long temp, chipset, gfx; int ret; - if (!IS_GEN5(dev)) + if (!IS_GEN5(dev_priv)) return -ENODEV; ret = mutex_lock_interruptible(&dev->struct_mutex); @@ -1882,27 +1767,23 @@ static int i915_emon_status(struct seq_file *m, void *unused) static int i915_ring_freq_table(struct seq_file *m, void *unused) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); int ret = 0; int gpu_freq, ia_freq; unsigned int max_gpu_freq, min_gpu_freq; - if (!HAS_CORE_RING_FREQ(dev)) { + if (!HAS_LLC(dev_priv)) { seq_puts(m, "unsupported on this chipset\n"); return 0; } intel_runtime_pm_get(dev_priv); - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); if (ret) goto out; - if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) { + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { /* Convert GT frequency to 50 HZ units */ min_gpu_freq = dev_priv->rps.min_freq_softlimit / GEN9_FREQ_SCALER; @@ -1922,7 +1803,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) &ia_freq); seq_printf(m, "%d\t\t%d\t\t\t\t%d\n", intel_gpu_freq(dev_priv, (gpu_freq * - (IS_SKYLAKE(dev) || IS_KABYLAKE(dev) ? + (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ? 
GEN9_FREQ_SCALER : 1))), ((ia_freq >> 0) & 0xff) * 100, ((ia_freq >> 8) & 0xff) * 100); @@ -1937,9 +1818,8 @@ out: static int i915_opregion(struct seq_file *m, void *unused) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; struct intel_opregion *opregion = &dev_priv->opregion; int ret; @@ -1958,10 +1838,7 @@ out: static int i915_vbt(struct seq_file *m, void *unused) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_opregion *opregion = &dev_priv->opregion; + struct intel_opregion *opregion = &node_to_i915(m->private)->opregion; if (opregion->vbt) seq_write(m, opregion->vbt, opregion->vbt_size); @@ -1971,8 +1848,8 @@ static int i915_vbt(struct seq_file *m, void *unused) static int i915_gem_framebuffer_info(struct seq_file *m, void *data) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; struct intel_framebuffer *fbdev_fb = NULL; struct drm_framebuffer *drm_fb; int ret; @@ -1982,8 +1859,8 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data) return ret; #ifdef CONFIG_DRM_FBDEV_EMULATION - if (to_i915(dev)->fbdev) { - fbdev_fb = to_intel_framebuffer(to_i915(dev)->fbdev->helper.fb); + if (dev_priv->fbdev) { + fbdev_fb = to_intel_framebuffer(dev_priv->fbdev->helper.fb); seq_printf(m, "fbcon size: %d x %d, depth %d, %d bpp, modifier 0x%llx, refcount %d, obj ", fbdev_fb->base.width, @@ -2019,19 +1896,17 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data) return 0; } -static void describe_ctx_ringbuf(struct seq_file *m, - struct intel_ringbuffer *ringbuf) +static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring) { seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, last head: %d)", - ringbuf->space, ringbuf->head, ringbuf->tail, - ringbuf->last_retired_head); + ring->space, ring->head, ring->tail, + ring->last_retired_head); } static int i915_context_status(struct seq_file *m, void *unused) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; struct intel_engine_cs *engine; struct i915_gem_context *ctx; int ret; @@ -2042,18 +1917,17 @@ static int i915_context_status(struct seq_file *m, void *unused) list_for_each_entry(ctx, &dev_priv->context_list, link) { seq_printf(m, "HW context %u ", ctx->hw_id); - if (IS_ERR(ctx->file_priv)) { - seq_puts(m, "(deleted) "); - } else if (ctx->file_priv) { - struct pid *pid = ctx->file_priv->file->pid; + if (ctx->pid) { struct task_struct *task; - task = get_pid_task(pid, PIDTYPE_PID); + task = get_pid_task(ctx->pid, PIDTYPE_PID); if (task) { seq_printf(m, "(%s [%d]) ", task->comm, task->pid); put_task_struct(task); } + } else if (IS_ERR(ctx->file_priv)) { + seq_puts(m, "(deleted) "); } else { seq_puts(m, "(kernel) "); } @@ -2067,9 +1941,9 @@ static int i915_context_status(struct seq_file *m, void *unused) seq_printf(m, "%s: ", engine->name); seq_putc(m, ce->initialised ? 
'I' : 'i'); if (ce->state) - describe_obj(m, ce->state); - if (ce->ringbuf) - describe_ctx_ringbuf(m, ce->ringbuf); + describe_obj(m, ce->state->obj); + if (ce->ring) + describe_ctx_ring(m, ce->ring); seq_putc(m, '\n'); } @@ -2085,36 +1959,34 @@ static void i915_dump_lrc_obj(struct seq_file *m, struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - struct drm_i915_gem_object *ctx_obj = ctx->engine[engine->id].state; + struct i915_vma *vma = ctx->engine[engine->id].state; struct page *page; - uint32_t *reg_state; int j; - unsigned long ggtt_offset = 0; seq_printf(m, "CONTEXT: %s %u\n", engine->name, ctx->hw_id); - if (ctx_obj == NULL) { - seq_puts(m, "\tNot allocated\n"); + if (!vma) { + seq_puts(m, "\tFake context\n"); return; } - if (!i915_gem_obj_ggtt_bound(ctx_obj)) - seq_puts(m, "\tNot bound in GGTT\n"); - else - ggtt_offset = i915_gem_obj_ggtt_offset(ctx_obj); + if (vma->flags & I915_VMA_GLOBAL_BIND) + seq_printf(m, "\tBound in GGTT at 0x%08x\n", + i915_ggtt_offset(vma)); - if (i915_gem_object_get_pages(ctx_obj)) { - seq_puts(m, "\tFailed to get pages for context object\n"); + if (i915_gem_object_get_pages(vma->obj)) { + seq_puts(m, "\tFailed to get pages for context object\n\n"); return; } - page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); - if (!WARN_ON(page == NULL)) { - reg_state = kmap_atomic(page); + page = i915_gem_object_get_page(vma->obj, LRC_STATE_PN); + if (page) { + u32 *reg_state = kmap_atomic(page); for (j = 0; j < 0x600 / sizeof(u32) / 4; j += 4) { - seq_printf(m, "\t[0x%08lx] 0x%08x 0x%08x 0x%08x 0x%08x\n", - ggtt_offset + 4096 + (j * 4), + seq_printf(m, + "\t[0x%04x] 0x%08x 0x%08x 0x%08x 0x%08x\n", + j * 4, reg_state[j], reg_state[j + 1], reg_state[j + 2], reg_state[j + 3]); } @@ -2126,9 +1998,8 @@ static void i915_dump_lrc_obj(struct seq_file *m, static int i915_dump_lrc(struct seq_file *m, void *unused) { - struct drm_info_node *node = (struct drm_info_node *) m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; struct intel_engine_cs *engine; struct i915_gem_context *ctx; int ret; @@ -2153,9 +2024,8 @@ static int i915_dump_lrc(struct seq_file *m, void *unused) static int i915_execlists(struct seq_file *m, void *data) { - struct drm_info_node *node = (struct drm_info_node *)m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; struct intel_engine_cs *engine; u32 status_pointer; u8 read_pointer; @@ -2190,7 +2060,7 @@ static int i915_execlists(struct seq_file *m, void *data) status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(engine)); seq_printf(m, "\tStatus pointer: 0x%08X\n", status_pointer); - read_pointer = engine->next_context_status_buffer; + read_pointer = GEN8_CSB_READ_PTR(status_pointer); write_pointer = GEN8_CSB_WRITE_PTR(status_pointer); if (read_pointer > write_pointer) write_pointer += GEN8_CSB_ENTRIES; @@ -2256,9 +2126,8 @@ static const char *swizzle_string(unsigned swizzle) static int i915_swizzle_info(struct seq_file *m, void *data) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; int ret; ret = 
mutex_lock_interruptible(&dev->struct_mutex); @@ -2271,7 +2140,7 @@ static int i915_swizzle_info(struct seq_file *m, void *data) seq_printf(m, "bit6 swizzle for Y-tiling = %s\n", swizzle_string(dev_priv->mm.bit_6_swizzle_y)); - if (IS_GEN3(dev) || IS_GEN4(dev)) { + if (IS_GEN3(dev_priv) || IS_GEN4(dev_priv)) { seq_printf(m, "DDC = 0x%08x\n", I915_READ(DCC)); seq_printf(m, "DDC2 = 0x%08x\n", @@ -2280,7 +2149,7 @@ static int i915_swizzle_info(struct seq_file *m, void *data) I915_READ16(C0DRB3)); seq_printf(m, "C1DRB3 = 0x%04x\n", I915_READ16(C1DRB3)); - } else if (INTEL_INFO(dev)->gen >= 6) { + } else if (INTEL_GEN(dev_priv) >= 6) { seq_printf(m, "MAD_DIMM_C0 = 0x%08x\n", I915_READ(MAD_DIMM_C0)); seq_printf(m, "MAD_DIMM_C1 = 0x%08x\n", @@ -2289,7 +2158,7 @@ static int i915_swizzle_info(struct seq_file *m, void *data) I915_READ(MAD_DIMM_C2)); seq_printf(m, "TILECTL = 0x%08x\n", I915_READ(TILECTL)); - if (INTEL_INFO(dev)->gen >= 8) + if (INTEL_GEN(dev_priv) >= 8) seq_printf(m, "GAMTARBMODE = 0x%08x\n", I915_READ(GAMTARBMODE)); else @@ -2329,9 +2198,9 @@ static int per_file_ctx(int id, void *ptr, void *data) return 0; } -static void gen8_ppgtt_info(struct seq_file *m, struct drm_device *dev) +static void gen8_ppgtt_info(struct seq_file *m, + struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_engine_cs *engine; struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; int i; @@ -2350,9 +2219,9 @@ static void gen8_ppgtt_info(struct seq_file *m, struct drm_device *dev) } } -static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev) +static void gen6_ppgtt_info(struct seq_file *m, + struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_engine_cs *engine; if (IS_GEN6(dev_priv)) @@ -2384,22 +2253,23 @@ static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev) static int i915_ppgtt_info(struct seq_file *m, void *data) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; struct drm_file *file; + int ret; - int ret = mutex_lock_interruptible(&dev->struct_mutex); + mutex_lock(&dev->filelist_mutex); + ret = mutex_lock_interruptible(&dev->struct_mutex); if (ret) - return ret; + goto out_unlock; + intel_runtime_pm_get(dev_priv); - if (INTEL_INFO(dev)->gen >= 8) - gen8_ppgtt_info(m, dev); - else if (INTEL_INFO(dev)->gen >= 6) - gen6_ppgtt_info(m, dev); + if (INTEL_GEN(dev_priv) >= 8) + gen8_ppgtt_info(m, dev_priv); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_ppgtt_info(m, dev_priv); - mutex_lock(&dev->filelist_mutex); list_for_each_entry_reverse(file, &dev->filelist, lhead) { struct drm_i915_file_private *file_priv = file->driver_priv; struct task_struct *task; @@ -2407,19 +2277,19 @@ static int i915_ppgtt_info(struct seq_file *m, void *data) task = get_pid_task(file->pid, PIDTYPE_PID); if (!task) { ret = -ESRCH; - goto out_unlock; + goto out_rpm; } seq_printf(m, "\nproc: %s\n", task->comm); put_task_struct(task); idr_for_each(&file_priv->context_idr, per_file_ctx, (void *)(unsigned long)m); } -out_unlock: - mutex_unlock(&dev->filelist_mutex); +out_rpm: intel_runtime_pm_put(dev_priv); mutex_unlock(&dev->struct_mutex); - +out_unlock: + mutex_unlock(&dev->filelist_mutex); return ret; } @@ -2434,23 +2304,41 @@ static int count_irq_waiters(struct drm_i915_private *i915) return count; } +static 
const char *rps_power_to_str(unsigned int power) +{ + static const char * const strings[] = { + [LOW_POWER] = "low power", + [BETWEEN] = "mixed", + [HIGH_POWER] = "high power", + }; + + if (power >= ARRAY_SIZE(strings) || !strings[power]) + return "unknown"; + + return strings[power]; +} + static int i915_rps_boost_info(struct seq_file *m, void *data) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; struct drm_file *file; seq_printf(m, "RPS enabled? %d\n", dev_priv->rps.enabled); seq_printf(m, "GPU busy? %s [%x]\n", yesno(dev_priv->gt.awake), dev_priv->gt.active_engines); seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv)); - seq_printf(m, "Frequency requested %d; min hard:%d, soft:%d; max soft:%d, hard:%d\n", - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq), + seq_printf(m, "Frequency requested %d\n", + intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq)); + seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n", intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit), intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit), intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); + seq_printf(m, " idle:%d, efficient:%d, boost:%d\n", + intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq), + intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), + intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq)); mutex_lock(&dev->filelist_mutex); spin_lock(&dev_priv->rps.client_lock); @@ -2467,27 +2355,44 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) list_empty(&file_priv->rps.link) ? "" : ", active"); rcu_read_unlock(); } - seq_printf(m, "Semaphore boosts: %d%s\n", - dev_priv->rps.semaphores.boosts, - list_empty(&dev_priv->rps.semaphores.link) ? "" : ", active"); - seq_printf(m, "MMIO flip boosts: %d%s\n", - dev_priv->rps.mmioflips.boosts, - list_empty(&dev_priv->rps.mmioflips.link) ? "" : ", active"); - seq_printf(m, "Kernel boosts: %d\n", dev_priv->rps.boosts); + seq_printf(m, "Kernel (anonymous) boosts: %d\n", dev_priv->rps.boosts); spin_unlock(&dev_priv->rps.client_lock); mutex_unlock(&dev->filelist_mutex); + if (INTEL_GEN(dev_priv) >= 6 && + dev_priv->rps.enabled && + dev_priv->gt.active_engines) { + u32 rpup, rpupei; + u32 rpdown, rpdownei; + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + rpup = I915_READ_FW(GEN6_RP_CUR_UP) & GEN6_RP_EI_MASK; + rpupei = I915_READ_FW(GEN6_RP_CUR_UP_EI) & GEN6_RP_EI_MASK; + rpdown = I915_READ_FW(GEN6_RP_CUR_DOWN) & GEN6_RP_EI_MASK; + rpdownei = I915_READ_FW(GEN6_RP_CUR_DOWN_EI) & GEN6_RP_EI_MASK; + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + + seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n", + rps_power_to_str(dev_priv->rps.power)); + seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n", + 100 * rpup / rpupei, + dev_priv->rps.up_threshold); + seq_printf(m, " Avg. down: %d%% [below threshold? 
%d%%]\n", + 100 * rpdown / rpdownei, + dev_priv->rps.down_threshold); + } else { + seq_puts(m, "\nRPS Autotuning inactive\n"); + } + return 0; } static int i915_llc(struct seq_file *m, void *data) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); const bool edram = INTEL_GEN(dev_priv) > 8; - seq_printf(m, "LLC: %s\n", yesno(HAS_LLC(dev))); + seq_printf(m, "LLC: %s\n", yesno(HAS_LLC(dev_priv))); seq_printf(m, "%s: %lluMB\n", edram ? "eDRAM" : "eLLC", intel_uncore_edram_size(dev_priv)/1024/1024); @@ -2496,8 +2401,7 @@ static int i915_llc(struct seq_file *m, void *data) static int i915_guc_load_status_info(struct seq_file *m, void *data) { - struct drm_info_node *node = m->private; - struct drm_i915_private *dev_priv = to_i915(node->minor->dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; u32 tmp, i; @@ -2543,6 +2447,7 @@ static void i915_guc_client_info(struct seq_file *m, struct i915_guc_client *client) { struct intel_engine_cs *engine; + enum intel_engine_id id; uint64_t tot = 0; seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n", @@ -2553,27 +2458,26 @@ static void i915_guc_client_info(struct seq_file *m, client->wq_size, client->wq_offset, client->wq_tail); seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space); - seq_printf(m, "\tFailed to queue: %u\n", client->q_fail); seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail); seq_printf(m, "\tLast submission result: %d\n", client->retcode); - for_each_engine(engine, dev_priv) { + for_each_engine_id(engine, dev_priv, id) { + u64 submissions = client->submissions[id]; + tot += submissions; seq_printf(m, "\tSubmissions: %llu %s\n", - client->submissions[engine->id], - engine->name); - tot += client->submissions[engine->id]; + submissions, engine->name); } seq_printf(m, "\tTotal: %llu\n", tot); } static int i915_guc_info(struct seq_file *m, void *data) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; struct intel_guc guc; struct i915_guc_client client = {}; struct intel_engine_cs *engine; + enum intel_engine_id id; u64 total = 0; if (!HAS_GUC_SCHED(dev_priv)) @@ -2600,11 +2504,11 @@ static int i915_guc_info(struct seq_file *m, void *data) seq_printf(m, "GuC last action error code: %d\n", guc.action_err); seq_printf(m, "\nGuC submissions:\n"); - for_each_engine(engine, dev_priv) { + for_each_engine_id(engine, dev_priv, id) { + u64 submissions = guc.submissions[id]; + total += submissions; seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n", - engine->name, guc.submissions[engine->id], - guc.last_seqno[engine->id]); - total += guc.submissions[engine->id]; + engine->name, submissions, guc.last_seqno[id]); } seq_printf(m, "\t%s: %llu\n", "Total", total); @@ -2618,18 +2522,16 @@ static int i915_guc_info(struct seq_file *m, void *data) static int i915_guc_log_dump(struct seq_file *m, void *data) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_gem_object *log_obj = dev_priv->guc.log_obj; - u32 *log; + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_i915_gem_object 
*obj; int i = 0, pg; - if (!log_obj) + if (!dev_priv->guc.log_vma) return 0; - for (pg = 0; pg < log_obj->base.size / PAGE_SIZE; pg++) { - log = kmap_atomic(i915_gem_object_get_page(log_obj, pg)); + obj = dev_priv->guc.log_vma->obj; + for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) { + u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg)); for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4) seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n", @@ -2646,15 +2548,13 @@ static int i915_guc_log_dump(struct seq_file *m, void *data) static int i915_edp_psr_status(struct seq_file *m, void *data) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); u32 psrperf = 0; u32 stat[3]; enum pipe pipe; bool enabled = false; - if (!HAS_PSR(dev)) { + if (!HAS_PSR(dev_priv)) { seq_puts(m, "PSR not supported\n"); return 0; } @@ -2671,7 +2571,7 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) seq_printf(m, "Re-enable work scheduled: %s\n", yesno(work_busy(&dev_priv->psr.work.work))); - if (HAS_DDI(dev)) + if (HAS_DDI(dev_priv)) enabled = I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE; else { for_each_pipe(dev_priv, pipe) { @@ -2688,7 +2588,7 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) seq_printf(m, "HW Enabled & Active bit: %s", yesno(enabled)); - if (!HAS_DDI(dev)) + if (!HAS_DDI(dev_priv)) for_each_pipe(dev_priv, pipe) { if ((stat[pipe] == VLV_EDP_PSR_ACTIVE_NORFB_UP) || (stat[pipe] == VLV_EDP_PSR_ACTIVE_SF_UPDATE)) @@ -2700,7 +2600,7 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) * VLV/CHV PSR has no kind of performance counter * SKL+ Perf counter is reset to 0 everytime DC state is entered */ - if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { psrperf = I915_READ(EDP_PSR_PERF_CNT) & EDP_PSR_PERF_CNT_MASK; @@ -2714,8 +2614,8 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) static int i915_sink_crc(struct seq_file *m, void *data) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; struct intel_connector *connector; struct intel_dp *intel_dp = NULL; int ret; @@ -2754,13 +2654,11 @@ out: static int i915_energy_uJ(struct seq_file *m, void *data) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); u64 power; u32 units; - if (INTEL_INFO(dev)->gen < 6) + if (INTEL_GEN(dev_priv) < 6) return -ENODEV; intel_runtime_pm_get(dev_priv); @@ -2780,9 +2678,8 @@ static int i915_energy_uJ(struct seq_file *m, void *data) static int i915_runtime_pm_status(struct seq_file *m, void *unused) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct pci_dev *pdev = dev_priv->drm.pdev; if (!HAS_RUNTIME_PM(dev_priv)) seq_puts(m, "Runtime power management not supported\n"); @@ -2792,22 +2689,20 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused) yesno(!intel_irqs_enabled(dev_priv))); #ifdef CONFIG_PM seq_printf(m, "Usage count: %d\n", - atomic_read(&dev->dev->power.usage_count)); + 
atomic_read(&dev_priv->drm.dev->power.usage_count)); #else seq_printf(m, "Device Power Management (CONFIG_PM) disabled\n"); #endif seq_printf(m, "PCI device power state: %s [%d]\n", - pci_power_name(dev_priv->drm.pdev->current_state), - dev_priv->drm.pdev->current_state); + pci_power_name(pdev->current_state), + pdev->current_state); return 0; } static int i915_power_domain_info(struct seq_file *m, void *unused) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); struct i915_power_domains *power_domains = &dev_priv->power_domains; int i; @@ -2840,12 +2735,10 @@ static int i915_power_domain_info(struct seq_file *m, void *unused) static int i915_dmc_info(struct seq_file *m, void *unused) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_csr *csr; - if (!HAS_CSR(dev)) { + if (!HAS_CSR(dev_priv)) { seq_puts(m, "not supported\n"); return 0; } @@ -2863,12 +2756,12 @@ static int i915_dmc_info(struct seq_file *m, void *unused) seq_printf(m, "version: %d.%d\n", CSR_VERSION_MAJOR(csr->version), CSR_VERSION_MINOR(csr->version)); - if (IS_SKYLAKE(dev) && csr->version >= CSR_VERSION(1, 6)) { + if (IS_SKYLAKE(dev_priv) && csr->version >= CSR_VERSION(1, 6)) { seq_printf(m, "DC3 -> DC5 count: %d\n", I915_READ(SKL_CSR_DC3_DC5_COUNT)); seq_printf(m, "DC5 -> DC6 count: %d\n", I915_READ(SKL_CSR_DC5_DC6_COUNT)); - } else if (IS_BROXTON(dev) && csr->version >= CSR_VERSION(1, 4)) { + } else if (IS_BROXTON(dev_priv) && csr->version >= CSR_VERSION(1, 4)) { seq_printf(m, "DC3 -> DC5 count: %d\n", I915_READ(BXT_CSR_DC3_DC5_COUNT)); } @@ -2905,8 +2798,8 @@ static void intel_encoder_info(struct seq_file *m, struct intel_crtc *intel_crtc, struct intel_encoder *intel_encoder) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; struct drm_crtc *crtc = &intel_crtc->base; struct intel_connector *intel_connector; struct drm_encoder *encoder; @@ -2932,8 +2825,8 @@ static void intel_encoder_info(struct seq_file *m, static void intel_crtc_info(struct seq_file *m, struct intel_crtc *intel_crtc) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; struct drm_crtc *crtc = &intel_crtc->base; struct intel_encoder *intel_encoder; struct drm_plane_state *plane_state = crtc->primary->state; @@ -2967,6 +2860,9 @@ static void intel_dp_info(struct seq_file *m, seq_printf(m, "\taudio support: %s\n", yesno(intel_dp->has_audio)); if (intel_connector->base.connector_type == DRM_MODE_CONNECTOR_eDP) intel_panel_info(m, &intel_connector->panel); + + drm_dp_downstream_debug(m, intel_dp->dpcd, intel_dp->downstream_ports, + &intel_dp->aux); } static void intel_hdmi_info(struct seq_file *m, @@ -3031,12 +2927,11 @@ static void intel_connector_info(struct seq_file *m, intel_seq_print_mode(m, 2, mode); } -static bool cursor_active(struct drm_device *dev, int pipe) +static bool cursor_active(struct drm_i915_private *dev_priv, int pipe) { - struct drm_i915_private *dev_priv = to_i915(dev); u32 state; - if (IS_845G(dev) || IS_I865G(dev)) + if 
(IS_845G(dev_priv) || IS_I865G(dev_priv)) state = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE; else state = I915_READ(CURCNTR(pipe)) & CURSOR_MODE; @@ -3044,9 +2939,9 @@ static bool cursor_active(struct drm_device *dev, int pipe) return state; } -static bool cursor_position(struct drm_device *dev, int pipe, int *x, int *y) +static bool cursor_position(struct drm_i915_private *dev_priv, + int pipe, int *x, int *y) { - struct drm_i915_private *dev_priv = to_i915(dev); u32 pos; pos = I915_READ(CURPOS(pipe)); @@ -3059,7 +2954,7 @@ static bool cursor_position(struct drm_device *dev, int pipe, int *x, int *y) if (pos & (CURSOR_POS_SIGN << CURSOR_Y_SHIFT)) *y = -*y; - return cursor_active(dev, pipe); + return cursor_active(dev_priv, pipe); } static const char *plane_type(enum drm_plane_type type) @@ -3089,12 +2984,12 @@ static const char *plane_rotation(unsigned int rotation) */ snprintf(buf, sizeof(buf), "%s%s%s%s%s%s(0x%08x)", - (rotation & BIT(DRM_ROTATE_0)) ? "0 " : "", - (rotation & BIT(DRM_ROTATE_90)) ? "90 " : "", - (rotation & BIT(DRM_ROTATE_180)) ? "180 " : "", - (rotation & BIT(DRM_ROTATE_270)) ? "270 " : "", - (rotation & BIT(DRM_REFLECT_X)) ? "FLIPX " : "", - (rotation & BIT(DRM_REFLECT_Y)) ? "FLIPY " : "", + (rotation & DRM_ROTATE_0) ? "0 " : "", + (rotation & DRM_ROTATE_90) ? "90 " : "", + (rotation & DRM_ROTATE_180) ? "180 " : "", + (rotation & DRM_ROTATE_270) ? "270 " : "", + (rotation & DRM_REFLECT_X) ? "FLIPX " : "", + (rotation & DRM_REFLECT_Y) ? "FLIPY " : "", rotation); return buf; @@ -3102,13 +2997,14 @@ static const char *plane_rotation(unsigned int rotation) static void intel_plane_info(struct seq_file *m, struct intel_crtc *intel_crtc) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; struct intel_plane *intel_plane; for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { struct drm_plane_state *state; struct drm_plane *plane = &intel_plane->base; + char *format_name; if (!plane->state) { seq_puts(m, "plane->state is NULL!\n"); @@ -3117,6 +3013,12 @@ static void intel_plane_info(struct seq_file *m, struct intel_crtc *intel_crtc) state = plane->state; + if (state->fb) { + format_name = drm_get_format_name(state->fb->pixel_format); + } else { + format_name = kstrdup("N/A", GFP_KERNEL); + } + seq_printf(m, "\t--Plane id %d: type=%s, crtc_pos=%4dx%4d, crtc_size=%4dx%4d, src_pos=%d.%04ux%d.%04u, src_size=%d.%04ux%d.%04u, format=%s, rotation=%s\n", plane->base.id, plane_type(intel_plane->base.type), @@ -3130,8 +3032,10 @@ static void intel_plane_info(struct seq_file *m, struct intel_crtc *intel_crtc) ((state->src_w & 0xffff) * 15625) >> 10, (state->src_h >> 16), ((state->src_h & 0xffff) * 15625) >> 10, - state->fb ? 
drm_get_format_name(state->fb->pixel_format) : "N/A", + format_name, plane_rotation(state->rotation)); + + kfree(format_name); } } @@ -3165,9 +3069,8 @@ static void intel_scaler_info(struct seq_file *m, struct intel_crtc *intel_crtc) static int i915_display_info(struct seq_file *m, void *unused) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; struct intel_crtc *crtc; struct drm_connector *connector; @@ -3191,7 +3094,7 @@ static int i915_display_info(struct seq_file *m, void *unused) if (pipe_config->base.active) { intel_crtc_info(m, crtc); - active = cursor_position(dev, crtc->pipe, &x, &y); + active = cursor_position(dev_priv, crtc->pipe, &x, &y); seq_printf(m, "\tcursor visible? %s, position (%d, %d), size %dx%d, addr 0x%08x, active? %s\n", yesno(crtc->cursor_base), x, y, crtc->base.cursor->state->crtc_w, @@ -3220,15 +3123,14 @@ static int i915_display_info(struct seq_file *m, void *unused) static int i915_semaphore_status(struct seq_file *m, void *unused) { - struct drm_info_node *node = (struct drm_info_node *) m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; struct intel_engine_cs *engine; - int num_rings = hweight32(INTEL_INFO(dev)->ring_mask); + int num_rings = INTEL_INFO(dev_priv)->num_rings; enum intel_engine_id id; int j, ret; - if (!i915_semaphore_is_enabled(dev_priv)) { + if (!i915.semaphores) { seq_puts(m, "Semaphores are disabled\n"); return 0; } @@ -3238,11 +3140,11 @@ static int i915_semaphore_status(struct seq_file *m, void *unused) return ret; intel_runtime_pm_get(dev_priv); - if (IS_BROADWELL(dev)) { + if (IS_BROADWELL(dev_priv)) { struct page *page; uint64_t *seqno; - page = i915_gem_object_get_page(dev_priv->semaphore_obj, 0); + page = i915_gem_object_get_page(dev_priv->semaphore->obj, 0); seqno = (uint64_t *)kmap_atomic(page); for_each_engine_id(engine, dev_priv, id) { @@ -3293,9 +3195,8 @@ static int i915_semaphore_status(struct seq_file *m, void *unused) static int i915_shared_dplls_info(struct seq_file *m, void *unused) { - struct drm_info_node *node = (struct drm_info_node *) m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; int i; drm_modeset_lock_all(dev); @@ -3323,9 +3224,8 @@ static int i915_wa_registers(struct seq_file *m, void *unused) int i; int ret; struct intel_engine_cs *engine; - struct drm_info_node *node = (struct drm_info_node *) m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; struct i915_workarounds *workarounds = &dev_priv->workarounds; enum intel_engine_id id; @@ -3361,15 +3261,14 @@ static int i915_wa_registers(struct seq_file *m, void *unused) static int i915_ddb_info(struct seq_file *m, void *unused) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; struct 
skl_ddb_allocation *ddb; struct skl_ddb_entry *entry; enum pipe pipe; int plane; - if (INTEL_INFO(dev)->gen < 9) + if (INTEL_GEN(dev_priv) < 9) return 0; drm_modeset_lock_all(dev); @@ -3399,7 +3298,8 @@ static int i915_ddb_info(struct seq_file *m, void *unused) } static void drrs_status_per_crtc(struct seq_file *m, - struct drm_device *dev, struct intel_crtc *intel_crtc) + struct drm_device *dev, + struct intel_crtc *intel_crtc) { struct drm_i915_private *dev_priv = to_i915(dev); struct i915_drrs *drrs = &dev_priv->drrs; @@ -3468,8 +3368,8 @@ static void drrs_status_per_crtc(struct seq_file *m, static int i915_drrs_status(struct seq_file *m, void *unused) { - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; struct intel_crtc *intel_crtc; int active_crtc_cnt = 0; @@ -3492,14 +3392,14 @@ static int i915_drrs_status(struct seq_file *m, void *unused) struct pipe_crc_info { const char *name; - struct drm_device *dev; + struct drm_i915_private *dev_priv; enum pipe pipe; }; static int i915_dp_mst_info(struct seq_file *m, void *unused) { - struct drm_info_node *node = (struct drm_info_node *) m->private; - struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_device *dev = &dev_priv->drm; struct intel_encoder *intel_encoder; struct intel_digital_port *intel_dig_port; struct drm_connector *connector; @@ -3528,10 +3428,10 @@ static int i915_dp_mst_info(struct seq_file *m, void *unused) static int i915_pipe_crc_open(struct inode *inode, struct file *filep) { struct pipe_crc_info *info = inode->i_private; - struct drm_i915_private *dev_priv = to_i915(info->dev); + struct drm_i915_private *dev_priv = info->dev_priv; struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[info->pipe]; - if (info->pipe >= INTEL_INFO(info->dev)->num_pipes) + if (info->pipe >= INTEL_INFO(dev_priv)->num_pipes) return -ENODEV; spin_lock_irq(&pipe_crc->lock); @@ -3552,7 +3452,7 @@ static int i915_pipe_crc_open(struct inode *inode, struct file *filep) static int i915_pipe_crc_release(struct inode *inode, struct file *filep) { struct pipe_crc_info *info = inode->i_private; - struct drm_i915_private *dev_priv = to_i915(info->dev); + struct drm_i915_private *dev_priv = info->dev_priv; struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[info->pipe]; spin_lock_irq(&pipe_crc->lock); @@ -3579,8 +3479,7 @@ i915_pipe_crc_read(struct file *filep, char __user *user_buf, size_t count, loff_t *pos) { struct pipe_crc_info *info = filep->private_data; - struct drm_device *dev = info->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = info->dev_priv; struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[info->pipe]; char buf[PIPE_CRC_BUFFER_LEN]; int n_entries; @@ -3621,7 +3520,6 @@ i915_pipe_crc_read(struct file *filep, char __user *user_buf, size_t count, while (n_entries > 0) { struct intel_pipe_crc_entry *entry = &pipe_crc->entries[pipe_crc->tail]; - int ret; if (CIRC_CNT(pipe_crc->head, pipe_crc->tail, INTEL_PIPE_CRC_ENTRIES_NR) < 1) @@ -3638,8 +3536,7 @@ i915_pipe_crc_read(struct file *filep, char __user *user_buf, size_t count, spin_unlock_irq(&pipe_crc->lock); - ret = copy_to_user(user_buf, buf, PIPE_CRC_LINE_LEN); - if (ret == PIPE_CRC_LINE_LEN) + if (copy_to_user(user_buf, buf, PIPE_CRC_LINE_LEN)) return -EFAULT; user_buf += PIPE_CRC_LINE_LEN; @@ -3678,11 +3575,11 @@ static struct 
pipe_crc_info i915_pipe_crc_data[I915_MAX_PIPES] = { static int i915_pipe_crc_create(struct dentry *root, struct drm_minor *minor, enum pipe pipe) { - struct drm_device *dev = minor->dev; + struct drm_i915_private *dev_priv = to_i915(minor->dev); struct dentry *ent; struct pipe_crc_info *info = &i915_pipe_crc_data[pipe]; - info->dev = dev; + info->dev_priv = dev_priv; ent = debugfs_create_file(info->name, S_IRUGO, root, info, &i915_pipe_crc_fops); if (!ent) @@ -3712,8 +3609,7 @@ static const char *pipe_crc_source_name(enum intel_pipe_crc_source source) static int display_crc_ctl_show(struct seq_file *m, void *data) { - struct drm_device *dev = m->private; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = m->private; int i; for (i = 0; i < I915_MAX_PIPES; i++) @@ -3725,9 +3621,7 @@ static int display_crc_ctl_show(struct seq_file *m, void *data) static int display_crc_ctl_open(struct inode *inode, struct file *file) { - struct drm_device *dev = inode->i_private; - - return single_open(file, display_crc_ctl_show, dev); + return single_open(file, display_crc_ctl_show, inode->i_private); } static int i8xx_pipe_crc_ctl_reg(enum intel_pipe_crc_source *source, @@ -3750,9 +3644,11 @@ static int i8xx_pipe_crc_ctl_reg(enum intel_pipe_crc_source *source, return 0; } -static int i9xx_pipe_crc_auto_source(struct drm_device *dev, enum pipe pipe, +static int i9xx_pipe_crc_auto_source(struct drm_i915_private *dev_priv, + enum pipe pipe, enum intel_pipe_crc_source *source) { + struct drm_device *dev = &dev_priv->drm; struct intel_encoder *encoder; struct intel_crtc *crtc; struct intel_digital_port *dig_port; @@ -3802,16 +3698,15 @@ static int i9xx_pipe_crc_auto_source(struct drm_device *dev, enum pipe pipe, return ret; } -static int vlv_pipe_crc_ctl_reg(struct drm_device *dev, +static int vlv_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, enum pipe pipe, enum intel_pipe_crc_source *source, uint32_t *val) { - struct drm_i915_private *dev_priv = to_i915(dev); bool need_stable_symbols = false; if (*source == INTEL_PIPE_CRC_SOURCE_AUTO) { - int ret = i9xx_pipe_crc_auto_source(dev, pipe, source); + int ret = i9xx_pipe_crc_auto_source(dev_priv, pipe, source); if (ret) return ret; } @@ -3829,7 +3724,7 @@ static int vlv_pipe_crc_ctl_reg(struct drm_device *dev, need_stable_symbols = true; break; case INTEL_PIPE_CRC_SOURCE_DP_D: - if (!IS_CHERRYVIEW(dev)) + if (!IS_CHERRYVIEW(dev_priv)) return -EINVAL; *val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_DP_D_VLV; need_stable_symbols = true; @@ -3873,16 +3768,15 @@ static int vlv_pipe_crc_ctl_reg(struct drm_device *dev, return 0; } -static int i9xx_pipe_crc_ctl_reg(struct drm_device *dev, +static int i9xx_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, enum pipe pipe, enum intel_pipe_crc_source *source, uint32_t *val) { - struct drm_i915_private *dev_priv = to_i915(dev); bool need_stable_symbols = false; if (*source == INTEL_PIPE_CRC_SOURCE_AUTO) { - int ret = i9xx_pipe_crc_auto_source(dev, pipe, source); + int ret = i9xx_pipe_crc_auto_source(dev_priv, pipe, source); if (ret) return ret; } @@ -3892,24 +3786,24 @@ static int i9xx_pipe_crc_ctl_reg(struct drm_device *dev, *val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_PIPE_I9XX; break; case INTEL_PIPE_CRC_SOURCE_TV: - if (!SUPPORTS_TV(dev)) + if (!SUPPORTS_TV(dev_priv)) return -EINVAL; *val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_TV_PRE; break; case INTEL_PIPE_CRC_SOURCE_DP_B: - if (!IS_G4X(dev)) + if (!IS_G4X(dev_priv)) return -EINVAL; *val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_DP_B_G4X; 
need_stable_symbols = true; break; case INTEL_PIPE_CRC_SOURCE_DP_C: - if (!IS_G4X(dev)) + if (!IS_G4X(dev_priv)) return -EINVAL; *val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_DP_C_G4X; need_stable_symbols = true; break; case INTEL_PIPE_CRC_SOURCE_DP_D: - if (!IS_G4X(dev)) + if (!IS_G4X(dev_priv)) return -EINVAL; *val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_DP_D_G4X; need_stable_symbols = true; @@ -3933,7 +3827,7 @@ static int i9xx_pipe_crc_ctl_reg(struct drm_device *dev, if (need_stable_symbols) { uint32_t tmp = I915_READ(PORT_DFT2_G4X); - WARN_ON(!IS_G4X(dev)); + WARN_ON(!IS_G4X(dev_priv)); I915_WRITE(PORT_DFT_I9XX, I915_READ(PORT_DFT_I9XX) | DC_BALANCE_RESET); @@ -3949,10 +3843,9 @@ static int i9xx_pipe_crc_ctl_reg(struct drm_device *dev, return 0; } -static void vlv_undo_pipe_scramble_reset(struct drm_device *dev, +static void vlv_undo_pipe_scramble_reset(struct drm_i915_private *dev_priv, enum pipe pipe) { - struct drm_i915_private *dev_priv = to_i915(dev); uint32_t tmp = I915_READ(PORT_DFT2_G4X); switch (pipe) { @@ -3974,10 +3867,9 @@ static void vlv_undo_pipe_scramble_reset(struct drm_device *dev, } -static void g4x_undo_pipe_scramble_reset(struct drm_device *dev, +static void g4x_undo_pipe_scramble_reset(struct drm_i915_private *dev_priv, enum pipe pipe) { - struct drm_i915_private *dev_priv = to_i915(dev); uint32_t tmp = I915_READ(PORT_DFT2_G4X); if (pipe == PIPE_A) @@ -4018,9 +3910,10 @@ static int ilk_pipe_crc_ctl_reg(enum intel_pipe_crc_source *source, return 0; } -static void hsw_trans_edp_pipe_A_crc_wa(struct drm_device *dev, bool enable) +static void hsw_trans_edp_pipe_A_crc_wa(struct drm_i915_private *dev_priv, + bool enable) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_device *dev = &dev_priv->drm; struct intel_crtc *crtc = to_intel_crtc(dev_priv->pipe_to_crtc_mapping[PIPE_A]); struct intel_crtc_state *pipe_config; @@ -4054,7 +3947,7 @@ out: drm_atomic_state_free(state); } -static int ivb_pipe_crc_ctl_reg(struct drm_device *dev, +static int ivb_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, enum pipe pipe, enum intel_pipe_crc_source *source, uint32_t *val) @@ -4070,8 +3963,8 @@ static int ivb_pipe_crc_ctl_reg(struct drm_device *dev, *val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_SPRITE_IVB; break; case INTEL_PIPE_CRC_SOURCE_PF: - if (IS_HASWELL(dev) && pipe == PIPE_A) - hsw_trans_edp_pipe_A_crc_wa(dev, true); + if (IS_HASWELL(dev_priv) && pipe == PIPE_A) + hsw_trans_edp_pipe_A_crc_wa(dev_priv, true); *val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_PF_IVB; break; @@ -4085,13 +3978,14 @@ static int ivb_pipe_crc_ctl_reg(struct drm_device *dev, return 0; } -static int pipe_crc_set_source(struct drm_device *dev, enum pipe pipe, +static int pipe_crc_set_source(struct drm_i915_private *dev_priv, + enum pipe pipe, enum intel_pipe_crc_source source) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_device *dev = &dev_priv->drm; struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[pipe]; - struct intel_crtc *crtc = to_intel_crtc(intel_get_crtc_for_pipe(dev, - pipe)); + struct intel_crtc *crtc = + to_intel_crtc(intel_get_crtc_for_pipe(dev, pipe)); enum intel_display_power_domain power_domain; u32 val = 0; /* shut up gcc */ int ret; @@ -4109,16 +4003,16 @@ static int pipe_crc_set_source(struct drm_device *dev, enum pipe pipe, return -EIO; } - if (IS_GEN2(dev)) + if (IS_GEN2(dev_priv)) ret = i8xx_pipe_crc_ctl_reg(&source, &val); - else if (INTEL_INFO(dev)->gen < 5) - ret = i9xx_pipe_crc_ctl_reg(dev, pipe, &source, &val); - else if (IS_VALLEYVIEW(dev) || 
IS_CHERRYVIEW(dev)) - ret = vlv_pipe_crc_ctl_reg(dev, pipe, &source, &val); - else if (IS_GEN5(dev) || IS_GEN6(dev)) + else if (INTEL_GEN(dev_priv) < 5) + ret = i9xx_pipe_crc_ctl_reg(dev_priv, pipe, &source, &val); + else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + ret = vlv_pipe_crc_ctl_reg(dev_priv, pipe, &source, &val); + else if (IS_GEN5(dev_priv) || IS_GEN6(dev_priv)) ret = ilk_pipe_crc_ctl_reg(&source, &val); else - ret = ivb_pipe_crc_ctl_reg(dev, pipe, &source, &val); + ret = ivb_pipe_crc_ctl_reg(dev_priv, pipe, &source, &val); if (ret != 0) goto out; @@ -4182,12 +4076,12 @@ static int pipe_crc_set_source(struct drm_device *dev, enum pipe pipe, kfree(entries); - if (IS_G4X(dev)) - g4x_undo_pipe_scramble_reset(dev, pipe); - else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) - vlv_undo_pipe_scramble_reset(dev, pipe); - else if (IS_HASWELL(dev) && pipe == PIPE_A) - hsw_trans_edp_pipe_A_crc_wa(dev, false); + if (IS_G4X(dev_priv)) + g4x_undo_pipe_scramble_reset(dev_priv, pipe); + else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + vlv_undo_pipe_scramble_reset(dev_priv, pipe); + else if (IS_HASWELL(dev_priv) && pipe == PIPE_A) + hsw_trans_edp_pipe_A_crc_wa(dev_priv, false); hsw_enable_ips(crtc); } @@ -4291,7 +4185,8 @@ display_crc_ctl_parse_source(const char *buf, enum intel_pipe_crc_source *s) return -EINVAL; } -static int display_crc_ctl_parse(struct drm_device *dev, char *buf, size_t len) +static int display_crc_ctl_parse(struct drm_i915_private *dev_priv, + char *buf, size_t len) { #define N_WORDS 3 int n_words; @@ -4322,14 +4217,14 @@ static int display_crc_ctl_parse(struct drm_device *dev, char *buf, size_t len) return -EINVAL; } - return pipe_crc_set_source(dev, pipe, source); + return pipe_crc_set_source(dev_priv, pipe, source); } static ssize_t display_crc_ctl_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp) { struct seq_file *m = file->private_data; - struct drm_device *dev = m->private; + struct drm_i915_private *dev_priv = m->private; char *tmpbuf; int ret; @@ -4352,7 +4247,7 @@ static ssize_t display_crc_ctl_write(struct file *file, const char __user *ubuf, } tmpbuf[len] = '\0'; - ret = display_crc_ctl_parse(dev, tmpbuf, len); + ret = display_crc_ctl_parse(dev_priv, tmpbuf, len); out: kfree(tmpbuf); @@ -4373,8 +4268,8 @@ static const struct file_operations i915_display_crc_ctl_fops = { }; static ssize_t i915_displayport_test_active_write(struct file *file, - const char __user *ubuf, - size_t len, loff_t *offp) + const char __user *ubuf, + size_t len, loff_t *offp) { char *input_buffer; int status = 0; @@ -4404,7 +4299,6 @@ static ssize_t i915_displayport_test_active_write(struct file *file, DRM_DEBUG_DRIVER("Copied %d bytes from user\n", (unsigned int)len); list_for_each_entry(connector, connector_list, head) { - if (connector->connector_type != DRM_MODE_CONNECTOR_DisplayPort) continue; @@ -4442,7 +4336,6 @@ static int i915_displayport_test_active_show(struct seq_file *m, void *data) struct intel_dp *intel_dp; list_for_each_entry(connector, connector_list, head) { - if (connector->connector_type != DRM_MODE_CONNECTOR_DisplayPort) continue; @@ -4462,11 +4355,12 @@ static int i915_displayport_test_active_show(struct seq_file *m, void *data) } static int i915_displayport_test_active_open(struct inode *inode, - struct file *file) + struct file *file) { - struct drm_device *dev = inode->i_private; + struct drm_i915_private *dev_priv = inode->i_private; - return single_open(file, i915_displayport_test_active_show, dev); + return 
single_open(file, i915_displayport_test_active_show, + &dev_priv->drm); } static const struct file_operations i915_displayport_test_active_fops = { @@ -4486,7 +4380,6 @@ static int i915_displayport_test_data_show(struct seq_file *m, void *data) struct intel_dp *intel_dp; list_for_each_entry(connector, connector_list, head) { - if (connector->connector_type != DRM_MODE_CONNECTOR_DisplayPort) continue; @@ -4502,11 +4395,12 @@ static int i915_displayport_test_data_show(struct seq_file *m, void *data) return 0; } static int i915_displayport_test_data_open(struct inode *inode, - struct file *file) + struct file *file) { - struct drm_device *dev = inode->i_private; + struct drm_i915_private *dev_priv = inode->i_private; - return single_open(file, i915_displayport_test_data_show, dev); + return single_open(file, i915_displayport_test_data_show, + &dev_priv->drm); } static const struct file_operations i915_displayport_test_data_fops = { @@ -4525,7 +4419,6 @@ static int i915_displayport_test_type_show(struct seq_file *m, void *data) struct intel_dp *intel_dp; list_for_each_entry(connector, connector_list, head) { - if (connector->connector_type != DRM_MODE_CONNECTOR_DisplayPort) continue; @@ -4544,9 +4437,10 @@ static int i915_displayport_test_type_show(struct seq_file *m, void *data) static int i915_displayport_test_type_open(struct inode *inode, struct file *file) { - struct drm_device *dev = inode->i_private; + struct drm_i915_private *dev_priv = inode->i_private; - return single_open(file, i915_displayport_test_type_show, dev); + return single_open(file, i915_displayport_test_type_show, + &dev_priv->drm); } static const struct file_operations i915_displayport_test_type_fops = { @@ -4559,13 +4453,14 @@ static const struct file_operations i915_displayport_test_type_fops = { static void wm_latency_show(struct seq_file *m, const uint16_t wm[8]) { - struct drm_device *dev = m->private; + struct drm_i915_private *dev_priv = m->private; + struct drm_device *dev = &dev_priv->drm; int level; int num_levels; - if (IS_CHERRYVIEW(dev)) + if (IS_CHERRYVIEW(dev_priv)) num_levels = 3; - else if (IS_VALLEYVIEW(dev)) + else if (IS_VALLEYVIEW(dev_priv)) num_levels = 1; else num_levels = ilk_wm_max_level(dev) + 1; @@ -4579,8 +4474,8 @@ static void wm_latency_show(struct seq_file *m, const uint16_t wm[8]) * - WM1+ latency values in 0.5us units * - latencies are in us on gen9/vlv/chv */ - if (INTEL_INFO(dev)->gen >= 9 || IS_VALLEYVIEW(dev) || - IS_CHERRYVIEW(dev)) + if (INTEL_GEN(dev_priv) >= 9 || IS_VALLEYVIEW(dev_priv) || + IS_CHERRYVIEW(dev_priv)) latency *= 10; else if (level > 0) latency *= 5; @@ -4594,14 +4489,13 @@ static void wm_latency_show(struct seq_file *m, const uint16_t wm[8]) static int pri_wm_latency_show(struct seq_file *m, void *data) { - struct drm_device *dev = m->private; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = m->private; const uint16_t *latencies; - if (INTEL_INFO(dev)->gen >= 9) + if (INTEL_GEN(dev_priv) >= 9) latencies = dev_priv->wm.skl_latency; else - latencies = to_i915(dev)->wm.pri_latency; + latencies = dev_priv->wm.pri_latency; wm_latency_show(m, latencies); @@ -4610,14 +4504,13 @@ static int pri_wm_latency_show(struct seq_file *m, void *data) static int spr_wm_latency_show(struct seq_file *m, void *data) { - struct drm_device *dev = m->private; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = m->private; const uint16_t *latencies; - if (INTEL_INFO(dev)->gen >= 9) + if (INTEL_GEN(dev_priv) >= 9) 
latencies = dev_priv->wm.skl_latency; else - latencies = to_i915(dev)->wm.spr_latency; + latencies = dev_priv->wm.spr_latency; wm_latency_show(m, latencies); @@ -4626,14 +4519,13 @@ static int spr_wm_latency_show(struct seq_file *m, void *data) static int cur_wm_latency_show(struct seq_file *m, void *data) { - struct drm_device *dev = m->private; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = m->private; const uint16_t *latencies; - if (INTEL_INFO(dev)->gen >= 9) + if (INTEL_GEN(dev_priv) >= 9) latencies = dev_priv->wm.skl_latency; else - latencies = to_i915(dev)->wm.cur_latency; + latencies = dev_priv->wm.cur_latency; wm_latency_show(m, latencies); @@ -4642,48 +4534,49 @@ static int cur_wm_latency_show(struct seq_file *m, void *data) static int pri_wm_latency_open(struct inode *inode, struct file *file) { - struct drm_device *dev = inode->i_private; + struct drm_i915_private *dev_priv = inode->i_private; - if (INTEL_INFO(dev)->gen < 5) + if (INTEL_GEN(dev_priv) < 5) return -ENODEV; - return single_open(file, pri_wm_latency_show, dev); + return single_open(file, pri_wm_latency_show, dev_priv); } static int spr_wm_latency_open(struct inode *inode, struct file *file) { - struct drm_device *dev = inode->i_private; + struct drm_i915_private *dev_priv = inode->i_private; - if (HAS_GMCH_DISPLAY(dev)) + if (HAS_GMCH_DISPLAY(dev_priv)) return -ENODEV; - return single_open(file, spr_wm_latency_show, dev); + return single_open(file, spr_wm_latency_show, dev_priv); } static int cur_wm_latency_open(struct inode *inode, struct file *file) { - struct drm_device *dev = inode->i_private; + struct drm_i915_private *dev_priv = inode->i_private; - if (HAS_GMCH_DISPLAY(dev)) + if (HAS_GMCH_DISPLAY(dev_priv)) return -ENODEV; - return single_open(file, cur_wm_latency_show, dev); + return single_open(file, cur_wm_latency_show, dev_priv); } static ssize_t wm_latency_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp, uint16_t wm[8]) { struct seq_file *m = file->private_data; - struct drm_device *dev = m->private; + struct drm_i915_private *dev_priv = m->private; + struct drm_device *dev = &dev_priv->drm; uint16_t new[8] = { 0 }; int num_levels; int level; int ret; char tmp[32]; - if (IS_CHERRYVIEW(dev)) + if (IS_CHERRYVIEW(dev_priv)) num_levels = 3; - else if (IS_VALLEYVIEW(dev)) + else if (IS_VALLEYVIEW(dev_priv)) num_levels = 1; else num_levels = ilk_wm_max_level(dev) + 1; @@ -4717,14 +4610,13 @@ static ssize_t pri_wm_latency_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp) { struct seq_file *m = file->private_data; - struct drm_device *dev = m->private; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = m->private; uint16_t *latencies; - if (INTEL_INFO(dev)->gen >= 9) + if (INTEL_GEN(dev_priv) >= 9) latencies = dev_priv->wm.skl_latency; else - latencies = to_i915(dev)->wm.pri_latency; + latencies = dev_priv->wm.pri_latency; return wm_latency_write(file, ubuf, len, offp, latencies); } @@ -4733,14 +4625,13 @@ static ssize_t spr_wm_latency_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp) { struct seq_file *m = file->private_data; - struct drm_device *dev = m->private; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = m->private; uint16_t *latencies; - if (INTEL_INFO(dev)->gen >= 9) + if (INTEL_GEN(dev_priv) >= 9) latencies = dev_priv->wm.skl_latency; else - latencies = to_i915(dev)->wm.spr_latency; + latencies = 
dev_priv->wm.spr_latency; return wm_latency_write(file, ubuf, len, offp, latencies); } @@ -4749,14 +4640,13 @@ static ssize_t cur_wm_latency_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp) { struct seq_file *m = file->private_data; - struct drm_device *dev = m->private; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = m->private; uint16_t *latencies; - if (INTEL_INFO(dev)->gen >= 9) + if (INTEL_GEN(dev_priv) >= 9) latencies = dev_priv->wm.skl_latency; else - latencies = to_i915(dev)->wm.cur_latency; + latencies = dev_priv->wm.cur_latency; return wm_latency_write(file, ubuf, len, offp, latencies); } @@ -4791,8 +4681,7 @@ static const struct file_operations i915_cur_wm_latency_fops = { static int i915_wedged_get(void *data, u64 *val) { - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = data; *val = i915_terminally_wedged(&dev_priv->gpu_error); @@ -4802,8 +4691,7 @@ i915_wedged_get(void *data, u64 *val) static int i915_wedged_set(void *data, u64 val) { - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = data; /* * There is no safeguard against this debugfs entry colliding @@ -4833,8 +4721,7 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops, static int i915_ring_missed_irq_get(void *data, u64 *val) { - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = data; *val = dev_priv->gpu_error.missed_irq_rings; return 0; @@ -4843,8 +4730,8 @@ i915_ring_missed_irq_get(void *data, u64 *val) static int i915_ring_missed_irq_set(void *data, u64 val) { - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = data; + struct drm_device *dev = &dev_priv->drm; int ret; /* Lock against concurrent debugfs callers */ @@ -4864,8 +4751,7 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_ring_missed_irq_fops, static int i915_ring_test_irq_get(void *data, u64 *val) { - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = data; *val = dev_priv->gpu_error.test_irq_rings; @@ -4875,8 +4761,7 @@ i915_ring_test_irq_get(void *data, u64 *val) static int i915_ring_test_irq_set(void *data, u64 val) { - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = data; val &= INTEL_INFO(dev_priv)->ring_mask; DRM_DEBUG_DRIVER("Masking interrupts on rings 0x%08llx\n", val); @@ -4908,8 +4793,8 @@ i915_drop_caches_get(void *data, u64 *val) static int i915_drop_caches_set(void *data, u64 val) { - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = data; + struct drm_device *dev = &dev_priv->drm; int ret; DRM_DEBUG("Dropping caches: 0x%08llx\n", val); @@ -4921,7 +4806,9 @@ i915_drop_caches_set(void *data, u64 val) return ret; if (val & DROP_ACTIVE) { - ret = i915_gem_wait_for_idle(dev_priv); + ret = i915_gem_wait_for_idle(dev_priv, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED); if (ret) goto unlock; } @@ -4948,38 +4835,25 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_drop_caches_fops, static int i915_max_freq_get(void *data, u64 *val) { - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = to_i915(dev); - int ret; + struct drm_i915_private *dev_priv = data; - if (INTEL_INFO(dev)->gen < 6) + if (INTEL_GEN(dev_priv) < 6) 
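All of the attribute conversions above follow one shape: the debugfs file now keeps the struct drm_i915_private pointer itself as private data, so the get/set callbacks cast it straight back instead of bouncing through a drm_device. A minimal sketch of the pattern (the i915_example_* names are hypothetical):

static int i915_example_get(void *data, u64 *val)
{
	struct drm_i915_private *dev_priv = data;

	*val = INTEL_GEN(dev_priv);	/* any per-device value */
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(i915_example_fops, i915_example_get,
			NULL, "%llu\n");

Registration then hands debugfs_create_file() the dev_priv pointer directly rather than the drm_device, matching the i915_debugfs_create()/i915_forcewake_create() changes further down.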
return -ENODEV; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); - if (ret) - return ret; - *val = intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit); - mutex_unlock(&dev_priv->rps.hw_lock); - return 0; } static int i915_max_freq_set(void *data, u64 val) { - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = data; u32 hw_max, hw_min; int ret; - if (INTEL_INFO(dev)->gen < 6) + if (INTEL_GEN(dev_priv) < 6) return -ENODEV; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - DRM_DEBUG_DRIVER("Manually setting max freq to %llu\n", val); ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); @@ -5015,38 +4889,25 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_max_freq_fops, static int i915_min_freq_get(void *data, u64 *val) { - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = to_i915(dev); - int ret; + struct drm_i915_private *dev_priv = data; - if (INTEL_INFO(dev)->gen < 6) + if (INTEL_GEN(dev_priv) < 6) return -ENODEV; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); - if (ret) - return ret; - *val = intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit); - mutex_unlock(&dev_priv->rps.hw_lock); - return 0; } static int i915_min_freq_set(void *data, u64 val) { - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = data; u32 hw_max, hw_min; int ret; - if (INTEL_INFO(dev)->gen < 6) + if (INTEL_GEN(dev_priv) < 6) return -ENODEV; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - DRM_DEBUG_DRIVER("Manually setting min freq to %llu\n", val); ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); @@ -5061,7 +4922,8 @@ i915_min_freq_set(void *data, u64 val) hw_max = dev_priv->rps.max_freq; hw_min = dev_priv->rps.min_freq; - if (val < hw_min || val > hw_max || val > dev_priv->rps.max_freq_softlimit) { + if (val < hw_min || + val > hw_max || val > dev_priv->rps.max_freq_softlimit) { mutex_unlock(&dev_priv->rps.hw_lock); return -EINVAL; } @@ -5082,12 +4944,12 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_min_freq_fops, static int i915_cache_sharing_get(void *data, u64 *val) { - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = data; + struct drm_device *dev = &dev_priv->drm; u32 snpcr; int ret; - if (!(IS_GEN6(dev) || IS_GEN7(dev))) + if (!(IS_GEN6(dev_priv) || IS_GEN7(dev_priv))) return -ENODEV; ret = mutex_lock_interruptible(&dev->struct_mutex); @@ -5098,7 +4960,7 @@ i915_cache_sharing_get(void *data, u64 *val) snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); intel_runtime_pm_put(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&dev->struct_mutex); *val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT; @@ -5108,11 +4970,10 @@ i915_cache_sharing_get(void *data, u64 *val) static int i915_cache_sharing_set(void *data, u64 val) { - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = data; u32 snpcr; - if (!(IS_GEN6(dev) || IS_GEN7(dev))) + if (!(IS_GEN6(dev_priv) || IS_GEN7(dev_priv))) return -ENODEV; if (val > 3) @@ -5135,18 +4996,9 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_cache_sharing_fops, i915_cache_sharing_get, i915_cache_sharing_set, "%llu\n"); -struct sseu_dev_status { - unsigned int slice_total; - unsigned int subslice_total; - unsigned int 
subslice_per_slice; - unsigned int eu_total; - unsigned int eu_per_subslice; -}; - -static void cherryview_sseu_device_status(struct drm_device *dev, - struct sseu_dev_status *stat) +static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv, + struct sseu_dev_info *sseu) { - struct drm_i915_private *dev_priv = to_i915(dev); int ss_max = 2; int ss; u32 sig1[ss_max], sig2[ss_max]; @@ -5163,28 +5015,27 @@ static void cherryview_sseu_device_status(struct drm_device *dev, /* skip disabled subslice */ continue; - stat->slice_total = 1; - stat->subslice_per_slice++; + sseu->slice_mask = BIT(0); + sseu->subslice_mask |= BIT(ss); eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) + ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) + ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) + ((sig2[ss] & CHV_EU311_PG_ENABLE) ? 0 : 2); - stat->eu_total += eu_cnt; - stat->eu_per_subslice = max(stat->eu_per_subslice, eu_cnt); + sseu->eu_total += eu_cnt; + sseu->eu_per_subslice = max_t(unsigned int, + sseu->eu_per_subslice, eu_cnt); } - stat->subslice_total = stat->subslice_per_slice; } -static void gen9_sseu_device_status(struct drm_device *dev, - struct sseu_dev_status *stat) +static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, + struct sseu_dev_info *sseu) { - struct drm_i915_private *dev_priv = to_i915(dev); int s_max = 3, ss_max = 4; int s, ss; u32 s_reg[s_max], eu_reg[2*s_max], eu_mask[2]; /* BXT has a single slice and at most 3 subslices. */ - if (IS_BROXTON(dev)) { + if (IS_BROXTON(dev_priv)) { s_max = 1; ss_max = 3; } @@ -5205,126 +5056,134 @@ static void gen9_sseu_device_status(struct drm_device *dev, GEN9_PGCTL_SSB_EU311_ACK; for (s = 0; s < s_max; s++) { - unsigned int ss_cnt = 0; - if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0) /* skip disabled slice */ continue; - stat->slice_total++; + sseu->slice_mask |= BIT(s); - if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) - ss_cnt = INTEL_INFO(dev)->subslice_per_slice; + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + sseu->subslice_mask = + INTEL_INFO(dev_priv)->sseu.subslice_mask; for (ss = 0; ss < ss_max; ss++) { unsigned int eu_cnt; - if (IS_BROXTON(dev) && - !(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) - /* skip disabled subslice */ - continue; + if (IS_BROXTON(dev_priv)) { + if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) + /* skip disabled subslice */ + continue; - if (IS_BROXTON(dev)) - ss_cnt++; + sseu->subslice_mask |= BIT(ss); + } eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] & eu_mask[ss%2]); - stat->eu_total += eu_cnt; - stat->eu_per_subslice = max(stat->eu_per_subslice, - eu_cnt); + sseu->eu_total += eu_cnt; + sseu->eu_per_subslice = max_t(unsigned int, + sseu->eu_per_subslice, + eu_cnt); } - - stat->subslice_total += ss_cnt; - stat->subslice_per_slice = max(stat->subslice_per_slice, - ss_cnt); } } -static void broadwell_sseu_device_status(struct drm_device *dev, - struct sseu_dev_status *stat) +static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv, + struct sseu_dev_info *sseu) { - struct drm_i915_private *dev_priv = to_i915(dev); - int s; u32 slice_info = I915_READ(GEN8_GT_SLICE_INFO); + int s; - stat->slice_total = hweight32(slice_info & GEN8_LSLICESTAT_MASK); + sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK; - if (stat->slice_total) { - stat->subslice_per_slice = INTEL_INFO(dev)->subslice_per_slice; - stat->subslice_total = stat->slice_total * - stat->subslice_per_slice; - stat->eu_per_subslice = INTEL_INFO(dev)->eu_per_subslice; - stat->eu_total = stat->eu_per_subslice * stat->subslice_total; + if 
(sseu->slice_mask) { + sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask; + sseu->eu_per_subslice = + INTEL_INFO(dev_priv)->sseu.eu_per_subslice; + sseu->eu_total = sseu->eu_per_subslice * + sseu_subslice_total(sseu); /* subtract fused off EU(s) from enabled slice(s) */ - for (s = 0; s < stat->slice_total; s++) { - u8 subslice_7eu = INTEL_INFO(dev)->subslice_7eu[s]; + for (s = 0; s < fls(sseu->slice_mask); s++) { + u8 subslice_7eu = + INTEL_INFO(dev_priv)->sseu.subslice_7eu[s]; - stat->eu_total -= hweight8(subslice_7eu); + sseu->eu_total -= hweight8(subslice_7eu); } } } -static int i915_sseu_status(struct seq_file *m, void *unused) +static void i915_print_sseu_info(struct seq_file *m, bool is_available_info, + const struct sseu_dev_info *sseu) { - struct drm_info_node *node = (struct drm_info_node *) m->private; - struct drm_device *dev = node->minor->dev; - struct sseu_dev_status stat; + struct drm_i915_private *dev_priv = node_to_i915(m->private); + const char *type = is_available_info ? "Available" : "Enabled"; - if (INTEL_INFO(dev)->gen < 8) - return -ENODEV; + seq_printf(m, " %s Slice Mask: %04x\n", type, + sseu->slice_mask); + seq_printf(m, " %s Slice Total: %u\n", type, + hweight8(sseu->slice_mask)); + seq_printf(m, " %s Subslice Total: %u\n", type, + sseu_subslice_total(sseu)); + seq_printf(m, " %s Subslice Mask: %04x\n", type, + sseu->subslice_mask); + seq_printf(m, " %s Subslice Per Slice: %u\n", type, + hweight8(sseu->subslice_mask)); + seq_printf(m, " %s EU Total: %u\n", type, + sseu->eu_total); + seq_printf(m, " %s EU Per Subslice: %u\n", type, + sseu->eu_per_subslice); + + if (!is_available_info) + return; + + seq_printf(m, " Has Pooled EU: %s\n", yesno(HAS_POOLED_EU(dev_priv))); + if (HAS_POOLED_EU(dev_priv)) + seq_printf(m, " Min EU in pool: %u\n", sseu->min_eu_in_pool); - seq_puts(m, "SSEU Device Info\n"); - seq_printf(m, " Available Slice Total: %u\n", - INTEL_INFO(dev)->slice_total); - seq_printf(m, " Available Subslice Total: %u\n", - INTEL_INFO(dev)->subslice_total); - seq_printf(m, " Available Subslice Per Slice: %u\n", - INTEL_INFO(dev)->subslice_per_slice); - seq_printf(m, " Available EU Total: %u\n", - INTEL_INFO(dev)->eu_total); - seq_printf(m, " Available EU Per Subslice: %u\n", - INTEL_INFO(dev)->eu_per_subslice); - seq_printf(m, " Has Pooled EU: %s\n", yesno(HAS_POOLED_EU(dev))); - if (HAS_POOLED_EU(dev)) - seq_printf(m, " Min EU in pool: %u\n", - INTEL_INFO(dev)->min_eu_in_pool); seq_printf(m, " Has Slice Power Gating: %s\n", - yesno(INTEL_INFO(dev)->has_slice_pg)); + yesno(sseu->has_slice_pg)); seq_printf(m, " Has Subslice Power Gating: %s\n", - yesno(INTEL_INFO(dev)->has_subslice_pg)); + yesno(sseu->has_subslice_pg)); seq_printf(m, " Has EU Power Gating: %s\n", - yesno(INTEL_INFO(dev)->has_eu_pg)); + yesno(sseu->has_eu_pg)); +} + +static int i915_sseu_status(struct seq_file *m, void *unused) +{ + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct sseu_dev_info sseu; + + if (INTEL_GEN(dev_priv) < 8) + return -ENODEV; + + seq_puts(m, "SSEU Device Info\n"); + i915_print_sseu_info(m, true, &INTEL_INFO(dev_priv)->sseu); seq_puts(m, "SSEU Device Status\n"); - memset(&stat, 0, sizeof(stat)); - if (IS_CHERRYVIEW(dev)) { - cherryview_sseu_device_status(dev, &stat); - } else if (IS_BROADWELL(dev)) { - broadwell_sseu_device_status(dev, &stat); - } else if (INTEL_INFO(dev)->gen >= 9) { - gen9_sseu_device_status(dev, &stat); - } - seq_printf(m, " Enabled Slice Total: %u\n", - stat.slice_total); - seq_printf(m, " Enabled Subslice Total: 
%u\n", - stat.subslice_total); - seq_printf(m, " Enabled Subslice Per Slice: %u\n", - stat.subslice_per_slice); - seq_printf(m, " Enabled EU Total: %u\n", - stat.eu_total); - seq_printf(m, " Enabled EU Per Subslice: %u\n", - stat.eu_per_subslice); + memset(&sseu, 0, sizeof(sseu)); + + intel_runtime_pm_get(dev_priv); + + if (IS_CHERRYVIEW(dev_priv)) { + cherryview_sseu_device_status(dev_priv, &sseu); + } else if (IS_BROADWELL(dev_priv)) { + broadwell_sseu_device_status(dev_priv, &sseu); + } else if (INTEL_GEN(dev_priv) >= 9) { + gen9_sseu_device_status(dev_priv, &sseu); + } + + intel_runtime_pm_put(dev_priv); + + i915_print_sseu_info(m, false, &sseu); return 0; } static int i915_forcewake_open(struct inode *inode, struct file *file) { - struct drm_device *dev = inode->i_private; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = inode->i_private; - if (INTEL_INFO(dev)->gen < 6) + if (INTEL_GEN(dev_priv) < 6) return 0; intel_runtime_pm_get(dev_priv); @@ -5335,10 +5194,9 @@ static int i915_forcewake_open(struct inode *inode, struct file *file) static int i915_forcewake_release(struct inode *inode, struct file *file) { - struct drm_device *dev = inode->i_private; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = inode->i_private; - if (INTEL_INFO(dev)->gen < 6) + if (INTEL_GEN(dev_priv) < 6) return 0; intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); @@ -5355,12 +5213,11 @@ static const struct file_operations i915_forcewake_fops = { static int i915_forcewake_create(struct dentry *root, struct drm_minor *minor) { - struct drm_device *dev = minor->dev; struct dentry *ent; ent = debugfs_create_file("i915_forcewake_user", S_IRUSR, - root, dev, + root, to_i915(minor->dev), &i915_forcewake_fops); if (!ent) return -ENOMEM; @@ -5373,12 +5230,11 @@ static int i915_debugfs_create(struct dentry *root, const char *name, const struct file_operations *fops) { - struct drm_device *dev = minor->dev; struct dentry *ent; ent = debugfs_create_file(name, S_IRUGO | S_IWUSR, - root, dev, + root, to_i915(minor->dev), fops); if (!ent) return -ENOMEM; @@ -5390,9 +5246,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_capabilities", i915_capabilities, 0}, {"i915_gem_objects", i915_gem_object_info, 0}, {"i915_gem_gtt", i915_gem_gtt_info, 0}, - {"i915_gem_pinned", i915_gem_gtt_info, 0, (void *) PINNED_LIST}, - {"i915_gem_active", i915_gem_object_list_info, 0, (void *) ACTIVE_LIST}, - {"i915_gem_inactive", i915_gem_object_list_info, 0, (void *) INACTIVE_LIST}, + {"i915_gem_pin_display", i915_gem_gtt_info, 0, (void *)1}, {"i915_gem_stolen", i915_gem_stolen_list_info }, {"i915_gem_pageflip", i915_gem_pageflip_info, 0}, {"i915_gem_request", i915_gem_request_info, 0}, @@ -5467,9 +5321,8 @@ static const struct i915_debugfs_files { {"i915_dp_test_active", &i915_displayport_test_active_fops} }; -void intel_display_crc_init(struct drm_device *dev) +void intel_display_crc_init(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); enum pipe pipe; for_each_pipe(dev_priv, pipe) { @@ -5517,7 +5370,7 @@ void i915_debugfs_unregister(struct drm_i915_private *dev_priv) drm_debugfs_remove_files(i915_debugfs_list, I915_DEBUGFS_ENTRIES, minor); - drm_debugfs_remove_files((struct drm_info_list *) &i915_forcewake_fops, + drm_debugfs_remove_files((struct drm_info_list *)&i915_forcewake_fops, 1, minor); for (i = 0; i < ARRAY_SIZE(i915_pipe_crc_data); i++) { @@ -5529,7 +5382,7 @@ void 
i915_debugfs_unregister(struct drm_i915_private *dev_priv) for (i = 0; i < ARRAY_SIZE(i915_debugfs_files); i++) { struct drm_info_list *info_list = - (struct drm_info_list *) i915_debugfs_files[i].fops; + (struct drm_info_list *)i915_debugfs_files[i].fops; drm_debugfs_remove_files(info_list, 1, minor); } @@ -5609,6 +5462,40 @@ static const struct file_operations i915_dpcd_fops = { .release = single_release, }; +static int i915_panel_show(struct seq_file *m, void *data) +{ + struct drm_connector *connector = m->private; + struct intel_dp *intel_dp = + enc_to_intel_dp(&intel_attached_encoder(connector)->base); + + if (connector->status != connector_status_connected) + return -ENODEV; + + seq_printf(m, "Panel power up delay: %d\n", + intel_dp->panel_power_up_delay); + seq_printf(m, "Panel power down delay: %d\n", + intel_dp->panel_power_down_delay); + seq_printf(m, "Backlight on delay: %d\n", + intel_dp->backlight_on_delay); + seq_printf(m, "Backlight off delay: %d\n", + intel_dp->backlight_off_delay); + + return 0; +} + +static int i915_panel_open(struct inode *inode, struct file *file) +{ + return single_open(file, i915_panel_show, inode->i_private); +} + +static const struct file_operations i915_panel_fops = { + .owner = THIS_MODULE, + .open = i915_panel_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /** * i915_debugfs_connector_add - add i915 specific connector debugfs files * @connector: pointer to a registered drm_connector @@ -5628,8 +5515,12 @@ int i915_debugfs_connector_add(struct drm_connector *connector) if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort || connector->connector_type == DRM_MODE_CONNECTOR_eDP) - debugfs_create_file("i915_dpcd", S_IRUGO, root, connector, - &i915_dpcd_fops); + debugfs_create_file("i915_dpcd", S_IRUGO, root, + connector, &i915_dpcd_fops); + + if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) + debugfs_create_file("i915_panel_timings", S_IRUGO, root, + connector, &i915_panel_fops); return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 95ddd56b89f0..bfb2efd8d4d4 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -77,7 +77,7 @@ __i915_printk(struct drm_i915_private *dev_priv, const char *level, const char *fmt, ...) 
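Reading the new eDP-only i915_panel_timings file added above reports the four panel power-sequencer delays. Sample output, with an illustrative debugfs path and values (the connector directory name depends on the probed outputs):

	# cat /sys/kernel/debug/dri/0/eDP-1/i915_panel_timings
	Panel power up delay: 210
	Panel power down delay: 500
	Backlight on delay: 50
	Backlight off delay: 50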
{ static bool shown_bug_once; - struct device *dev = dev_priv->drm.dev; + struct device *kdev = dev_priv->drm.dev; bool is_error = level[1] <= KERN_ERR[1]; bool is_debug = level[1] == KERN_DEBUG[1]; struct va_format vaf; @@ -91,11 +91,11 @@ __i915_printk(struct drm_i915_private *dev_priv, const char *level, vaf.fmt = fmt; vaf.va = &args; - dev_printk(level, dev, "[" DRM_NAME ":%ps] %pV", + dev_printk(level, kdev, "[" DRM_NAME ":%ps] %pV", __builtin_return_address(0), &vaf); if (is_error && !shown_bug_once) { - dev_notice(dev, "%s", FDO_BUG_MSG); + dev_notice(kdev, "%s", FDO_BUG_MSG); shown_bug_once = true; } @@ -228,31 +228,11 @@ static void intel_detect_pch(struct drm_device *dev) pci_dev_put(pch); } -bool i915_semaphore_is_enabled(struct drm_i915_private *dev_priv) -{ - if (INTEL_GEN(dev_priv) < 6) - return false; - - if (i915.semaphores >= 0) - return i915.semaphores; - - /* TODO: make semaphores and Execlists play nicely together */ - if (i915.enable_execlists) - return false; - -#ifdef CONFIG_INTEL_IOMMU - /* Enable semaphores on SNB when IO remapping is off */ - if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) - return false; -#endif - - return true; -} - static int i915_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_i915_private *dev_priv = to_i915(dev); + struct pci_dev *pdev = dev_priv->drm.pdev; drm_i915_getparam_t *param = data; int value; @@ -263,13 +243,10 @@ static int i915_getparam(struct drm_device *dev, void *data, /* Reject all old ums/dri params. */ return -ENODEV; case I915_PARAM_CHIPSET_ID: - value = dev->pdev->device; + value = pdev->device; break; case I915_PARAM_REVISION: - value = dev->pdev->revision; - break; - case I915_PARAM_HAS_GEM: - value = 1; + value = pdev->revision; break; case I915_PARAM_NUM_FENCES_AVAIL: value = dev_priv->num_fence_regs; @@ -277,13 +254,6 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_OVERLAY: value = dev_priv->overlay ? 
1 : 0; break; - case I915_PARAM_HAS_PAGEFLIPPING: - value = 1; - break; - case I915_PARAM_HAS_EXECBUF2: - /* depends on GEM */ - value = 1; - break; case I915_PARAM_HAS_BSD: value = intel_engine_initialized(&dev_priv->engine[VCS]); break; @@ -296,67 +266,34 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_BSD2: value = intel_engine_initialized(&dev_priv->engine[VCS2]); break; - case I915_PARAM_HAS_RELAXED_FENCING: - value = 1; - break; - case I915_PARAM_HAS_COHERENT_RINGS: - value = 1; - break; case I915_PARAM_HAS_EXEC_CONSTANTS: - value = INTEL_INFO(dev)->gen >= 4; - break; - case I915_PARAM_HAS_RELAXED_DELTA: - value = 1; - break; - case I915_PARAM_HAS_GEN7_SOL_RESET: - value = 1; + value = INTEL_GEN(dev_priv) >= 4; break; case I915_PARAM_HAS_LLC: - value = HAS_LLC(dev); + value = HAS_LLC(dev_priv); break; case I915_PARAM_HAS_WT: - value = HAS_WT(dev); + value = HAS_WT(dev_priv); break; case I915_PARAM_HAS_ALIASING_PPGTT: - value = USES_PPGTT(dev); - break; - case I915_PARAM_HAS_WAIT_TIMEOUT: - value = 1; + value = USES_PPGTT(dev_priv); break; case I915_PARAM_HAS_SEMAPHORES: - value = i915_semaphore_is_enabled(dev_priv); - break; - case I915_PARAM_HAS_PRIME_VMAP_FLUSH: - value = 1; + value = i915.semaphores; break; case I915_PARAM_HAS_SECURE_BATCHES: value = capable(CAP_SYS_ADMIN); break; - case I915_PARAM_HAS_PINNED_BATCHES: - value = 1; - break; - case I915_PARAM_HAS_EXEC_NO_RELOC: - value = 1; - break; - case I915_PARAM_HAS_EXEC_HANDLE_LUT: - value = 1; - break; case I915_PARAM_CMD_PARSER_VERSION: value = i915_cmd_parser_get_version(dev_priv); break; - case I915_PARAM_HAS_COHERENT_PHYS_GTT: - value = 1; - break; - case I915_PARAM_MMAP_VERSION: - value = 1; - break; case I915_PARAM_SUBSLICE_TOTAL: - value = INTEL_INFO(dev)->subslice_total; + value = sseu_subslice_total(&INTEL_INFO(dev_priv)->sseu); if (!value) return -ENODEV; break; case I915_PARAM_EU_TOTAL: - value = INTEL_INFO(dev)->eu_total; + value = INTEL_INFO(dev_priv)->sseu.eu_total; if (!value) return -ENODEV; break; @@ -364,16 +301,43 @@ static int i915_getparam(struct drm_device *dev, void *data, value = i915.enable_hangcheck && intel_has_gpu_reset(dev_priv); break; case I915_PARAM_HAS_RESOURCE_STREAMER: - value = HAS_RESOURCE_STREAMER(dev); - break; - case I915_PARAM_HAS_EXEC_SOFTPIN: - value = 1; + value = HAS_RESOURCE_STREAMER(dev_priv); break; case I915_PARAM_HAS_POOLED_EU: - value = HAS_POOLED_EU(dev); + value = HAS_POOLED_EU(dev_priv); break; case I915_PARAM_MIN_EU_IN_POOL: - value = INTEL_INFO(dev)->min_eu_in_pool; + value = INTEL_INFO(dev_priv)->sseu.min_eu_in_pool; + break; + case I915_PARAM_MMAP_GTT_VERSION: + /* Though we've started our numbering from 1, and so class all + * earlier versions as 0, in effect their value is undefined as + * the ioctl will report EINVAL for the unknown param! + */ + value = i915_gem_mmap_gtt_version(); + break; + case I915_PARAM_MMAP_VERSION: + /* Remember to bump this if the version changes! 
*/ + case I915_PARAM_HAS_GEM: + case I915_PARAM_HAS_PAGEFLIPPING: + case I915_PARAM_HAS_EXECBUF2: /* depends on GEM */ + case I915_PARAM_HAS_RELAXED_FENCING: + case I915_PARAM_HAS_COHERENT_RINGS: + case I915_PARAM_HAS_RELAXED_DELTA: + case I915_PARAM_HAS_GEN7_SOL_RESET: + case I915_PARAM_HAS_WAIT_TIMEOUT: + case I915_PARAM_HAS_PRIME_VMAP_FLUSH: + case I915_PARAM_HAS_PINNED_BATCHES: + case I915_PARAM_HAS_EXEC_NO_RELOC: + case I915_PARAM_HAS_EXEC_HANDLE_LUT: + case I915_PARAM_HAS_COHERENT_PHYS_GTT: + case I915_PARAM_HAS_EXEC_SOFTPIN: + /* For the time being all of these are always true; + * if some supported hardware does not have one of these + * features this value needs to be provided from + * INTEL_INFO(), a feature macro, or similar. + */ + value = 1; break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); @@ -537,7 +501,7 @@ static void i915_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_ pr_info("switched on\n"); dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; /* i915 resume handler doesn't set to D0 */ - pci_set_power_state(dev->pdev, PCI_D0); + pci_set_power_state(pdev, PCI_D0); i915_resume_switcheroo(dev); dev->switch_power_state = DRM_SWITCH_POWER_ON; } else { @@ -595,7 +559,6 @@ static void i915_gem_fini(struct drm_device *dev) } mutex_lock(&dev->struct_mutex); - i915_gem_reset(dev); i915_gem_cleanup_engines(dev); i915_gem_context_fini(dev); mutex_unlock(&dev->struct_mutex); @@ -606,6 +569,7 @@ static void i915_gem_fini(struct drm_device *dev) static int i915_load_modeset_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + struct pci_dev *pdev = dev_priv->drm.pdev; int ret; if (i915_inject_load_failure()) @@ -622,13 +586,13 @@ static int i915_load_modeset_init(struct drm_device *dev) * then we do not take part in VGA arbitration and the * vga_client_register() fails with -ENODEV. 
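On the userspace side, each of these parameters, including the new I915_PARAM_MMAP_GTT_VERSION, is queried through the GETPARAM ioctl; as the comment above notes, an older kernel reports EINVAL for an unknown parameter, which callers should treat as "feature absent"/version 0. A minimal sketch using the uapi definitions:

#include <sys/ioctl.h>
#include <errno.h>
#include <drm/i915_drm.h>

static int i915_getparam_or(int fd, int param, int fallback)
{
	int value = fallback;
	drm_i915_getparam_t gp = {
		.param = param,
		.value = &value,
	};

	if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
		return errno == EINVAL ? fallback : -1;

	return value;
}

/* e.g. i915_getparam_or(fd, I915_PARAM_MMAP_GTT_VERSION, 0) */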
*/ - ret = vga_client_register(dev->pdev, dev, NULL, i915_vga_set_decode); + ret = vga_client_register(pdev, dev, NULL, i915_vga_set_decode); if (ret && ret != -ENODEV) goto out; intel_register_dsm_handler(); - ret = vga_switcheroo_register_client(dev->pdev, &i915_switcheroo_ops, false); + ret = vga_switcheroo_register_client(pdev, &i915_switcheroo_ops, false); if (ret) goto cleanup_vga_client; @@ -680,9 +644,9 @@ cleanup_irq: cleanup_csr: intel_csr_ucode_fini(dev_priv); intel_power_domains_fini(dev_priv); - vga_switcheroo_unregister_client(dev->pdev); + vga_switcheroo_unregister_client(pdev); cleanup_vga_client: - vga_client_register(dev->pdev, NULL, NULL, NULL); + vga_client_register(pdev, NULL, NULL, NULL); out: return ret; } @@ -706,7 +670,7 @@ static int i915_kick_out_firmware_fb(struct drm_i915_private *dev_priv) primary = pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW; - ret = remove_conflicting_framebuffers(ap, "inteldrmfb", primary); + ret = drm_fb_helper_remove_conflicting_framebuffers(ap, "inteldrmfb", primary); kfree(ap); @@ -848,6 +812,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, mutex_init(&dev_priv->wm.wm_mutex); mutex_init(&dev_priv->pps_mutex); + i915_memcpy_init_early(dev_priv); + ret = i915_workqueues_init(dev_priv); if (ret < 0) return ret; @@ -868,7 +834,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, intel_init_audio_hooks(dev_priv); i915_gem_load_init(&dev_priv->drm); - intel_display_crc_init(&dev_priv->drm); + intel_display_crc_init(dev_priv); intel_device_info_dump(dev_priv); @@ -900,6 +866,7 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv) static int i915_mmio_setup(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + struct pci_dev *pdev = dev_priv->drm.pdev; int mmio_bar; int mmio_size; @@ -916,7 +883,7 @@ static int i915_mmio_setup(struct drm_device *dev) mmio_size = 512 * 1024; else mmio_size = 2 * 1024 * 1024; - dev_priv->regs = pci_iomap(dev->pdev, mmio_bar, mmio_size); + dev_priv->regs = pci_iomap(pdev, mmio_bar, mmio_size); if (dev_priv->regs == NULL) { DRM_ERROR("failed to map registers\n"); @@ -932,9 +899,10 @@ static int i915_mmio_setup(struct drm_device *dev) static void i915_mmio_cleanup(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + struct pci_dev *pdev = dev_priv->drm.pdev; intel_teardown_mchbar(dev); - pci_iounmap(dev->pdev, dev_priv->regs); + pci_iounmap(pdev, dev_priv->regs); } /** @@ -999,6 +967,9 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv) i915.enable_ppgtt = intel_sanitize_enable_ppgtt(dev_priv, i915.enable_ppgtt); DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt); + + i915.semaphores = intel_sanitize_semaphores(dev_priv, i915.semaphores); + DRM_DEBUG_DRIVER("use GPU semaphores? 
%s\n", yesno(i915.semaphores)); } /** @@ -1010,9 +981,8 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv) */ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) { + struct pci_dev *pdev = dev_priv->drm.pdev; struct drm_device *dev = &dev_priv->drm; - struct i915_ggtt *ggtt = &dev_priv->ggtt; - uint32_t aperture_size; int ret; if (i915_inject_load_failure()) @@ -1022,16 +992,10 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) intel_sanitize_options(dev_priv); - ret = i915_ggtt_init_hw(dev); + ret = i915_ggtt_probe_hw(dev_priv); if (ret) return ret; - ret = i915_ggtt_enable_hw(dev); - if (ret) { - DRM_ERROR("failed to enable GGTT\n"); - goto out_ggtt; - } - /* WARNING: Apparently we must kick fbdev drivers before vgacon, * otherwise the vga fbdev driver falls over. */ ret = i915_kick_out_firmware_fb(dev_priv); @@ -1046,11 +1010,21 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) goto out_ggtt; } - pci_set_master(dev->pdev); + ret = i915_ggtt_init_hw(dev_priv); + if (ret) + return ret; + + ret = i915_ggtt_enable_hw(dev_priv); + if (ret) { + DRM_ERROR("failed to enable GGTT\n"); + goto out_ggtt; + } + + pci_set_master(pdev); /* overlay on gen2 is broken and can't address above 1G */ if (IS_GEN2(dev)) { - ret = dma_set_coherent_mask(&dev->pdev->dev, DMA_BIT_MASK(30)); + ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(30)); if (ret) { DRM_ERROR("failed to set DMA mask\n"); @@ -1058,7 +1032,6 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) } } - /* 965GM sometimes incorrectly writes to hardware status page (HWS) * using 32bit addressing, overwriting memory if HWS is located * above 4GB. @@ -1068,7 +1041,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) * which also needs to be handled carefully. */ if (IS_BROADWATER(dev) || IS_CRESTLINE(dev)) { - ret = dma_set_coherent_mask(&dev->pdev->dev, DMA_BIT_MASK(32)); + ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); if (ret) { DRM_ERROR("failed to set DMA mask\n"); @@ -1077,19 +1050,6 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) } } - aperture_size = ggtt->mappable_end; - - ggtt->mappable = - io_mapping_create_wc(ggtt->mappable_base, - aperture_size); - if (!ggtt->mappable) { - ret = -EIO; - goto out_ggtt; - } - - ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, - aperture_size); - pm_qos_add_request(&dev_priv->pm_qos, PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE); @@ -1111,14 +1071,14 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) * stuck interrupts on some machines. 
*/ if (!IS_I945G(dev) && !IS_I945GM(dev)) { - if (pci_enable_msi(dev->pdev) < 0) + if (pci_enable_msi(pdev) < 0) DRM_DEBUG_DRIVER("can't enable MSI"); } return 0; out_ggtt: - i915_ggtt_cleanup_hw(dev); + i915_ggtt_cleanup_hw(dev_priv); return ret; } @@ -1129,16 +1089,13 @@ out_ggtt: */ static void i915_driver_cleanup_hw(struct drm_i915_private *dev_priv) { - struct drm_device *dev = &dev_priv->drm; - struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct pci_dev *pdev = dev_priv->drm.pdev; - if (dev->pdev->msi_enabled) - pci_disable_msi(dev->pdev); + if (pdev->msi_enabled) + pci_disable_msi(pdev); pm_qos_remove_request(&dev_priv->pm_qos); - arch_phys_wc_del(ggtt->mtrr); - io_mapping_free(ggtt->mappable); - i915_ggtt_cleanup_hw(dev); + i915_ggtt_cleanup_hw(dev_priv); } /** @@ -1164,7 +1121,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) /* Reveal our presence to userspace */ if (drm_dev_register(dev, 0) == 0) { i915_debugfs_register(dev_priv); - i915_setup_sysfs(dev); + i915_setup_sysfs(dev_priv); } else DRM_ERROR("Failed to register driver for userspace access!\n"); @@ -1201,7 +1158,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) acpi_video_unregister(); intel_opregion_unregister(dev_priv); - i915_teardown_sysfs(&dev_priv->drm); + i915_teardown_sysfs(dev_priv); i915_debugfs_unregister(dev_priv); drm_dev_unregister(&dev_priv->drm); @@ -1281,6 +1238,11 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) intel_runtime_pm_enable(dev_priv); + /* Everything is in place, we can now relax! */ + DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n", + driver.name, driver.major, driver.minor, driver.patchlevel, + driver.date, pci_name(pdev), dev_priv->drm.primary->index); + intel_runtime_pm_put(dev_priv); return 0; @@ -1305,6 +1267,7 @@ out_free_priv: void i915_driver_unload(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + struct pci_dev *pdev = dev_priv->drm.pdev; intel_fbdev_fini(dev); @@ -1333,8 +1296,8 @@ void i915_driver_unload(struct drm_device *dev) kfree(dev_priv->vbt.lfp_lvds_vbt_mode); dev_priv->vbt.lfp_lvds_vbt_mode = NULL; - vga_switcheroo_unregister_client(dev->pdev); - vga_client_register(dev->pdev, NULL, NULL, NULL); + vga_switcheroo_unregister_client(pdev); + vga_client_register(pdev, NULL, NULL, NULL); intel_csr_ucode_fini(dev_priv); @@ -1343,7 +1306,7 @@ void i915_driver_unload(struct drm_device *dev) i915_destroy_error_state(dev); /* Flush any outstanding unpin_work. 
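The DRM_INFO banner added to i915_driver_load() above fires only once initialization has fully succeeded; with the DRIVER_* constants from i915_drv.h later in this patch (version 1.6.0, dated 20160919) it renders along these lines, PCI address and minor number illustrative:

	[drm] Initialized i915 1.6.0 20160919 for 0000:00:02.0 on minor 0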
*/ - flush_workqueue(dev_priv->wq); + drain_workqueue(dev_priv->wq); intel_guc_fini(dev); i915_gem_fini(dev); @@ -1431,6 +1394,7 @@ static bool suspend_to_idle(struct drm_i915_private *dev_priv) static int i915_drm_suspend(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + struct pci_dev *pdev = dev_priv->drm.pdev; pci_power_t opregion_target_state; int error; @@ -1447,19 +1411,17 @@ static int i915_drm_suspend(struct drm_device *dev) drm_kms_helper_poll_disable(dev); - pci_save_state(dev->pdev); + pci_save_state(pdev); error = i915_gem_suspend(dev); if (error) { - dev_err(&dev->pdev->dev, + dev_err(&pdev->dev, "GEM idle failed, resume might fail\n"); goto out; } intel_guc_suspend(dev); - intel_suspend_gt_powersave(dev_priv); - intel_display_suspend(dev); intel_dp_mst_suspend(dev); @@ -1495,9 +1457,10 @@ out: return error; } -static int i915_drm_suspend_late(struct drm_device *drm_dev, bool hibernation) +static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) { - struct drm_i915_private *dev_priv = to_i915(drm_dev); + struct drm_i915_private *dev_priv = to_i915(dev); + struct pci_dev *pdev = dev_priv->drm.pdev; bool fw_csr; int ret; @@ -1531,7 +1494,7 @@ static int i915_drm_suspend_late(struct drm_device *drm_dev, bool hibernation) goto out; } - pci_disable_device(drm_dev->pdev); + pci_disable_device(pdev); /* * During hibernation on some platforms the BIOS may try to access * the device even though it's already in D3 and hang the machine. So @@ -1545,7 +1508,7 @@ static int i915_drm_suspend_late(struct drm_device *drm_dev, bool hibernation) * Acer Aspire 1830T */ if (!(hibernation && INTEL_INFO(dev_priv)->gen < 6)) - pci_set_power_state(drm_dev->pdev, PCI_D3hot); + pci_set_power_state(pdev, PCI_D3hot); dev_priv->suspended_to_idle = suspend_to_idle(dev_priv); @@ -1585,18 +1548,18 @@ static int i915_drm_resume(struct drm_device *dev) int ret; disable_rpm_wakeref_asserts(dev_priv); + intel_sanitize_gt_powersave(dev_priv); - ret = i915_ggtt_enable_hw(dev); + ret = i915_ggtt_enable_hw(dev_priv); if (ret) DRM_ERROR("failed to re-enable GGTT\n"); intel_csr_ucode_resume(dev_priv); - mutex_lock(&dev->struct_mutex); - i915_gem_restore_gtt_mappings(dev); - mutex_unlock(&dev->struct_mutex); + i915_gem_resume(dev); i915_restore_state(dev); + intel_pps_unlock_regs_wa(dev_priv); intel_opregion_setup(dev_priv); intel_init_pch_refclk(dev); @@ -1615,7 +1578,7 @@ static int i915_drm_resume(struct drm_device *dev) mutex_lock(&dev->struct_mutex); if (i915_gem_init_hw(dev)) { DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); - atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter); + i915_gem_set_wedged(dev_priv); } mutex_unlock(&dev->struct_mutex); @@ -1652,6 +1615,7 @@ static int i915_drm_resume(struct drm_device *dev) intel_opregion_notify_adapter(dev_priv, PCI_D0); + intel_autoenable_gt_powersave(dev_priv); drm_kms_helper_poll_enable(dev); enable_rpm_wakeref_asserts(dev_priv); @@ -1662,6 +1626,7 @@ static int i915_drm_resume(struct drm_device *dev) static int i915_drm_resume_early(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + struct pci_dev *pdev = dev_priv->drm.pdev; int ret; /* @@ -1684,7 +1649,7 @@ static int i915_drm_resume_early(struct drm_device *dev) * the device powered we can also remove the following set power state * call. 
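The flush_workqueue() to drain_workqueue() switch in i915_driver_unload() above is not cosmetic: flush only waits for work already queued at the time of the call, while unpin_work can re-queue itself, so teardown wants drain_workqueue(), which keeps flushing until the queue stays empty and warns if work is still being added. The general idiom:

static void example_teardown(struct workqueue_struct *wq)
{
	drain_workqueue(wq);	/* also waits out self-requeueing work */
	destroy_workqueue(wq);	/* safe: the queue is now idle */
}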
*/ - ret = pci_set_power_state(dev->pdev, PCI_D0); + ret = pci_set_power_state(pdev, PCI_D0); if (ret) { DRM_ERROR("failed to set PCI D0 power state (%d)\n", ret); goto out; @@ -1703,12 +1668,12 @@ static int i915_drm_resume_early(struct drm_device *dev) * depend on the device enable refcount we can't anyway depend on them * disabling/enabling the device. */ - if (pci_enable_device(dev->pdev)) { + if (pci_enable_device(pdev)) { ret = -EIO; goto out; } - pci_set_master(dev->pdev); + pci_set_master(pdev); disable_rpm_wakeref_asserts(dev_priv); @@ -1760,8 +1725,10 @@ int i915_resume_switcheroo(struct drm_device *dev) * i915_reset - reset chip after a hang * @dev: drm device to reset * - * Reset the chip. Useful if a hang is detected. Returns zero on successful - * reset or otherwise an error code. + * Reset the chip. Useful if a hang is detected. Marks the device as wedged + * on failure. + * + * Caller must hold the struct_mutex. * * Procedure is fairly simple: * - reset the chip using the reset reg @@ -1771,31 +1738,22 @@ int i915_resume_switcheroo(struct drm_device *dev) * - re-init interrupt state * - re-init display */ -int i915_reset(struct drm_i915_private *dev_priv) +void i915_reset(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; struct i915_gpu_error *error = &dev_priv->gpu_error; - unsigned reset_counter; int ret; - intel_reset_gt_powersave(dev_priv); + lockdep_assert_held(&dev->struct_mutex); - mutex_lock(&dev->struct_mutex); + if (!test_and_clear_bit(I915_RESET_IN_PROGRESS, &error->flags)) + return; /* Clear any previous failed attempts at recovery. Time to try again. */ - atomic_andnot(I915_WEDGED, &error->reset_counter); - - /* Clear the reset-in-progress flag and increment the reset epoch. */ - reset_counter = atomic_inc_return(&error->reset_counter); - if (WARN_ON(__i915_reset_in_progress(reset_counter))) { - ret = -EIO; - goto error; - } + __clear_bit(I915_WEDGED, &error->flags); + error->reset_count++; pr_notice("drm/i915: Resetting chip after gpu hang\n"); - - i915_gem_reset(dev); - ret = intel_gpu_reset(dev_priv, ALL_ENGINES); if (ret) { if (ret != -ENODEV) @@ -1805,6 +1763,7 @@ int i915_reset(struct drm_i915_private *dev_priv) goto error; } + i915_gem_reset(dev_priv); intel_overlay_reset(dev_priv); /* Ok, now get things going again... */ @@ -1827,44 +1786,34 @@ int i915_reset(struct drm_i915_private *dev_priv) goto error; } - mutex_unlock(&dev->struct_mutex); - - /* - * rps/rc6 re-init is necessary to restore state lost after the - * reset and the re-install of gt irqs. Skip for ironlake per - * previous concerns that it doesn't respond well to some forms - * of re-init after reset. 
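Under the flag-based scheme above, i915_reset() bails out unless the error handler set I915_RESET_IN_PROGRESS beforehand, and signals completion (success or wedging) through wake_up_bit(). Any path that must wait for a reset to finish can therefore pair with it via the bit-wait API; a sketch with a hypothetical helper name:

static void example_wait_for_reset(struct i915_gpu_error *error)
{
	/* pairs with the wake_up_bit() in i915_reset() above */
	wait_on_bit(&error->flags, I915_RESET_IN_PROGRESS,
		    TASK_UNINTERRUPTIBLE);
}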
- */ - if (INTEL_INFO(dev)->gen > 5) - intel_enable_gt_powersave(dev_priv); - - return 0; +wakeup: + wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS); + return; error: - atomic_or(I915_WEDGED, &error->reset_counter); - mutex_unlock(&dev->struct_mutex); - return ret; + i915_gem_set_wedged(dev_priv); + goto wakeup; } -static int i915_pm_suspend(struct device *dev) +static int i915_pm_suspend(struct device *kdev) { - struct pci_dev *pdev = to_pci_dev(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); + struct pci_dev *pdev = to_pci_dev(kdev); + struct drm_device *dev = pci_get_drvdata(pdev); - if (!drm_dev) { - dev_err(dev, "DRM not initialized, aborting suspend.\n"); + if (!dev) { + dev_err(kdev, "DRM not initialized, aborting suspend.\n"); return -ENODEV; } - if (drm_dev->switch_power_state == DRM_SWITCH_POWER_OFF) + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - return i915_drm_suspend(drm_dev); + return i915_drm_suspend(dev); } -static int i915_pm_suspend_late(struct device *dev) +static int i915_pm_suspend_late(struct device *kdev) { - struct drm_device *drm_dev = &dev_to_i915(dev)->drm; + struct drm_device *dev = &kdev_to_i915(kdev)->drm; /* * We have a suspend ordering issue with the snd-hda driver also @@ -1875,57 +1824,67 @@ static int i915_pm_suspend_late(struct device *dev) * FIXME: This should be solved with a special hdmi sink device or * similar so that power domains can be employed. */ - if (drm_dev->switch_power_state == DRM_SWITCH_POWER_OFF) + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - return i915_drm_suspend_late(drm_dev, false); + return i915_drm_suspend_late(dev, false); } -static int i915_pm_poweroff_late(struct device *dev) +static int i915_pm_poweroff_late(struct device *kdev) { - struct drm_device *drm_dev = &dev_to_i915(dev)->drm; + struct drm_device *dev = &kdev_to_i915(kdev)->drm; - if (drm_dev->switch_power_state == DRM_SWITCH_POWER_OFF) + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - return i915_drm_suspend_late(drm_dev, true); + return i915_drm_suspend_late(dev, true); } -static int i915_pm_resume_early(struct device *dev) +static int i915_pm_resume_early(struct device *kdev) { - struct drm_device *drm_dev = &dev_to_i915(dev)->drm; + struct drm_device *dev = &kdev_to_i915(kdev)->drm; - if (drm_dev->switch_power_state == DRM_SWITCH_POWER_OFF) + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - return i915_drm_resume_early(drm_dev); + return i915_drm_resume_early(dev); } -static int i915_pm_resume(struct device *dev) +static int i915_pm_resume(struct device *kdev) { - struct drm_device *drm_dev = &dev_to_i915(dev)->drm; + struct drm_device *dev = &kdev_to_i915(kdev)->drm; - if (drm_dev->switch_power_state == DRM_SWITCH_POWER_OFF) + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - return i915_drm_resume(drm_dev); + return i915_drm_resume(dev); } /* freeze: before creating the hibernation_image */ -static int i915_pm_freeze(struct device *dev) +static int i915_pm_freeze(struct device *kdev) { - return i915_pm_suspend(dev); + int ret; + + ret = i915_pm_suspend(kdev); + if (ret) + return ret; + + ret = i915_gem_freeze(kdev_to_i915(kdev)); + if (ret) + return ret; + + return 0; } -static int i915_pm_freeze_late(struct device *dev) +static int i915_pm_freeze_late(struct device *kdev) { int ret; - ret = i915_pm_suspend_late(dev); + ret = i915_pm_suspend_late(kdev); if (ret) return ret; - ret = i915_gem_freeze_late(dev_to_i915(dev)); + ret = 
i915_gem_freeze_late(kdev_to_i915(kdev)); if (ret) return ret; @@ -1933,25 +1892,25 @@ static int i915_pm_freeze_late(struct device *dev) } /* thaw: called after creating the hibernation image, but before turning off. */ -static int i915_pm_thaw_early(struct device *dev) +static int i915_pm_thaw_early(struct device *kdev) { - return i915_pm_resume_early(dev); + return i915_pm_resume_early(kdev); } -static int i915_pm_thaw(struct device *dev) +static int i915_pm_thaw(struct device *kdev) { - return i915_pm_resume(dev); + return i915_pm_resume(kdev); } /* restore: called after loading the hibernation image. */ -static int i915_pm_restore_early(struct device *dev) +static int i915_pm_restore_early(struct device *kdev) { - return i915_pm_resume_early(dev); + return i915_pm_resume_early(kdev); } -static int i915_pm_restore(struct device *dev) +static int i915_pm_restore(struct device *kdev) { - return i915_pm_resume(dev); + return i915_pm_resume(kdev); } /* @@ -2313,9 +2272,9 @@ static int vlv_resume_prepare(struct drm_i915_private *dev_priv, return ret; } -static int intel_runtime_suspend(struct device *device) +static int intel_runtime_suspend(struct device *kdev) { - struct pci_dev *pdev = to_pci_dev(device); + struct pci_dev *pdev = to_pci_dev(kdev); struct drm_device *dev = pci_get_drvdata(pdev); struct drm_i915_private *dev_priv = to_i915(dev); int ret; @@ -2341,7 +2300,7 @@ static int intel_runtime_suspend(struct device *device) * Bump the expiration timestamp, otherwise the suspend won't * be rescheduled. */ - pm_runtime_mark_last_busy(device); + pm_runtime_mark_last_busy(kdev); return -EAGAIN; } @@ -2420,9 +2379,9 @@ static int intel_runtime_suspend(struct device *device) return 0; } -static int intel_runtime_resume(struct device *device) +static int intel_runtime_resume(struct device *kdev) { - struct pci_dev *pdev = to_pci_dev(device); + struct pci_dev *pdev = to_pci_dev(kdev); struct drm_device *dev = pci_get_drvdata(pdev); struct drm_i915_private *dev_priv = to_i915(dev); int ret = 0; @@ -2462,7 +2421,6 @@ static int intel_runtime_resume(struct device *device) * we can do is to hope that things will still work (and disable RPM). 
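The dev to kdev renames running through these PM callbacks are purely for disambiguation: struct device, struct drm_device and struct pci_dev all tend to be called "dev", so the base device is now consistently kdev and is converted with kdev_to_i915() (renamed from dev_to_i915 further down in i915_drv.h). The resulting idiom, with a hypothetical callback name:

static int example_pm_callback(struct device *kdev)
{
	struct drm_device *dev = &kdev_to_i915(kdev)->drm;

	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
		return 0;

	return 0;	/* the real callbacks forward to i915_drm_*() here */
}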
*/ i915_gem_init_swizzling(dev); - gen6_update_ring_freq(dev_priv); intel_runtime_pm_enable_interrupts(dev_priv); @@ -2618,6 +2576,7 @@ static struct drm_driver driver = { .postclose = i915_driver_postclose, .set_busid = drm_pci_set_busid, + .gem_close_object = i915_gem_close_object, .gem_free_object = i915_gem_free_object, .gem_vm_ops = &i915_gem_vm_ops, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 21f939074abc..8b9ee4e390c0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -61,6 +61,7 @@ #include "i915_gem.h" #include "i915_gem_gtt.h" #include "i915_gem_render_state.h" +#include "i915_gem_request.h" #include "intel_gvt.h" @@ -69,7 +70,7 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20160711" +#define DRIVER_DATE "20160919" #undef WARN_ON /* Many gcc seem to no see through this and fall over :( */ @@ -401,7 +402,7 @@ struct drm_i915_file_private { unsigned boosts; } rps; - unsigned int bsd_ring; + unsigned int bsd_engine; }; /* Used by dp and fdi links */ @@ -431,8 +432,6 @@ void intel_link_compute_m_n(int bpp, int nlanes, #define DRIVER_MINOR 6 #define DRIVER_PATCHLEVEL 0 -#define WATCH_LISTS 0 - struct opregion_header; struct opregion_acpi; struct opregion_swsci; @@ -456,15 +455,21 @@ struct intel_opregion { struct intel_overlay; struct intel_overlay_error_state; -#define I915_FENCE_REG_NONE -1 -#define I915_MAX_NUM_FENCES 32 -/* 32 fences + sign bit for FENCE_REG_NONE */ -#define I915_MAX_NUM_FENCE_BITS 6 - struct drm_i915_fence_reg { - struct list_head lru_list; - struct drm_i915_gem_object *obj; + struct list_head link; + struct drm_i915_private *i915; + struct i915_vma *vma; int pin_count; + int id; + /** + * Whether the tiling parameters for the currently + * associated fence register have changed. Note that + * for the purposes of tracking tiling changes we also + * treat the unfenced register, the register slot that + * the object occupies whilst it executes a fenced + * command (such as BLT on gen2/3), as a "fence". 
+ */ + bool dirty; }; struct sdvo_device_mapping { @@ -476,130 +481,6 @@ struct sdvo_device_mapping { u8 ddc_pin; }; -struct intel_display_error_state; - -struct drm_i915_error_state { - struct kref ref; - struct timeval time; - - char error_msg[128]; - bool simulated; - int iommu; - u32 reset_count; - u32 suspend_count; - - /* Generic register state */ - u32 eir; - u32 pgtbl_er; - u32 ier; - u32 gtier[4]; - u32 ccid; - u32 derrmr; - u32 forcewake; - u32 error; /* gen6+ */ - u32 err_int; /* gen7 */ - u32 fault_data0; /* gen8, gen9 */ - u32 fault_data1; /* gen8, gen9 */ - u32 done_reg; - u32 gac_eco; - u32 gam_ecochk; - u32 gab_ctl; - u32 gfx_mode; - u32 extra_instdone[I915_NUM_INSTDONE_REG]; - u64 fence[I915_MAX_NUM_FENCES]; - struct intel_overlay_error_state *overlay; - struct intel_display_error_state *display; - struct drm_i915_error_object *semaphore_obj; - - struct drm_i915_error_ring { - bool valid; - /* Software tracked state */ - bool waiting; - int num_waiters; - int hangcheck_score; - enum intel_ring_hangcheck_action hangcheck_action; - int num_requests; - - /* our own tracking of ring head and tail */ - u32 cpu_ring_head; - u32 cpu_ring_tail; - - u32 last_seqno; - u32 semaphore_seqno[I915_NUM_ENGINES - 1]; - - /* Register state */ - u32 start; - u32 tail; - u32 head; - u32 ctl; - u32 hws; - u32 ipeir; - u32 ipehr; - u32 instdone; - u32 bbstate; - u32 instpm; - u32 instps; - u32 seqno; - u64 bbaddr; - u64 acthd; - u32 fault_reg; - u64 faddr; - u32 rc_psmi; /* sleep state */ - u32 semaphore_mboxes[I915_NUM_ENGINES - 1]; - - struct drm_i915_error_object { - int page_count; - u64 gtt_offset; - u32 *pages[0]; - } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page; - - struct drm_i915_error_object *wa_ctx; - - struct drm_i915_error_request { - long jiffies; - u32 seqno; - u32 tail; - } *requests; - - struct drm_i915_error_waiter { - char comm[TASK_COMM_LEN]; - pid_t pid; - u32 seqno; - } *waiters; - - struct { - u32 gfx_mode; - union { - u64 pdp[4]; - u32 pp_dir_base; - }; - } vm_info; - - pid_t pid; - char comm[TASK_COMM_LEN]; - } ring[I915_NUM_ENGINES]; - - struct drm_i915_error_buffer { - u32 size; - u32 name; - u32 rseqno[I915_NUM_ENGINES], wseqno; - u64 gtt_offset; - u32 read_domains; - u32 write_domain; - s32 fence_reg:I915_MAX_NUM_FENCE_BITS; - s32 pinned:2; - u32 tiling:2; - u32 dirty:1; - u32 purgeable:1; - u32 userptr:1; - s32 ring:4; - u32 cache_level:3; - } **active_bo, **pinned_bo; - - u32 *active_bo_count, *pinned_bo_count; - u32 vm_count; -}; - struct intel_connector; struct intel_encoder; struct intel_crtc_state; @@ -629,8 +510,12 @@ struct drm_i915_display_funcs { struct intel_initial_plane_config *); int (*crtc_compute_clock)(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state); - void (*crtc_enable)(struct drm_crtc *crtc); - void (*crtc_disable)(struct drm_crtc *crtc); + void (*crtc_enable)(struct intel_crtc_state *pipe_config, + struct drm_atomic_state *old_state); + void (*crtc_disable)(struct intel_crtc_state *old_crtc_state, + struct drm_atomic_state *old_state); + void (*update_crtcs)(struct drm_atomic_state *state, + unsigned int *crtc_vblank_mask); void (*audio_codec_enable)(struct drm_connector *connector, struct intel_encoder *encoder, const struct drm_display_mode *adjusted_mode); @@ -694,8 +579,6 @@ struct intel_uncore_funcs { uint16_t val, bool trace); void (*mmio_writel)(struct drm_i915_private *dev_priv, i915_reg_t r, uint32_t val, bool trace); - void (*mmio_writeq)(struct drm_i915_private *dev_priv, i915_reg_t r, - uint64_t val, bool 
trace); }; struct intel_uncore { @@ -756,7 +639,7 @@ struct intel_csr { func(is_i915g) sep \ func(is_i945gm) sep \ func(is_g33) sep \ - func(need_gfx_hws) sep \ + func(hws_needs_physical) sep \ func(is_g4x) sep \ func(is_pineview) sep \ func(is_broadwater) sep \ @@ -771,6 +654,19 @@ struct intel_csr { func(is_kabylake) sep \ func(is_preliminary) sep \ func(has_fbc) sep \ + func(has_psr) sep \ + func(has_runtime_pm) sep \ + func(has_csr) sep \ + func(has_resource_streamer) sep \ + func(has_rc6) sep \ + func(has_rc6p) sep \ + func(has_dp_mst) sep \ + func(has_gmbus_irq) sep \ + func(has_hw_contexts) sep \ + func(has_logical_ring_contexts) sep \ + func(has_l3_dpf) sep \ + func(has_gmch_display) sep \ + func(has_guc) sep \ func(has_pipe_cxsr) sep \ func(has_hotplug) sep \ func(cursor_needs_physical) sep \ @@ -786,6 +682,24 @@ struct intel_csr { #define DEFINE_FLAG(name) u8 name:1 #define SEP_SEMICOLON ; +struct sseu_dev_info { + u8 slice_mask; + u8 subslice_mask; + u8 eu_total; + u8 eu_per_subslice; + u8 min_eu_in_pool; + /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */ + u8 subslice_7eu[3]; + u8 has_slice_pg:1; + u8 has_subslice_pg:1; + u8 has_eu_pg:1; +}; + +static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu) +{ + return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask); +} + struct intel_device_info { u32 display_mmio_offset; u16 device_id; @@ -794,7 +708,9 @@ struct intel_device_info { u8 gen; u16 gen_mask; u8 ring_mask; /* Rings supported by the HW */ + u8 num_rings; DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG, SEP_SEMICOLON); + u16 ddb_size; /* in blocks */ /* Register offsets for the various display pipes and transcoders */ int pipe_offsets[I915_MAX_TRANSCODERS]; int trans_offsets[I915_MAX_TRANSCODERS]; @@ -802,17 +718,7 @@ struct intel_device_info { int cursor_offsets[I915_MAX_PIPES]; /* Slice/subslice/EU info */ - u8 slice_total; - u8 subslice_total; - u8 subslice_per_slice; - u8 eu_total; - u8 eu_per_subslice; - u8 min_eu_in_pool; - /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? 
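Because slice_mask and subslice_mask are plain bitmasks, every total the old structure cached can be recomputed from population counts, which is all sseu_subslice_total() above does. A worked example with illustrative values:

static void example_sseu_math(void)
{
	struct sseu_dev_info sseu = {
		.slice_mask	 = BIT(0),	/* hweight8(0x01) == 1 slice */
		.subslice_mask	 = 0x7,		/* hweight8(0x07) == 3 subslices */
		.eu_per_subslice = 8,		/* illustrative */
	};

	/* 1 slice * 3 subslices == 3 */
	unsigned int subslices = sseu_subslice_total(&sseu);

	/* as in broadwell_sseu_device_status(): 8 * 3 == 24 EUs */
	unsigned int eu_total = sseu.eu_per_subslice * subslices;

	(void)eu_total;
}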
*/ - u8 subslice_7eu[3]; - u8 has_slice_pg:1; - u8 has_subslice_pg:1; - u8 has_eu_pg:1; + struct sseu_dev_info sseu; struct color_luts { u16 degamma_lut_size; @@ -823,6 +729,134 @@ struct intel_device_info { #undef DEFINE_FLAG #undef SEP_SEMICOLON +struct intel_display_error_state; + +struct drm_i915_error_state { + struct kref ref; + struct timeval time; + + char error_msg[128]; + bool simulated; + int iommu; + u32 reset_count; + u32 suspend_count; + struct intel_device_info device_info; + + /* Generic register state */ + u32 eir; + u32 pgtbl_er; + u32 ier; + u32 gtier[4]; + u32 ccid; + u32 derrmr; + u32 forcewake; + u32 error; /* gen6+ */ + u32 err_int; /* gen7 */ + u32 fault_data0; /* gen8, gen9 */ + u32 fault_data1; /* gen8, gen9 */ + u32 done_reg; + u32 gac_eco; + u32 gam_ecochk; + u32 gab_ctl; + u32 gfx_mode; + u32 extra_instdone[I915_NUM_INSTDONE_REG]; + u64 fence[I915_MAX_NUM_FENCES]; + struct intel_overlay_error_state *overlay; + struct intel_display_error_state *display; + struct drm_i915_error_object *semaphore; + + struct drm_i915_error_engine { + int engine_id; + /* Software tracked state */ + bool waiting; + int num_waiters; + int hangcheck_score; + enum intel_engine_hangcheck_action hangcheck_action; + struct i915_address_space *vm; + int num_requests; + + /* our own tracking of ring head and tail */ + u32 cpu_ring_head; + u32 cpu_ring_tail; + + u32 last_seqno; + u32 semaphore_seqno[I915_NUM_ENGINES - 1]; + + /* Register state */ + u32 start; + u32 tail; + u32 head; + u32 ctl; + u32 mode; + u32 hws; + u32 ipeir; + u32 ipehr; + u32 instdone; + u32 bbstate; + u32 instpm; + u32 instps; + u32 seqno; + u64 bbaddr; + u64 acthd; + u32 fault_reg; + u64 faddr; + u32 rc_psmi; /* sleep state */ + u32 semaphore_mboxes[I915_NUM_ENGINES - 1]; + + struct drm_i915_error_object { + int page_count; + u64 gtt_offset; + u64 gtt_size; + u32 *pages[0]; + } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page; + + struct drm_i915_error_object *wa_ctx; + + struct drm_i915_error_request { + long jiffies; + pid_t pid; + u32 seqno; + u32 head; + u32 tail; + } *requests; + + struct drm_i915_error_waiter { + char comm[TASK_COMM_LEN]; + pid_t pid; + u32 seqno; + } *waiters; + + struct { + u32 gfx_mode; + union { + u64 pdp[4]; + u32 pp_dir_base; + }; + } vm_info; + + pid_t pid; + char comm[TASK_COMM_LEN]; + } engine[I915_NUM_ENGINES]; + + struct drm_i915_error_buffer { + u32 size; + u32 name; + u32 rseqno[I915_NUM_ENGINES], wseqno; + u64 gtt_offset; + u32 read_domains; + u32 write_domain; + s32 fence_reg:I915_MAX_NUM_FENCE_BITS; + u32 tiling:2; + u32 dirty:1; + u32 purgeable:1; + u32 userptr:1; + s32 engine:4; + u32 cache_level:3; + } *active_bo[I915_NUM_ENGINES], *pinned_bo; + u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count; + struct i915_address_space *active_vm[I915_NUM_ENGINES]; +}; + enum i915_cache_level { I915_CACHE_NONE = 0, I915_CACHE_LLC, /* also used for snoopable memory on non-LLC */ @@ -879,22 +913,23 @@ struct i915_gem_context { struct drm_i915_private *i915; struct drm_i915_file_private *file_priv; struct i915_hw_ppgtt *ppgtt; + struct pid *pid; struct i915_ctx_hang_stats hang_stats; - /* Unique identifier for this context, used by the hw for tracking */ unsigned long flags; #define CONTEXT_NO_ZEROMAP BIT(0) #define CONTEXT_NO_ERROR_CAPTURE BIT(1) - unsigned hw_id; + + /* Unique identifier for this context, used by the hw for tracking */ + unsigned int hw_id; u32 user_handle; u32 ggtt_alignment; struct intel_context { - struct drm_i915_gem_object *state; - struct 
intel_ringbuffer *ringbuf; - struct i915_vma *lrc_vma; + struct i915_vma *state; + struct intel_ring *ring; uint32_t *lrc_reg_state; u64 lrc_desc; int pin_count; @@ -908,6 +943,7 @@ struct i915_gem_context { struct list_head link; u8 remap_slice; + bool closed:1; }; enum fb_op_origin { @@ -1061,13 +1097,6 @@ struct intel_gmbus { struct i915_suspend_saved_registers { u32 saveDSPARB; - u32 saveLVDS; - u32 savePP_ON_DELAYS; - u32 savePP_OFF_DELAYS; - u32 savePP_ON; - u32 savePP_OFF; - u32 savePP_CONTROL; - u32 savePP_DIVISOR; u32 saveFBC_CONTROL; u32 saveCACHE_MODE_0; u32 saveMI_ARB_STATE; @@ -1156,6 +1185,7 @@ struct intel_gen6_power_mgmt { bool interrupts_enabled; u32 pm_iir; + /* PM interrupt bits that should never be masked */ u32 pm_intr_keep; /* Frequencies are stored in potentially platform dependent multiples. @@ -1173,6 +1203,7 @@ struct intel_gen6_power_mgmt { u8 max_freq_softlimit; /* Max frequency permitted by the driver */ u8 max_freq; /* Maximum frequency, RP0 if not overclocking */ u8 min_freq; /* AKA RPn. Minimum frequency */ + u8 boost_freq; /* Frequency to request when wait boosting */ u8 idle_freq; /* Frequency to request when we are idle */ u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */ u8 rp1_freq; /* "less than" RP0 power/frequency */ @@ -1190,11 +1221,9 @@ struct intel_gen6_power_mgmt { bool client_boost; bool enabled; - struct delayed_work delayed_resume_work; + struct delayed_work autoenable_work; unsigned boosts; - struct intel_rps_client semaphores, mmioflips; - /* manual wa residency calculations */ struct intel_rps_ei up_ei, down_ei; @@ -1319,7 +1348,6 @@ struct i915_gem_mm { struct notifier_block oom_notifier; struct notifier_block vmap_notifier; struct shrinker shrinker; - bool shrinker_no_lock_stealing; /** LRU list of objects with fence regs on them. */ struct list_head fence_list; @@ -1331,7 +1359,7 @@ struct i915_gem_mm { bool interruptible; /* the indicator for dispatch video commands on two BSD rings */ - unsigned int bsd_ring_dispatch_index; + atomic_t bsd_engine_dispatch_index; /** Bit 6 swizzling required for X tiling */ uint32_t bit_6_swizzle_x; @@ -1379,9 +1407,10 @@ struct i915_gpu_error { * State variable controlling the reset flow and count * * This is a counter which gets incremented when reset is triggered, - * and again when reset has been handled. So odd values (lowest bit set) - * means that reset is in progress and even values that - * (reset_counter >> 1):th reset was successfully completed. + * + * Before the reset commences, the I915_RESET_IN_PROGRESS bit is set + * meaning that any waiters holding onto the struct_mutex should + * relinquish the lock immediately in order for the reset to start. * * If reset is not completed successfully, the I915_WEDGED bit is * set meaning that hardware is terminally sour and there is no @@ -1396,10 +1425,11 @@ struct i915_gpu_error { * naturally enforces the correct ordering between the bail-out of the * waiter and the gpu reset work code. */ - atomic_t reset_counter; + unsigned long reset_count; -#define I915_RESET_IN_PROGRESS_FLAG 1 -#define I915_WEDGED (1 << 31) + unsigned long flags; +#define I915_RESET_IN_PROGRESS 0 +#define I915_WEDGED (BITS_PER_LONG - 1) /** * Waitqueue to signal when a hang is detected. Used for waiters
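With the packed reset_counter gone, reset state lives in two independent bits of i915_gpu_error::flags plus a plain reset_count. A minimal sketch of how those bits are intended to be driven, assuming only the kernel's generic bitops; do_hw_reset() and the reset_queue field name are stand-ins, and all locking and error-capture work is omitted:

/* Illustrative only: driving the I915_RESET_IN_PROGRESS/I915_WEDGED
 * bits. do_hw_reset() is a made-up stand-in for the real reset work,
 * and "reset_queue" is an assumed name for the waitqueue introduced
 * by the comment above.
 */
static void sketch_handle_hang(struct i915_gpu_error *error)
{
	/* Claim the reset; waiters seeing this bit drop struct_mutex. */
	if (test_and_set_bit(I915_RESET_IN_PROGRESS, &error->flags))
		return; /* a reset is already in flight */

	if (do_hw_reset() == 0)
		error->reset_count++; /* bumped only on success */
	else
		set_bit(I915_WEDGED, &error->flags); /* terminally sour */

	clear_bit(I915_RESET_IN_PROGRESS, &error->flags);
	wake_up_all(&error->reset_queue);
}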
@@ -1670,7 +1700,7 @@ struct intel_pipe_crc { }; struct i915_frontbuffer_tracking { - struct mutex lock; + spinlock_t lock; /* * Tracking bits for delayed frontbuffer flushing due to gpu activity or @@ -1705,18 +1735,6 @@ struct i915_virtual_gpu { bool active; }; -struct i915_execbuffer_params { - struct drm_device *dev; - struct drm_file *file; - uint32_t dispatch_flags; - uint32_t args_batch_start_offset; - uint64_t batch_obj_vm_offset; - struct intel_engine_cs *engine; - struct drm_i915_gem_object *batch_obj; - struct i915_gem_context *ctx; - struct drm_i915_gem_request *request; -}; - /* used in computing the new watermarks state */ struct intel_wm_config { unsigned int num_pipes_active; @@ -1763,13 +1781,15 @@ struct drm_i915_private { uint32_t psr_mmio_base; + uint32_t pps_mmio_base; + wait_queue_head_t gmbus_wait_queue; struct pci_dev *bridge_dev; struct i915_gem_context *kernel_context; struct intel_engine_cs engine[I915_NUM_ENGINES]; - struct drm_i915_gem_object *semaphore_obj; - uint32_t last_seqno, next_seqno; + struct i915_vma *semaphore; + u32 next_seqno; struct drm_dma_handle *status_page_dmah; struct resource mch_res; @@ -1854,6 +1874,7 @@ struct drm_i915_private { enum modeset_restore modeset_restore; struct mutex modeset_restore_lock; struct drm_atomic_state *modeset_restore_state; + struct drm_modeset_acquire_ctx reset_ctx; struct list_head vm_list; /* Global list of all address spaces */ struct i915_ggtt ggtt; /* VM representing the global address space */ @@ -1962,6 +1983,13 @@ struct drm_i915_private { struct i915_suspend_saved_registers regfile; struct vlv_s0ix_state vlv_s0ix_state; + enum { + I915_SAGV_UNKNOWN = 0, + I915_SAGV_DISABLED, + I915_SAGV_ENABLED, + I915_SAGV_NOT_CONTROLLED + } sagv_status; + struct { /* * Raw watermark latency values: @@ -2016,12 +2044,8 @@ struct drm_i915_private { /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ struct { - int (*execbuf_submit)(struct i915_execbuffer_params *params, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas); - int (*init_engines)(struct drm_device *dev); + void (*resume)(struct drm_i915_private *); void (*cleanup_engine)(struct intel_engine_cs *engine); - void (*stop_engine)(struct intel_engine_cs *engine); /** * Is the GPU currently considered idle, or busy executing @@ -2068,9 +2092,9 @@ static inline struct drm_i915_private *to_i915(const struct drm_device *dev) return container_of(dev, struct drm_i915_private, drm); } -static inline struct drm_i915_private *dev_to_i915(struct device *dev) +static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) { - return to_i915(dev_get_drvdata(dev)); + return to_i915(dev_get_drvdata(kdev)); } static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc) @@ -2093,13 +2117,16 @@ static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc) for_each_if (((id__) = (engine__)->id, \ intel_engine_initialized(engine__))) +#define __mask_next_bit(mask) ({ \ + int __idx = ffs(mask) - 1; \ + mask &= ~BIT(__idx); \ + __idx; \ +}) + /* Iterator over subset of engines selected by mask */ -#define for_each_engine_masked(engine__, dev_priv__, mask__) \ - for ((engine__) = &(dev_priv__)->engine[0]; \ - (engine__) < &(dev_priv__)->engine[I915_NUM_ENGINES]; \ - (engine__)++) \ - for_each_if (((mask__) & intel_engine_flag(engine__)) && \ - intel_engine_initialized(engine__)) +#define for_each_engine_masked(engine__, dev_priv__, mask__, tmp__) \ + for (tmp__ = mask__ & INTEL_INFO(dev_priv__)->ring_mask; \ + tmp__ ? (engine__ = &(dev_priv__)->engine[__mask_next_bit(tmp__)]), 1 : 0; )
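The rewritten for_each_engine_masked() above consumes a scratch copy of the mask one set bit at a time via __mask_next_bit(). A standalone, userspace-compilable sketch of the same ffs()-driven walk (the engine names and mask values are made up for illustration):

#include <stdio.h>
#include <strings.h>	/* ffs() */

struct engine { const char *name; };

int main(void)
{
	struct engine engines[] = {
		{ "rcs" }, { "bcs" }, { "vcs" }, { "vcs2" }, { "vecs" },
	};
	unsigned int hw_mask = 0x17;	/* engines this "device" has */
	unsigned int req_mask = 0x06;	/* engines the caller asked for */
	unsigned int tmp = req_mask & hw_mask;

	/* Same shape as for_each_engine_masked(): each pass clears the
	 * lowest set bit of tmp and uses it to index the engine array. */
	while (tmp) {
		int idx = ffs(tmp) - 1;	/* what __mask_next_bit() does */
		tmp &= ~(1u << idx);
		printf("engine[%d] = %s\n", idx, engines[idx].name);
	}
	return 0;
}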
enum hdmi_force_audio { HDMI_AUDIO_OFF_DVI = -2, /* no aux data for HDMI-DVI converter */ @@ -2144,8 +2171,6 @@ struct drm_i915_gem_object_ops { */ #define INTEL_MAX_SPRITE_BITS_PER_PIPE 5 #define INTEL_FRONTBUFFER_BITS_PER_PIPE 8 -#define INTEL_FRONTBUFFER_BITS \ - (INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES) #define INTEL_FRONTBUFFER_PRIMARY(pipe) \ (1 << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe))) #define INTEL_FRONTBUFFER_CURSOR(pipe) \ @@ -2169,18 +2194,21 @@ struct drm_i915_gem_object { struct drm_mm_node *stolen; struct list_head global_list; - struct list_head engine_list[I915_NUM_ENGINES]; /** Used in execbuf to temporarily hold a ref */ struct list_head obj_exec_link; struct list_head batch_pool_link; + unsigned long flags; /** * This is set if the object is on the active lists (has pending * rendering and so a non-zero seqno), and is not set if it is on * inactive (ready to be unbound) list. */ - unsigned int active:I915_NUM_ENGINES; +#define I915_BO_ACTIVE_SHIFT 0 +#define I915_BO_ACTIVE_MASK ((1 << I915_NUM_ENGINES) - 1) +#define __I915_BO_ACTIVE(bo) \ + ((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK) /** * This is set if the object has been written to since last bound @@ -2189,37 +2217,11 @@ struct drm_i915_gem_object { unsigned int dirty:1; /** - * Fence register bits (if any) for this object. Will be set - * as needed when mapped into the GTT. - * Protected by dev->struct_mutex. - */ - signed int fence_reg:I915_MAX_NUM_FENCE_BITS; - - /** * Advice: are the backing pages purgeable? */ unsigned int madv:2; /** - * Current tiling mode for the object. - */ - unsigned int tiling_mode:2; - /** - * Whether the tiling parameters for the currently associated fence - * register have changed. Note that for the purposes of tracking - * tiling changes we also treat the unfenced register, the register - * slot that the object occupies whilst it executes a fenced - * command (such as BLT on gen2/3), as a "fence". - */ - unsigned int fence_dirty:1; - - /** - * Is the object at the current location in the gtt mappable and - * fenceable? Used to avoid costly recalculations. - */ - unsigned int map_and_fenceable:1; - - /** * Whether the current gtt mapping needs to be mappable (and isn't just * mappable by accident). Track pin and fault separately for a more * accurate mappable working set. @@ -2234,9 +2236,17 @@ struct drm_i915_gem_object { unsigned int cache_level:3; unsigned int cache_dirty:1; - unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS; + atomic_t frontbuffer_bits; + unsigned int frontbuffer_ggtt_origin; /* write once */ - unsigned int has_wc_mmap; + /** Current tiling stride for the object, if it's tiled. */ + unsigned int tiling_and_stride; +#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */ +#define TILING_MASK (FENCE_MINIMUM_STRIDE-1) +#define STRIDE_MASK (~TILING_MASK) + + /** Count of VMA actually bound by this object */ + unsigned int bind_count; unsigned int pin_display; struct sg_table *pages; @@ -2256,14 +2266,9 @@ struct drm_i915_gem_object { * requests on one ring where the write request is older than the * read request. This allows for the CPU to read from an active * buffer by only waiting for the write to complete. - */ - struct drm_i915_gem_request *last_read_req[I915_NUM_ENGINES]; - struct drm_i915_gem_request *last_write_req; - /** Breadcrumb of last fenced GPU access to the buffer.
*/ - struct drm_i915_gem_request *last_fenced_req; - - /** Current tiling stride for the object, if it's tiled. */ - uint32_t stride; + */ + struct i915_gem_active last_read[I915_NUM_ENGINES]; + struct i915_gem_active last_write; /** References from framebuffers, locks out tiling changes. */ unsigned long framebuffer_references; @@ -2271,23 +2276,70 @@ struct drm_i915_gem_object { /** Record of address bit 17 of each page at last unbind. */ unsigned long *bit_17; - union { - /** for phy allocated objects */ - struct drm_dma_handle *phys_handle; - - struct i915_gem_userptr { - uintptr_t ptr; - unsigned read_only :1; - unsigned workers :4; + struct i915_gem_userptr { + uintptr_t ptr; + unsigned read_only :1; + unsigned workers :4; #define I915_GEM_USERPTR_MAX_WORKERS 15 - struct i915_mm_struct *mm; - struct i915_mmu_object *mmu_object; - struct work_struct *work; - } userptr; - }; + struct i915_mm_struct *mm; + struct i915_mmu_object *mmu_object; + struct work_struct *work; + } userptr; + + /** for phys allocated objects */ + struct drm_dma_handle *phys_handle; }; -#define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base) + +static inline struct drm_i915_gem_object * +to_intel_bo(struct drm_gem_object *gem) +{ + /* Assert that to_intel_bo(NULL) == NULL */ + BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base)); + + return container_of(gem, struct drm_i915_gem_object, base); +} + +static inline struct drm_i915_gem_object * +i915_gem_object_lookup(struct drm_file *file, u32 handle) +{ + return to_intel_bo(drm_gem_object_lookup(file, handle)); +} + +__deprecated +extern struct drm_gem_object * +drm_gem_object_lookup(struct drm_file *file, u32 handle); + +__attribute__((nonnull)) +static inline struct drm_i915_gem_object * +i915_gem_object_get(struct drm_i915_gem_object *obj) +{ + drm_gem_object_reference(&obj->base); + return obj; +} + +__deprecated +extern void drm_gem_object_reference(struct drm_gem_object *); + +__attribute__((nonnull)) +static inline void +i915_gem_object_put(struct drm_i915_gem_object *obj) +{ + drm_gem_object_unreference(&obj->base); +} + +__deprecated +extern void drm_gem_object_unreference(struct drm_gem_object *); + +__attribute__((nonnull)) +static inline void +i915_gem_object_put_unlocked(struct drm_i915_gem_object *obj) +{ + drm_gem_object_unreference_unlocked(&obj->base); +} + +__deprecated +extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *); static inline bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) @@ -2295,6 +2347,67 @@ i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE; } +static inline unsigned long +i915_gem_object_get_active(const struct drm_i915_gem_object *obj) +{ + return (obj->flags >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK; +} + +static inline bool +i915_gem_object_is_active(const struct drm_i915_gem_object *obj) +{ + return i915_gem_object_get_active(obj); +} + +static inline void +i915_gem_object_set_active(struct drm_i915_gem_object *obj, int engine) +{ + obj->flags |= BIT(engine + I915_BO_ACTIVE_SHIFT); +} + +static inline void +i915_gem_object_clear_active(struct drm_i915_gem_object *obj, int engine) +{ + obj->flags &= ~BIT(engine + I915_BO_ACTIVE_SHIFT); +} + +static inline bool +i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj, + int engine) +{ + return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT); +} + +static inline unsigned int +i915_gem_object_get_tiling(struct 
drm_i915_gem_object *obj) +{ + return obj->tiling_and_stride & TILING_MASK; +} + +static inline bool +i915_gem_object_is_tiled(struct drm_i915_gem_object *obj) +{ + return i915_gem_object_get_tiling(obj) != I915_TILING_NONE; +} + +static inline unsigned int +i915_gem_object_get_stride(struct drm_i915_gem_object *obj) +{ + return obj->tiling_and_stride & STRIDE_MASK; +} + +static inline struct i915_vma *i915_vma_get(struct i915_vma *vma) +{ + i915_gem_object_get(vma->obj); + return vma; +} + +static inline void i915_vma_put(struct i915_vma *vma) +{ + lockdep_assert_held(&vma->vm->dev->struct_mutex); + i915_gem_object_put(vma->obj); +} + /* * Optimised SGL iterator for GEM objects */ @@ -2365,171 +2478,6 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg) (((__iter).curr += PAGE_SIZE) < (__iter).max) || \ ((__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0)) -/** - * Request queue structure. - * - * The request queue allows us to note sequence numbers that have been emitted - * and may be associated with active buffers to be retired. - * - * By keeping this list, we can avoid having to do questionable sequence - * number comparisons on buffer last_read|write_seqno. It also allows an - * emission time to be associated with the request for tracking how far ahead - * of the GPU the submission is. - * - * The requests are reference counted, so upon creation they should have an - * initial reference taken using kref_init - */ -struct drm_i915_gem_request { - struct kref ref; - - /** On which ring this request was generated */ - struct drm_i915_private *i915; - struct intel_engine_cs *engine; - struct intel_signal_node signaling; - - /** GEM sequence number associated with the previous request, - * when the HWS breadcrumb is equal to this the GPU is processing - * this request. - */ - u32 previous_seqno; - - /** GEM sequence number associated with this request, - * when the HWS breadcrumb is equal or greater than this the GPU - * has finished processing this request. - */ - u32 seqno; - - /** Position in the ringbuffer of the start of the request */ - u32 head; - - /** - * Position in the ringbuffer of the start of the postfix. - * This is required to calculate the maximum available ringbuffer - * space without overwriting the postfix. - */ - u32 postfix; - - /** Position in the ringbuffer of the end of the whole request */ - u32 tail; - - /** Preallocate space in the ringbuffer for emitting the request */ - u32 reserved_space; - - /** - * Context and ring buffer related to this request - * Contexts are refcounted, so when this request is associated with a - * context, we must increment the context's refcount, to guarantee that - * it persists while any request is linked to it. Requests themselves - * are also refcounted, so the request will only be freed when the last - * reference to it is dismissed, and the code in - * i915_gem_request_free() will then decrement the refcount on the - * context. - */ - struct i915_gem_context *ctx; - struct intel_ringbuffer *ringbuf; - - /** - * Context related to the previous request. - * As the contexts are accessed by the hardware until the switch is - * completed to a new context, the hardware may still be writing - * to the context object after the breadcrumb is visible. We must - * not unpin/unbind/prune that object whilst still active and so - * we keep the previous context pinned until the following (this) - * request is retired.
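The seqno fields documented above are only meaningful under wrap-safe comparison: subtracting two u32 seqnos and interpreting the result as s32 keeps ordering correct across the 32-bit wrap, which is the idiom the i915_seqno_passed() helper removed later in this file is built on. A small worked example:

#include <assert.h>
#include <stdint.h>

/* Wrap-safe "a is at or after b", as used for hardware seqnos. */
static int seqno_passed(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) >= 0;
}

int main(void)
{
	/* A plain comparison fails across the wrap: 2 < 0xfffffff0 ... */
	assert(!(2 >= UINT32_C(0xfffffff0)));
	/* ...but 2 - 0xfffffff0 == 18 as s32, so 2 has "passed" it. */
	assert(seqno_passed(2, UINT32_C(0xfffffff0)));
	assert(!seqno_passed(UINT32_C(0xfffffff0), 2));
	return 0;
}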
- */ - struct i915_gem_context *previous_context; - - /** Batch buffer related to this request if any (used for - error state dump only) */ - struct drm_i915_gem_object *batch_obj; - - /** Time at which this request was emitted, in jiffies. */ - unsigned long emitted_jiffies; - - /** global list entry for this request */ - struct list_head list; - - struct drm_i915_file_private *file_priv; - /** file_priv list entry for this request */ - struct list_head client_list; - - /** process identifier submitting this request */ - struct pid *pid; - - /** - * The ELSP only accepts two elements at a time, so we queue - * context/tail pairs on a given queue (ring->execlist_queue) until the - * hardware is available. The queue serves a double purpose: we also use - * it to keep track of the up to 2 contexts currently in the hardware - * (usually one in execution and the other queued up by the GPU): We - * only remove elements from the head of the queue when the hardware - * informs us that an element has been completed. - * - * All accesses to the queue are mediated by a spinlock - * (ring->execlist_lock). - */ - - /** Execlist link in the submission queue.*/ - struct list_head execlist_link; - - /** Execlists no. of times this request has been sent to the ELSP */ - int elsp_submitted; - - /** Execlists context hardware id. */ - unsigned ctx_hw_id; -}; - -struct drm_i915_gem_request * __must_check -i915_gem_request_alloc(struct intel_engine_cs *engine, - struct i915_gem_context *ctx); -void i915_gem_request_free(struct kref *req_ref); -int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, - struct drm_file *file); - -static inline uint32_t -i915_gem_request_get_seqno(struct drm_i915_gem_request *req) -{ - return req ? req->seqno : 0; -} - -static inline struct intel_engine_cs * -i915_gem_request_get_engine(struct drm_i915_gem_request *req) -{ - return req ? req->engine : NULL; -} - -static inline struct drm_i915_gem_request * -i915_gem_request_reference(struct drm_i915_gem_request *req) -{ - if (req) - kref_get(&req->ref); - return req; -} - -static inline void -i915_gem_request_unreference(struct drm_i915_gem_request *req) -{ - kref_put(&req->ref, i915_gem_request_free); -} - -static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, - struct drm_i915_gem_request *src) -{ - if (src) - i915_gem_request_reference(src); - - if (*pdst) - i915_gem_request_unreference(*pdst); - - *pdst = src; -} - -/* - * XXX: i915_gem_request_completed should be here but currently needs the - * definition of i915_seqno_passed() which is below. It will be moved in - * a later patch when the call to i915_seqno_passed() is obsoleted... - */ - /* * A command that requires special handling by the command parser. */ @@ -2617,8 +2565,9 @@ struct drm_i915_cmd_descriptor { /* * A table of commands requiring special handling by the command parser. * - * Each ring has an array of tables. Each table consists of an array of command - * descriptors, which must be sorted with command opcodes in ascending order. + * Each engine has an array of tables. Each table consists of an array of + * command descriptors, which must be sorted with command opcodes in + * ascending order. 
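The removed i915_gem_request_assign() above is worth a second look: the new reference is taken before the old one is dropped, so the helper stays correct even when src == *pdst (put-before-get could transiently drop the count to zero and free the object mid-assignment). The same pattern in isolation, for a generic refcounted object rather than the i915 request type:

#include <stdlib.h>

struct obj { int refcount; };

static void obj_get(struct obj *o) { o->refcount++; }

static void obj_put(struct obj *o)
{
	if (--o->refcount == 0)
		free(o);
}

/* Get-before-put, as in i915_gem_request_assign(): taking the new
 * reference first makes self-assignment (src == *slot) harmless. */
static void obj_assign(struct obj **slot, struct obj *src)
{
	if (src)
		obj_get(src);
	if (*slot)
		obj_put(*slot);
	*slot = src;
}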
*/ struct drm_i915_cmd_table { const struct drm_i915_cmd_descriptor *table; @@ -2636,7 +2585,7 @@ struct drm_i915_cmd_table { BUILD_BUG(); \ __p; \ }) -#define INTEL_INFO(p) (&__I915__(p)->info) +#define INTEL_INFO(p) (&__I915__(p)->info) #define INTEL_GEN(p) (INTEL_INFO(p)->gen) #define INTEL_DEVID(p) (INTEL_INFO(p)->device_id) @@ -2803,10 +2752,10 @@ struct drm_i915_cmd_table { #define HAS_EDRAM(dev) (!!(__I915__(dev)->edram_cap & EDRAM_ENABLED)) #define HAS_WT(dev) ((IS_HASWELL(dev) || IS_BROADWELL(dev)) && \ HAS_EDRAM(dev)) -#define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)->need_gfx_hws) +#define HWS_NEEDS_PHYSICAL(dev) (INTEL_INFO(dev)->hws_needs_physical) -#define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 6) -#define HAS_LOGICAL_RING_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 8) +#define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->has_hw_contexts) +#define HAS_LOGICAL_RING_CONTEXTS(dev) (INTEL_INFO(dev)->has_logical_ring_contexts) #define USES_PPGTT(dev) (i915.enable_ppgtt) #define USES_FULL_PPGTT(dev) (i915.enable_ppgtt >= 2) #define USES_FULL_48BIT_PPGTT(dev) (i915.enable_ppgtt == 3) @@ -2830,7 +2779,7 @@ struct drm_i915_cmd_table { * interrupt source and so prevents the other device from working properly. */ #define HAS_AUX_IRQ(dev) (INTEL_INFO(dev)->gen >= 5) -#define HAS_GMBUS_IRQ(dev) (INTEL_INFO(dev)->gen >= 5) +#define HAS_GMBUS_IRQ(dev) (INTEL_INFO(dev)->has_gmbus_irq) /* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte * rows, which changed the alignment requirements and fence programming. @@ -2846,38 +2795,27 @@ struct drm_i915_cmd_table { #define HAS_IPS(dev) (IS_HSW_ULT(dev) || IS_BROADWELL(dev)) -#define HAS_DP_MST(dev) (IS_HASWELL(dev) || IS_BROADWELL(dev) || \ - INTEL_INFO(dev)->gen >= 9) +#define HAS_DP_MST(dev) (INTEL_INFO(dev)->has_dp_mst) #define HAS_DDI(dev) (INTEL_INFO(dev)->has_ddi) #define HAS_FPGA_DBG_UNCLAIMED(dev) (INTEL_INFO(dev)->has_fpga_dbg) -#define HAS_PSR(dev) (IS_HASWELL(dev) || IS_BROADWELL(dev) || \ - IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev) || \ - IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) -#define HAS_RUNTIME_PM(dev) (IS_GEN6(dev) || IS_HASWELL(dev) || \ - IS_BROADWELL(dev) || IS_VALLEYVIEW(dev) || \ - IS_CHERRYVIEW(dev) || IS_SKYLAKE(dev) || \ - IS_KABYLAKE(dev) || IS_BROXTON(dev)) -#define HAS_RC6(dev) (INTEL_INFO(dev)->gen >= 6) -#define HAS_RC6p(dev) (IS_GEN6(dev) || IS_IVYBRIDGE(dev)) - -#define HAS_CSR(dev) (IS_GEN9(dev)) +#define HAS_PSR(dev) (INTEL_INFO(dev)->has_psr) +#define HAS_RUNTIME_PM(dev) (INTEL_INFO(dev)->has_runtime_pm) +#define HAS_RC6(dev) (INTEL_INFO(dev)->has_rc6) +#define HAS_RC6p(dev) (INTEL_INFO(dev)->has_rc6p) + +#define HAS_CSR(dev) (INTEL_INFO(dev)->has_csr) /* * For now, anything with a GuC requires uCode loading, and then supports * command submission once loaded. But these are logically independent * properties, so we have separate macros to test them. 
*/ -#define HAS_GUC(dev) (IS_GEN9(dev)) +#define HAS_GUC(dev) (INTEL_INFO(dev)->has_guc) #define HAS_GUC_UCODE(dev) (HAS_GUC(dev)) #define HAS_GUC_SCHED(dev) (HAS_GUC(dev)) -#define HAS_RESOURCE_STREAMER(dev) (IS_HASWELL(dev) || \ - INTEL_INFO(dev)->gen >= 8) - -#define HAS_CORE_RING_FREQ(dev) (INTEL_INFO(dev)->gen >= 6 && \ - !IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) && \ - !IS_BROXTON(dev)) +#define HAS_RESOURCE_STREAMER(dev) (INTEL_INFO(dev)->has_resource_streamer) #define HAS_POOLED_EU(dev) (INTEL_INFO(dev)->has_pooled_eu) @@ -2905,11 +2843,10 @@ struct drm_i915_cmd_table { #define HAS_PCH_NOP(dev) (INTEL_PCH_TYPE(dev) == PCH_NOP) #define HAS_PCH_SPLIT(dev) (INTEL_PCH_TYPE(dev) != PCH_NONE) -#define HAS_GMCH_DISPLAY(dev) (INTEL_INFO(dev)->gen < 5 || \ - IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) +#define HAS_GMCH_DISPLAY(dev) (INTEL_INFO(dev)->has_gmch_display) /* DPF == dynamic parity feature */ -#define HAS_L3_DPF(dev) (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) +#define HAS_L3_DPF(dev) (INTEL_INFO(dev)->has_l3_dpf) #define NUM_L3_SLICES(dev) (IS_HSW_GT3(dev) ? 2 : HAS_L3_DPF(dev)) #define GT_FREQUENCY_MULTIPLIER 50 @@ -2930,7 +2867,9 @@ extern int i915_suspend_switcheroo(struct drm_device *dev, pm_message_t state); extern int i915_resume_switcheroo(struct drm_device *dev); int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, - int enable_ppgtt); + int enable_ppgtt); + +bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value); /* i915_drv.c */ void __printf(3, 4) @@ -2946,7 +2885,7 @@ extern long i915_compat_ioctl(struct file *filp, unsigned int cmd, #endif extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask); extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv); -extern int i915_reset(struct drm_i915_private *dev_priv); +extern void i915_reset(struct drm_i915_private *dev_priv); extern int intel_guc_reset(struct drm_i915_private *dev_priv); extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine); extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv); @@ -3107,11 +3046,6 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); -void i915_gem_execbuffer_move_to_active(struct list_head *vmas, - struct drm_i915_gem_request *req); -int i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas); int i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_execbuffer2(struct drm_device *dev, void *data, @@ -3140,6 +3074,7 @@ int i915_gem_wait_ioctl(struct drm_device *dev, void *data, void i915_gem_load_init(struct drm_device *dev); void i915_gem_load_cleanup(struct drm_device *dev); void i915_gem_load_init_fences(struct drm_i915_private *dev_priv); +int i915_gem_freeze(struct drm_i915_private *dev_priv); int i915_gem_freeze_late(struct drm_i915_private *dev_priv); void *i915_gem_object_alloc(struct drm_device *dev); @@ -3150,47 +3085,28 @@ struct drm_i915_gem_object *i915_gem_object_create(struct drm_device *dev, size_t size); struct drm_i915_gem_object *i915_gem_object_create_from_data( struct drm_device *dev, const void *data, size_t size); +void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file); void i915_gem_free_object(struct drm_gem_object *obj); -void i915_gem_vma_destroy(struct i915_vma *vma); - -/* Flags 
used by pin/bind&friends. */ -#define PIN_MAPPABLE (1<<0) -#define PIN_NONBLOCK (1<<1) -#define PIN_GLOBAL (1<<2) -#define PIN_OFFSET_BIAS (1<<3) -#define PIN_USER (1<<4) -#define PIN_UPDATE (1<<5) -#define PIN_ZONE_4G (1<<6) -#define PIN_HIGH (1<<7) -#define PIN_OFFSET_FIXED (1<<8) -#define PIN_OFFSET_MASK (~4095) -int __must_check -i915_gem_object_pin(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - uint32_t alignment, - uint64_t flags); -int __must_check + +struct i915_vma * __must_check i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, - uint32_t alignment, - uint64_t flags); + u64 size, + u64 alignment, + u64 flags); int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); -/* - * BEWARE: Do not use the function below unless you can _absolutely_ - * _guarantee_ VMA in question is _not in use_ anywhere. - */ -int __must_check __i915_vma_unbind_no_wait(struct i915_vma *vma); +void i915_vma_close(struct i915_vma *vma); +void i915_vma_destroy(struct i915_vma *vma); + +int i915_gem_object_unbind(struct drm_i915_gem_object *obj); int i915_gem_object_put_pages(struct drm_i915_gem_object *obj); void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv); void i915_gem_release_mmap(struct drm_i915_gem_object *obj); -int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, - int *needs_clflush); - int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj); static inline int __sg_page_count(struct scatterlist *sg) @@ -3250,13 +3166,20 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) obj->pages_pin_count--; } +enum i915_map_type { + I915_MAP_WB = 0, + I915_MAP_WC, +}; + /** * i915_gem_object_pin_map - return a contiguous mapping of the entire object * @obj - the object to map into kernel address space + * @type - the type of mapping, used to select pgprot_t * * Calls i915_gem_object_pin_pages() to prevent reaping of the object's * pages and then returns a contiguous mapping of the backing storage into - * the kernel address space. + * the kernel address space. Based on the @type of mapping, the PTE will be + * set to either WriteBack or WriteCombine (via pgprot_t). * * The caller must hold the struct_mutex, and is responsible for calling * i915_gem_object_unpin_map() when the mapping is no longer required. @@ -3264,7 +3187,8 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) * Returns the pointer through which to access the mapped object, or an * ERR_PTR() on error. 
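Paired with i915_gem_object_unpin_map() below, the expected shape of a caller is pin, access through the returned pointer, unpin. A hedged sketch, assuming struct_mutex is held as the comment above requires; the payload/len parameters are invented for illustration:

/* Illustrative caller of the pin_map/unpin_map pair documented above.
 * Assumes dev->struct_mutex is held; "payload" and "len" are made up. */
static int sketch_write_payload(struct drm_i915_gem_object *obj,
				const void *payload, size_t len)
{
	void *vaddr;

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	memcpy(vaddr, payload, len);

	i915_gem_object_unpin_map(obj);
	return 0;
}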
*/ -void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj); +void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj, + enum i915_map_type type); /** * i915_gem_object_unpin_map - releases an earlier mapping @@ -3283,122 +3207,73 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj) i915_gem_object_unpin_pages(obj); } +int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, + unsigned int *needs_clflush); +int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, + unsigned int *needs_clflush); +#define CLFLUSH_BEFORE 0x1 +#define CLFLUSH_AFTER 0x2 +#define CLFLUSH_FLAGS (CLFLUSH_BEFORE | CLFLUSH_AFTER) + +static inline void +i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj) +{ + i915_gem_object_unpin_pages(obj); +} + int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); -int i915_gem_object_sync(struct drm_i915_gem_object *obj, - struct intel_engine_cs *to, - struct drm_i915_gem_request **to_req); void i915_vma_move_to_active(struct i915_vma *vma, - struct drm_i915_gem_request *req); + struct drm_i915_gem_request *req, + unsigned int flags); int i915_gem_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev, uint32_t handle, uint64_t *offset); +int i915_gem_mmap_gtt_version(void); void i915_gem_track_fb(struct drm_i915_gem_object *old, struct drm_i915_gem_object *new, unsigned frontbuffer_bits); -/** - * Returns true if seq1 is later than seq2. - */ -static inline bool -i915_seqno_passed(uint32_t seq1, uint32_t seq2) -{ - return (int32_t)(seq1 - seq2) >= 0; -} - -static inline bool i915_gem_request_started(const struct drm_i915_gem_request *req) -{ - return i915_seqno_passed(intel_engine_get_seqno(req->engine), - req->previous_seqno); -} - -static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req) -{ - return i915_seqno_passed(intel_engine_get_seqno(req->engine), - req->seqno); -} - -bool __i915_spin_request(const struct drm_i915_gem_request *request, - int state, unsigned long timeout_us); -static inline bool i915_spin_request(const struct drm_i915_gem_request *request, - int state, unsigned long timeout_us) -{ - return (i915_gem_request_started(request) && - __i915_spin_request(request, state, timeout_us)); -} - -int __must_check i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno); int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno); struct drm_i915_gem_request * i915_gem_find_active_request(struct intel_engine_cs *engine); void i915_gem_retire_requests(struct drm_i915_private *dev_priv); -void i915_gem_retire_requests_ring(struct intel_engine_cs *engine); - -static inline u32 i915_reset_counter(struct i915_gpu_error *error) -{ - return atomic_read(&error->reset_counter); -} - -static inline bool __i915_reset_in_progress(u32 reset) -{ - return unlikely(reset & I915_RESET_IN_PROGRESS_FLAG); -} - -static inline bool __i915_reset_in_progress_or_wedged(u32 reset) -{ - return unlikely(reset & (I915_RESET_IN_PROGRESS_FLAG | I915_WEDGED)); -} - -static inline bool __i915_terminally_wedged(u32 reset) -{ - return unlikely(reset & I915_WEDGED); -} static inline bool i915_reset_in_progress(struct i915_gpu_error *error) { - return __i915_reset_in_progress(i915_reset_counter(error)); + return unlikely(test_bit(I915_RESET_IN_PROGRESS, &error->flags)); } -static inline bool 
i915_reset_in_progress_or_wedged(struct i915_gpu_error *error) +static inline bool i915_terminally_wedged(struct i915_gpu_error *error) { - return __i915_reset_in_progress_or_wedged(i915_reset_counter(error)); + return unlikely(test_bit(I915_WEDGED, &error->flags)); } -static inline bool i915_terminally_wedged(struct i915_gpu_error *error) +static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error *error) { - return __i915_terminally_wedged(i915_reset_counter(error)); + return i915_reset_in_progress(error) | i915_terminally_wedged(error); } static inline u32 i915_reset_count(struct i915_gpu_error *error) { - return ((i915_reset_counter(error) & ~I915_WEDGED) + 1) / 2; + return READ_ONCE(error->reset_count); } -void i915_gem_reset(struct drm_device *dev); +void i915_gem_reset(struct drm_i915_private *dev_priv); +void i915_gem_set_wedged(struct drm_i915_private *dev_priv); bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); int __must_check i915_gem_init(struct drm_device *dev); -int i915_gem_init_engines(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); void i915_gem_init_swizzling(struct drm_device *dev); void i915_gem_cleanup_engines(struct drm_device *dev); -int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv); +int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, + unsigned int flags); int __must_check i915_gem_suspend(struct drm_device *dev); -void __i915_add_request(struct drm_i915_gem_request *req, - struct drm_i915_gem_object *batch_obj, - bool flush_caches); -#define i915_add_request(req) \ - __i915_add_request(req, NULL, true) -#define i915_add_request_no_flush(req) \ - __i915_add_request(req, NULL, false) -int __i915_wait_request(struct drm_i915_gem_request *req, - bool interruptible, - s64 *timeout, - struct intel_rps_client *rps); -int __must_check i915_wait_request(struct drm_i915_gem_request *req); +void i915_gem_resume(struct drm_device *dev); int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); int __must_check i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, @@ -3408,22 +3283,20 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write); int __must_check i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write); -int __must_check +struct i915_vma * __must_check i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, u32 alignment, const struct i915_ggtt_view *view); -void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view); +void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma); int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align); int i915_gem_open(struct drm_device *dev, struct drm_file *file); void i915_gem_release(struct drm_device *dev, struct drm_file *file); -uint32_t -i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode); -uint32_t -i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, - int tiling_mode, bool fenced); +u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, u64 size, + int tiling_mode); +u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size, + int tiling_mode, bool fenced); int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level); @@ -3434,86 +3307,82 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, struct dma_buf 
*i915_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gem_obj, int flags); -u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, - const struct i915_ggtt_view *view); -u64 i915_gem_obj_offset(struct drm_i915_gem_object *o, - struct i915_address_space *vm); -static inline u64 -i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *o) -{ - return i915_gem_obj_ggtt_offset_view(o, &i915_ggtt_view_normal); -} - -bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o); -bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, - const struct i915_ggtt_view *view); -bool i915_gem_obj_bound(struct drm_i915_gem_object *o, - struct i915_address_space *vm); - struct i915_vma * i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, - struct i915_address_space *vm); -struct i915_vma * -i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view); + struct i915_address_space *vm, + const struct i915_ggtt_view *view); struct i915_vma * i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, - struct i915_address_space *vm); -struct i915_vma * -i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view); - -static inline struct i915_vma * -i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj) -{ - return i915_gem_obj_to_ggtt_view(obj, &i915_ggtt_view_normal); -} -bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj); + struct i915_address_space *vm, + const struct i915_ggtt_view *view); -/* Some GGTT VM helpers */ static inline struct i915_hw_ppgtt * i915_vm_to_ppgtt(struct i915_address_space *vm) { return container_of(vm, struct i915_hw_ppgtt, base); } +static inline struct i915_vma * +i915_gem_object_to_ggtt(struct drm_i915_gem_object *obj, + const struct i915_ggtt_view *view) +{ + return i915_gem_obj_to_vma(obj, &to_i915(obj->base.dev)->ggtt.base, view); +} -static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj) +static inline unsigned long +i915_gem_object_ggtt_offset(struct drm_i915_gem_object *o, + const struct i915_ggtt_view *view) { - return i915_gem_obj_ggtt_bound_view(obj, &i915_ggtt_view_normal); + return i915_ggtt_offset(i915_gem_object_to_ggtt(o, view)); } -unsigned long -i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj); +/* i915_gem_fence.c */ +int __must_check i915_vma_get_fence(struct i915_vma *vma); +int __must_check i915_vma_put_fence(struct i915_vma *vma); -static inline int __must_check -i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj, - uint32_t alignment, - unsigned flags) +/** + * i915_vma_pin_fence - pin fencing state + * @vma: vma to pin fencing for + * + * This pins the fencing state (whether tiled or untiled) to make sure the + * vma (and its object) is ready to be used as a scanout target. Fencing + * status must be synchronized first by calling i915_vma_get_fence(). + * + * The resulting fence pin reference must be released again with + * i915_vma_unpin_fence(). + * + * Returns: + * + * True if the vma has a fence, false otherwise.
+ */ +static inline bool +i915_vma_pin_fence(struct i915_vma *vma) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - - return i915_gem_object_pin(obj, &ggtt->base, - alignment, flags | PIN_GLOBAL); + if (vma->fence) { + vma->fence->pin_count++; + return true; + } else + return false; } -void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view); +/** + * i915_vma_unpin_fence - unpin fencing state + * @vma: vma to unpin fencing for + * + * This releases the fence pin reference acquired through + * i915_vma_pin_fence. It will handle both objects with and without an + * attached fence correctly, callers do not need to distinguish this. + */ static inline void -i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj) +i915_vma_unpin_fence(struct i915_vma *vma) { - i915_gem_object_ggtt_unpin_view(obj, &i915_ggtt_view_normal); + if (vma->fence) { + GEM_BUG_ON(vma->fence->pin_count <= 0); + vma->fence->pin_count--; + } } -/* i915_gem_fence.c */ -int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj); -int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj); - -bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj); -void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj); - void i915_gem_restore_fences(struct drm_device *dev); void i915_gem_detect_bit_6_swizzle(struct drm_device *dev); @@ -3524,10 +3393,10 @@ void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj); int __must_check i915_gem_context_init(struct drm_device *dev); void i915_gem_context_lost(struct drm_i915_private *dev_priv); void i915_gem_context_fini(struct drm_device *dev); -void i915_gem_context_reset(struct drm_device *dev); int i915_gem_context_open(struct drm_device *dev, struct drm_file *file); void i915_gem_context_close(struct drm_device *dev, struct drm_file *file); int i915_switch_context(struct drm_i915_gem_request *req); +int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv); void i915_gem_context_free(struct kref *ctx_ref); struct drm_i915_gem_object * i915_gem_alloc_context_obj(struct drm_device *dev, size_t size); @@ -3548,12 +3417,14 @@ i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id) return ctx; } -static inline void i915_gem_context_reference(struct i915_gem_context *ctx) +static inline struct i915_gem_context * +i915_gem_context_get(struct i915_gem_context *ctx) { kref_get(&ctx->ref); + return ctx; } -static inline void i915_gem_context_unreference(struct i915_gem_context *ctx) +static inline void i915_gem_context_put(struct i915_gem_context *ctx) { lockdep_assert_held(&ctx->i915->drm.struct_mutex); kref_put(&ctx->ref, i915_gem_context_free); @@ -3576,13 +3447,10 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, struct drm_file *file); /* i915_gem_evict.c */ -int __must_check i915_gem_evict_something(struct drm_device *dev, - struct i915_address_space *vm, - int min_size, - unsigned alignment, +int __must_check i915_gem_evict_something(struct i915_address_space *vm, + u64 min_size, u64 alignment, unsigned cache_level, - unsigned long start, - unsigned long end, + u64 start, u64 end, unsigned flags); int __must_check i915_gem_evict_for_vma(struct i915_vma *target); int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); @@ -3590,6 +3458,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); /* belongs in i915_gem_gtt.h */ static 
inline void i915_gem_chipset_flush(struct drm_i915_private *dev_priv) { + wmb(); if (INTEL_GEN(dev_priv) < 6) intel_gtt_chipset_flush(); } @@ -3634,28 +3503,21 @@ static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_objec struct drm_i915_private *dev_priv = to_i915(obj->base.dev); return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && - obj->tiling_mode != I915_TILING_NONE; + i915_gem_object_is_tiled(obj); } -/* i915_gem_debug.c */ -#if WATCH_LISTS -int i915_verify_lists(struct drm_device *dev); -#else -#define i915_verify_lists(dev) 0 -#endif - /* i915_debugfs.c */ #ifdef CONFIG_DEBUG_FS int i915_debugfs_register(struct drm_i915_private *dev_priv); void i915_debugfs_unregister(struct drm_i915_private *dev_priv); int i915_debugfs_connector_add(struct drm_connector *connector); -void intel_display_crc_init(struct drm_device *dev); +void intel_display_crc_init(struct drm_i915_private *dev_priv); #else static inline int i915_debugfs_register(struct drm_i915_private *dev_priv) {return 0;} static inline void i915_debugfs_unregister(struct drm_i915_private *dev_priv) {} static inline int i915_debugfs_connector_add(struct drm_connector *connector) { return 0; } -static inline void intel_display_crc_init(struct drm_device *dev) {} +static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {} #endif /* i915_gpu_error.c */ @@ -3684,23 +3546,23 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type); /* i915_cmd_parser.c */ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); -int i915_cmd_parser_init_ring(struct intel_engine_cs *engine); -void i915_cmd_parser_fini_ring(struct intel_engine_cs *engine); -bool i915_needs_cmd_parser(struct intel_engine_cs *engine); -int i915_parse_cmds(struct intel_engine_cs *engine, - struct drm_i915_gem_object *batch_obj, - struct drm_i915_gem_object *shadow_batch_obj, - u32 batch_start_offset, - u32 batch_len, - bool is_master); +void intel_engine_init_cmd_parser(struct intel_engine_cs *engine); +void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); +bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine); +int intel_engine_cmd_parser(struct intel_engine_cs *engine, + struct drm_i915_gem_object *batch_obj, + struct drm_i915_gem_object *shadow_batch_obj, + u32 batch_start_offset, + u32 batch_len, + bool is_master); /* i915_suspend.c */ extern int i915_save_state(struct drm_device *dev); extern int i915_restore_state(struct drm_device *dev); /* i915_sysfs.c */ -void i915_setup_sysfs(struct drm_device *dev_priv); -void i915_teardown_sysfs(struct drm_device *dev_priv); +void i915_setup_sysfs(struct drm_i915_private *dev_priv); +void i915_teardown_sysfs(struct drm_i915_private *dev_priv); /* intel_i2c.c */ extern int intel_setup_gmbus(struct drm_device *dev); @@ -3800,7 +3662,6 @@ extern void intel_set_rps(struct drm_i915_private *dev_priv, u8 val); extern void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable); -extern bool i915_semaphore_is_enabled(struct drm_i915_private *dev_priv); int i915_reg_read_ioctl(struct drm_device *dev, void *data, struct drm_file *file); @@ -3878,9 +3739,16 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val); * will be implemented using 2 32-bit writes in an arbitrary order with * an arbitrary delay between them. This can cause the hardware to * act upon the intermediate value, possibly leading to corruption and - * machine death. You have been warned. + * machine death. 
For this reason we do not support I915_WRITE64, or + dev_priv->uncore.funcs.mmio_writeq. + + * When reading a 64-bit value as two 32-bit values, the delay may cause + * the two reads to mismatch, e.g. a timestamp overflowing. Also note that + * occasionally a 64-bit register does not actually support a full readq + * and must be read using two 32-bit reads. + * + * You have been warned. */ -#define I915_WRITE64(reg, val) dev_priv->uncore.funcs.mmio_writeq(dev_priv, (reg), (val), true) #define I915_READ64(reg) dev_priv->uncore.funcs.mmio_readq(dev_priv, (reg), true) #define I915_READ64_2x32(lower_reg, upper_reg) ({ \ @@ -3923,7 +3791,7 @@ __raw_write(64, q) #undef __raw_write /* These are untraced mmio-accessors that are only valid to be used inside - * criticial sections inside IRQ handlers where forcewake is explicitly + * critical sections inside IRQ handlers where forcewake is explicitly * controlled. * Think twice, and think again, before using these. * Note: Should only be used between intel_uncore_forcewake_irqlock() and @@ -3995,7 +3863,9 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) schedule_timeout_uninterruptible(remaining_jiffies); } } -static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) + +static inline bool +__i915_request_irq_complete(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; @@ -4017,7 +3887,7 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) * is woken. */ if (engine->irq_seqno_barrier && - READ_ONCE(engine->breadcrumbs.irq_seqno_bh) == current && + rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh) == current && cmpxchg_relaxed(&engine->breadcrumbs.irq_posted, 1, 0)) { struct task_struct *tsk; @@ -4042,7 +3912,7 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) * irq_posted == false but we are still running). */ rcu_read_lock(); - tsk = READ_ONCE(engine->breadcrumbs.irq_seqno_bh); + tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh); if (tsk && tsk != current) /* Note that if the bottom-half is changed as we * are sending the wake-up, the new bottom-half will @@ -4057,18 +3927,35 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) return true; } - /* We need to check whether any gpu reset happened in between - * the request being submitted and now. If a reset has occurred, - * the seqno will have been advanced past ours and our request - * is complete. If we are in the process of handling a reset, - * the request is effectively complete as the rendering will - * be discarded, but we need to return in order to drop the - * struct_mutex.
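The two-reads-mismatch hazard described above is conventionally closed by re-reading the upper half until it is stable, which is the kind of loop a 2x32 read helper has to hide. A generic sketch, with hypothetical read_upper32()/read_lower32() accessors standing in for the real uncore MMIO reads:

#include <stdint.h>

/* Hypothetical 32-bit accessors standing in for the two MMIO reads. */
extern uint32_t read_upper32(void);
extern uint32_t read_lower32(void);

/* Re-read the upper half until it is stable so that a carry between
 * the two 32-bit reads cannot produce a mismatched 64-bit value. */
static uint64_t read64_2x32(void)
{
	uint32_t upper, lower, old_upper;
	int loop = 0;

	upper = read_upper32();
	do {
		old_upper = upper;
		lower = read_lower32();
		upper = read_upper32();
	} while (upper != old_upper && loop++ < 2);

	return ((uint64_t)upper << 32) | lower;
}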
- */ - if (i915_reset_in_progress(&req->i915->gpu_error)) - return true; - return false; } +void i915_memcpy_init_early(struct drm_i915_private *dev_priv); +bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len); + +/* i915_mm.c */ +int remap_io_mapping(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, unsigned long size, + struct io_mapping *iomap); + +#define ptr_mask_bits(ptr) ({ \ + unsigned long __v = (unsigned long)(ptr); \ + (typeof(ptr))(__v & PAGE_MASK); \ +}) + +#define ptr_unpack_bits(ptr, bits) ({ \ + unsigned long __v = (unsigned long)(ptr); \ + (bits) = __v & ~PAGE_MASK; \ + (typeof(ptr))(__v & PAGE_MASK); \ +}) + +#define ptr_pack_bits(ptr, bits) \ + ((typeof(ptr))((unsigned long)(ptr) | (bits))) + +#define fetch_and_zero(ptr) ({ \ + typeof(*ptr) __T = *(ptr); \ + *(ptr) = (typeof(*ptr))0; \ + __T; \ +}) + #endif diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 11681501d7b1..947e82c2b175 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -29,10 +29,13 @@ #include <drm/drm_vma_manager.h> #include <drm/i915_drm.h> #include "i915_drv.h" +#include "i915_gem_dmabuf.h" #include "i915_vgpu.h" #include "i915_trace.h" #include "intel_drv.h" +#include "intel_frontbuffer.h" #include "intel_mocs.h" +#include <linux/reservation.h> #include <linux/shmem_fs.h> #include <linux/slab.h> #include <linux/swap.h> @@ -41,10 +44,6 @@ static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); -static void -i915_gem_object_retire__write(struct drm_i915_gem_object *obj); -static void -i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring); static bool cpu_cache_is_coherent(struct drm_device *dev, enum i915_cache_level level) @@ -139,7 +138,6 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) if (ret) return ret; - WARN_ON(i915_verify_lists(dev)); return 0; } @@ -156,10 +154,10 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, pinned = 0; mutex_lock(&dev->struct_mutex); list_for_each_entry(vma, &ggtt->base.active_list, vm_link) - if (vma->pin_count) + if (i915_vma_is_pinned(vma)) pinned += vma->node.size; list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link) - if (vma->pin_count) + if (i915_vma_is_pinned(vma)) pinned += vma->node.size; mutex_unlock(&dev->struct_mutex); @@ -281,23 +279,129 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { .release = i915_gem_object_release_phys, }; -static int -drop_pages(struct drm_i915_gem_object *obj) +int i915_gem_object_unbind(struct drm_i915_gem_object *obj) { - struct i915_vma *vma, *next; + struct i915_vma *vma; + LIST_HEAD(still_in_list); int ret; - drm_gem_object_reference(&obj->base); - list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) - if (i915_vma_unbind(vma)) - break; + lockdep_assert_held(&obj->base.dev->struct_mutex); - ret = i915_gem_object_put_pages(obj); - drm_gem_object_unreference(&obj->base); + /* Closed vma are removed from the obj->vma_list - but they may + * still have an active binding on the object. To remove those we + * must wait for all rendering to complete to the object (as unbinding + * must anyway), and retire the requests. 
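The ptr_pack_bits()/ptr_unpack_bits() helpers above rely on page-aligned pointers having their low bits free, and fetch_and_zero() reads and clears a location in one expression. A standalone demonstration with PAGE_MASK spelled out (userspace C; the kernel macros themselves are the ones shown above):

#include <assert.h>
#include <stdint.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
	/* A made-up page-aligned "pointer": the low 12 bits are zero. */
	uintptr_t ptr = 0x7f0000042000UL;
	unsigned int flags = 0x3;

	/* ptr_pack_bits(): stash flags in the low bits. */
	uintptr_t packed = ptr | flags;

	/* ptr_unpack_bits(): split them apart again. */
	unsigned int out_flags = packed & ~PAGE_MASK;
	uintptr_t out_ptr = packed & PAGE_MASK;

	assert(out_ptr == ptr && out_flags == flags);

	/* fetch_and_zero(): return the old value, leave zero behind. */
	uintptr_t old = packed;
	packed = 0;
	assert(old != 0 && packed == 0);
	return 0;
}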
+ */ + ret = i915_gem_object_wait_rendering(obj, false); + if (ret) + return ret; + + i915_gem_retire_requests(to_i915(obj->base.dev)); + + while ((vma = list_first_entry_or_null(&obj->vma_list, + struct i915_vma, + obj_link))) { + list_move_tail(&vma->obj_link, &still_in_list); + ret = i915_vma_unbind(vma); + if (ret) + break; + } + list_splice(&still_in_list, &obj->vma_list); return ret; } +/** + * Ensures that all rendering to the object has completed and the object is + * safe to unbind from the GTT or access from the CPU. + * @obj: i915 gem object + * @readonly: waiting for just read access or read-write access + */ +int +i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, + bool readonly) +{ + struct reservation_object *resv; + struct i915_gem_active *active; + unsigned long active_mask; + int idx; + + lockdep_assert_held(&obj->base.dev->struct_mutex); + + if (!readonly) { + active = obj->last_read; + active_mask = i915_gem_object_get_active(obj); + } else { + active_mask = 1; + active = &obj->last_write; + } + + for_each_active(active_mask, idx) { + int ret; + + ret = i915_gem_active_wait(&active[idx], + &obj->base.dev->struct_mutex); + if (ret) + return ret; + } + + resv = i915_gem_object_get_dmabuf_resv(obj); + if (resv) { + long err; + + err = reservation_object_wait_timeout_rcu(resv, !readonly, true, + MAX_SCHEDULE_TIMEOUT); + if (err < 0) + return err; + } + + return 0; +} + +/* A nonblocking variant of the above wait. Must be called prior to + * acquiring the mutex for the object, as the object state may change + * during this call. A reference must be held by the caller for the object. + */ +static __must_check int +__unsafe_wait_rendering(struct drm_i915_gem_object *obj, + struct intel_rps_client *rps, + bool readonly) +{ + struct i915_gem_active *active; + unsigned long active_mask; + int idx; + + active_mask = __I915_BO_ACTIVE(obj); + if (!active_mask) + return 0; + + if (!readonly) { + active = obj->last_read; + } else { + active_mask = 1; + active = &obj->last_write; + } + + for_each_active(active_mask, idx) { + int ret; + + ret = i915_gem_active_wait_unlocked(&active[idx], + I915_WAIT_INTERRUPTIBLE, + NULL, rps); + if (ret) + return ret; + } + + return 0; +} + +static struct intel_rps_client *to_rps_client(struct drm_file *file) +{ + struct drm_i915_file_private *fpriv = file->driver_priv; + + return &fpriv->rps; +} + int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) @@ -318,7 +422,11 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, if (obj->base.filp == NULL) return -EINVAL; - ret = drop_pages(obj); + ret = i915_gem_object_unbind(obj); + if (ret) + return ret; + + ret = i915_gem_object_put_pages(obj); if (ret) return ret; @@ -408,7 +516,7 @@ i915_gem_create(struct drm_file *file, ret = drm_gem_handle_create(file, &obj->base, &handle); /* drop reference from allocate - handle holds it now */ - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); if (ret) return ret; @@ -502,33 +610,106 @@ __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, * flush the object from the CPU cache. 
*/ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, - int *needs_clflush) + unsigned int *needs_clflush) { int ret; *needs_clflush = 0; - if (WARN_ON(!i915_gem_object_has_struct_page(obj))) - return -EINVAL; + if (!i915_gem_object_has_struct_page(obj)) + return -ENODEV; + + ret = i915_gem_object_wait_rendering(obj, true); + if (ret) + return ret; + + ret = i915_gem_object_get_pages(obj); + if (ret) + return ret; + + i915_gem_object_pin_pages(obj); + + i915_gem_object_flush_gtt_write_domain(obj); - if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { - /* If we're not in the cpu read domain, set ourself into the gtt - * read domain and manually flush cachelines (if required). This - * optimizes for the case when the gpu will dirty the data - * anyway again before the next pread happens. */ + /* If we're not in the cpu read domain, set ourself into the gtt + * read domain and manually flush cachelines (if required). This + * optimizes for the case when the gpu will dirty the data + * anyway again before the next pread happens. + */ + if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, obj->cache_level); - ret = i915_gem_object_wait_rendering(obj, true); + + if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) { + ret = i915_gem_object_set_to_cpu_domain(obj, false); if (ret) - return ret; + goto err_unpin; + + *needs_clflush = 0; } + /* return with the pages pinned */ + return 0; + +err_unpin: + i915_gem_object_unpin_pages(obj); + return ret; +} + +int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, + unsigned int *needs_clflush) +{ + int ret; + + *needs_clflush = 0; + if (!i915_gem_object_has_struct_page(obj)) + return -ENODEV; + + ret = i915_gem_object_wait_rendering(obj, false); + if (ret) + return ret; + ret = i915_gem_object_get_pages(obj); if (ret) return ret; i915_gem_object_pin_pages(obj); + i915_gem_object_flush_gtt_write_domain(obj); + + /* If we're not in the cpu write domain, set ourself into the + * gtt write domain and manually flush cachelines (as required). + * This optimizes for the case when the gpu will use the data + * right away and we therefore have to clflush anyway. + */ + if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) + *needs_clflush |= cpu_write_needs_clflush(obj) << 1; + + /* Same trick applies to invalidate partially written cachelines read + * before writing. 
+ */ + if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) + *needs_clflush |= !cpu_cache_is_coherent(obj->base.dev, + obj->cache_level); + + if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) { + ret = i915_gem_object_set_to_cpu_domain(obj, true); + if (ret) + goto err_unpin; + + *needs_clflush = 0; + } + + if ((*needs_clflush & CLFLUSH_AFTER) == 0) + obj->cache_dirty = true; + + intel_fb_obj_invalidate(obj, ORIGIN_CPU); + obj->dirty = 1; + /* return with the pages pinned */ + return 0; + +err_unpin: + i915_gem_object_unpin_pages(obj); return ret; } @@ -638,14 +819,24 @@ i915_gem_gtt_pread(struct drm_device *dev, { struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct i915_vma *vma; struct drm_mm_node node; char __user *user_data; uint64_t remain; uint64_t offset; int ret; - ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); - if (ret) { + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + if (!IS_ERR(vma)) { + node.start = i915_ggtt_offset(vma); + node.allocated = false; + ret = i915_vma_put_fence(vma); + if (ret) { + i915_vma_unpin(vma); + vma = ERR_PTR(ret); + } + } + if (IS_ERR(vma)) { ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE); if (ret) goto out; @@ -657,12 +848,6 @@ i915_gem_gtt_pread(struct drm_device *dev, } i915_gem_object_pin_pages(obj); - } else { - node.start = i915_gem_obj_ggtt_offset(obj); - node.allocated = false; - ret = i915_gem_object_put_fence(obj); - if (ret) - goto out_unpin; } ret = i915_gem_object_set_to_gtt_domain(obj, false); @@ -675,7 +860,7 @@ i915_gem_gtt_pread(struct drm_device *dev, mutex_unlock(&dev->struct_mutex); if (likely(!i915.prefault_disable)) { - ret = fault_in_multipages_writeable(user_data, remain); + ret = fault_in_pages_writeable(user_data, remain); if (ret) { mutex_lock(&dev->struct_mutex); goto out_unpin; @@ -707,7 +892,7 @@ i915_gem_gtt_pread(struct drm_device *dev, * and write to user memory which may result into page * faults, and so we cannot perform this under struct_mutex. */ - if (slow_user_access(ggtt->mappable, page_base, + if (slow_user_access(&ggtt->mappable, page_base, page_offset, user_data, page_length, false)) { ret = -EFAULT; @@ -739,7 +924,7 @@ out_unpin: i915_gem_object_unpin_pages(obj); remove_mappable_node(&node); } else { - i915_gem_object_ggtt_unpin(obj); + i915_vma_unpin(vma); } out: return ret; @@ -760,19 +945,14 @@ i915_gem_shmem_pread(struct drm_device *dev, int needs_clflush = 0; struct sg_page_iter sg_iter; - if (!i915_gem_object_has_struct_page(obj)) - return -ENODEV; - - user_data = u64_to_user_ptr(args->data_ptr); - remain = args->size; - - obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); - ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); if (ret) return ret; + obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); + user_data = u64_to_user_ptr(args->data_ptr); offset = args->offset; + remain = args->size; for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, offset >> PAGE_SHIFT) { @@ -803,7 +983,7 @@ i915_gem_shmem_pread(struct drm_device *dev, mutex_unlock(&dev->struct_mutex); if (likely(!i915.prefault_disable) && !prefaulted) { - ret = fault_in_multipages_writeable(user_data, remain); + ret = fault_in_pages_writeable(user_data, remain); /* Userspace is tricking us, but we've already clobbered * its pages with the prefault and promised to write the * data up to the first fault. 
Hence ignore any errors @@ -828,7 +1008,7 @@ next_page: } out: - i915_gem_object_unpin_pages(obj); + i915_gem_obj_finish_shmem_access(obj); return ret; } @@ -857,36 +1037,44 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, args->size)) return -EFAULT; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; /* Bounds check source. */ if (args->offset > obj->base.size || args->size > obj->base.size - args->offset) { ret = -EINVAL; - goto out; + goto err; } trace_i915_gem_object_pread(obj, args->offset, args->size); + ret = __unsafe_wait_rendering(obj, to_rps_client(file), true); + if (ret) + goto err; + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + goto err; + ret = i915_gem_shmem_pread(dev, obj, args, file); /* pread for non shmem backed objects */ - if (ret == -EFAULT || ret == -ENODEV) + if (ret == -EFAULT || ret == -ENODEV) { + intel_runtime_pm_get(to_i915(dev)); ret = i915_gem_gtt_pread(dev, obj, args->size, args->offset, args->data_ptr); + intel_runtime_pm_put(to_i915(dev)); + } -out: - drm_gem_object_unreference(&obj->base); -unlock: + i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); + + return ret; + +err: + i915_gem_object_put_unlocked(obj); return ret; } @@ -916,7 +1104,7 @@ fast_user_write(struct io_mapping *mapping, /** * This is the fast pwrite path, where we copy the data directly from the * user into the GTT, uncached. - * @dev: drm device pointer + * @i915: i915 device private data * @obj: i915 gem object * @args: pwrite arguments structure * @file: drm file pointer @@ -929,17 +1117,28 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, { struct i915_ggtt *ggtt = &i915->ggtt; struct drm_device *dev = obj->base.dev; + struct i915_vma *vma; struct drm_mm_node node; uint64_t remain, offset; char __user *user_data; int ret; bool hit_slow_path = false; - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) return -EFAULT; - ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); - if (ret) { + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, + PIN_MAPPABLE | PIN_NONBLOCK); + if (!IS_ERR(vma)) { + node.start = i915_ggtt_offset(vma); + node.allocated = false; + ret = i915_vma_put_fence(vma); + if (ret) { + i915_vma_unpin(vma); + vma = ERR_PTR(ret); + } + } + if (IS_ERR(vma)) { ret = insert_mappable_node(i915, &node, PAGE_SIZE); if (ret) goto out; @@ -951,19 +1150,13 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, } i915_gem_object_pin_pages(obj); - } else { - node.start = i915_gem_obj_ggtt_offset(obj); - node.allocated = false; - ret = i915_gem_object_put_fence(obj); - if (ret) - goto out_unpin; } ret = i915_gem_object_set_to_gtt_domain(obj, true); if (ret) goto out_unpin; - intel_fb_obj_invalidate(obj, ORIGIN_GTT); + intel_fb_obj_invalidate(obj, ORIGIN_CPU); obj->dirty = true; user_data = u64_to_user_ptr(args->data_ptr); @@ -995,11 +1188,11 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, * If the object is non-shmem backed, we retry again with the * path that handles page fault. 
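For reference, the fast_user_write() helper named above is not itself changed by this patch and its body is elided from the hunks; it is essentially an atomic write-combining kmap of one GTT page followed by an uncached copy from userspace, approximately as follows (reconstructed as a sketch, details may differ):

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data, int length)
{
	void __iomem *vaddr_atomic;
	unsigned long unwritten;
	void *vaddr;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	/* The plain CPU copy is usable here because this is x86 */
	vaddr = (void __force *)vaddr_atomic + page_offset;
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);

	return unwritten;	/* non-zero means take the slow path */
}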
*/ - if (fast_user_write(ggtt->mappable, page_base, + if (fast_user_write(&ggtt->mappable, page_base, page_offset, user_data, page_length)) { hit_slow_path = true; mutex_unlock(&dev->struct_mutex); - if (slow_user_access(ggtt->mappable, + if (slow_user_access(&ggtt->mappable, page_base, page_offset, user_data, page_length, true)) { @@ -1030,7 +1223,7 @@ out_flush: } } - intel_fb_obj_flush(obj, false, ORIGIN_GTT); + intel_fb_obj_flush(obj, false, ORIGIN_CPU); out_unpin: if (node.allocated) { wmb(); @@ -1040,7 +1233,7 @@ out_unpin: i915_gem_object_unpin_pages(obj); remove_mappable_node(&node); } else { - i915_gem_object_ggtt_unpin(obj); + i915_vma_unpin(vma); } out: return ret; @@ -1123,41 +1316,17 @@ i915_gem_shmem_pwrite(struct drm_device *dev, int shmem_page_offset, page_length, ret = 0; int obj_do_bit17_swizzling, page_do_bit17_swizzling; int hit_slowpath = 0; - int needs_clflush_after = 0; - int needs_clflush_before = 0; + unsigned int needs_clflush; struct sg_page_iter sg_iter; - user_data = u64_to_user_ptr(args->data_ptr); - remain = args->size; - - obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); - - if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { - /* If we're not in the cpu write domain, set ourself into the gtt - * write domain and manually flush cachelines (if required). This - * optimizes for the case when the gpu will use the data - * right away and we therefore have to clflush anyway. */ - needs_clflush_after = cpu_write_needs_clflush(obj); - ret = i915_gem_object_wait_rendering(obj, false); - if (ret) - return ret; - } - /* Same trick applies to invalidate partially written cachelines read - * before writing. */ - if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) - needs_clflush_before = - !cpu_cache_is_coherent(dev, obj->cache_level); - - ret = i915_gem_object_get_pages(obj); + ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); if (ret) return ret; - intel_fb_obj_invalidate(obj, ORIGIN_CPU); - - i915_gem_object_pin_pages(obj); - + obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); + user_data = u64_to_user_ptr(args->data_ptr); offset = args->offset; - obj->dirty = 1; + remain = args->size; for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, offset >> PAGE_SHIFT) { @@ -1181,7 +1350,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, /* If we don't overwrite a cacheline completely we need to be * careful to have up-to-date data by first clflushing. Don't * overcomplicate things and flush the entire page.
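The partial_cacheline_write test just below is a standard alignment trick: OR the offset and length together and mask with (cacheline size - 1); a nonzero result means one end of the copy lands mid-line, so the stale line must be clflushed first. A standalone illustration, assuming 64-byte lines:

#include <stdbool.h>
#include <stdio.h>

#define CLFLUSH_SIZE 64	/* assumed x86 cacheline size for illustration */

static bool is_partial(unsigned int offset, unsigned int len)
{
	/* low bits set in either value => a partially covered line */
	return ((offset | len) & (CLFLUSH_SIZE - 1)) != 0;
}

int main(void)
{
	printf("%d\n", is_partial(0, 4096));	/* 0: whole lines only */
	printf("%d\n", is_partial(16, 4096));	/* 1: starts mid-line */
	printf("%d\n", is_partial(0, 100));	/* 1: ends mid-line */
	return 0;
}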
*/ - partial_cacheline_write = needs_clflush_before && + partial_cacheline_write = needs_clflush & CLFLUSH_BEFORE && ((shmem_page_offset | page_length) & (boot_cpu_data.x86_clflush_size - 1)); @@ -1191,7 +1360,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, user_data, page_do_bit17_swizzling, partial_cacheline_write, - needs_clflush_after); + needs_clflush & CLFLUSH_AFTER); if (ret == 0) goto next_page; @@ -1200,7 +1369,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, user_data, page_do_bit17_swizzling, partial_cacheline_write, - needs_clflush_after); + needs_clflush & CLFLUSH_AFTER); mutex_lock(&dev->struct_mutex); @@ -1214,7 +1383,7 @@ next_page: } out: - i915_gem_object_unpin_pages(obj); + i915_gem_obj_finish_shmem_access(obj); if (hit_slowpath) { /* @@ -1222,17 +1391,15 @@ out: * cachelines in-line while writing and the object moved * out of the cpu write domain while we've dropped the lock. */ - if (!needs_clflush_after && + if (!(needs_clflush & CLFLUSH_AFTER) && obj->base.write_domain != I915_GEM_DOMAIN_CPU) { if (i915_gem_clflush_object(obj, obj->pin_display)) - needs_clflush_after = true; + needs_clflush |= CLFLUSH_AFTER; } } - if (needs_clflush_after) + if (needs_clflush & CLFLUSH_AFTER) i915_gem_chipset_flush(to_i915(dev)); - else - obj->cache_dirty = true; intel_fb_obj_flush(obj, false, ORIGIN_CPU); return ret; @@ -1264,33 +1431,35 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, return -EFAULT; if (likely(!i915.prefault_disable)) { - ret = fault_in_multipages_readable(u64_to_user_ptr(args->data_ptr), + ret = fault_in_pages_readable(u64_to_user_ptr(args->data_ptr), args->size); if (ret) return -EFAULT; } - intel_runtime_pm_get(dev_priv); - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - goto put_rpm; - - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; /* Bounds check destination. */ if (args->offset > obj->base.size || args->size > obj->base.size - args->offset) { ret = -EINVAL; - goto out; + goto err; } trace_i915_gem_object_pwrite(obj, args->offset, args->size); + ret = __unsafe_wait_rendering(obj, to_rps_client(file), false); + if (ret) + goto err; + + intel_runtime_pm_get(dev_priv); + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + goto err_rpm; + ret = -EFAULT; /* We can only do the GTT pwrite on untiled buffers, as otherwise * it would end up going through the fenced access, and we'll get @@ -1306,508 +1475,31 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, * textures). Fallback to the shmem path in that case. 
*/ } - if (ret == -EFAULT) { + if (ret == -EFAULT || ret == -ENOSPC) { if (obj->phys_handle) ret = i915_gem_phys_pwrite(obj, args, file); - else if (i915_gem_object_has_struct_page(obj)) - ret = i915_gem_shmem_pwrite(dev, obj, args, file); else - ret = -ENODEV; + ret = i915_gem_shmem_pwrite(dev, obj, args, file); } -out: - drm_gem_object_unreference(&obj->base); -unlock: + i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); -put_rpm: intel_runtime_pm_put(dev_priv); return ret; -} - -static int -i915_gem_check_wedge(unsigned reset_counter, bool interruptible) -{ - if (__i915_terminally_wedged(reset_counter)) - return -EIO; - - if (__i915_reset_in_progress(reset_counter)) { - /* Non-interruptible callers can't handle -EAGAIN, hence return - * -EIO unconditionally for these. */ - if (!interruptible) - return -EIO; - - return -EAGAIN; - } - - return 0; -} - -static unsigned long local_clock_us(unsigned *cpu) -{ - unsigned long t; - - /* Cheaply and approximately convert from nanoseconds to microseconds. - * The result and subsequent calculations are also defined in the same - * approximate microseconds units. The principal source of timing - * error here is from the simple truncation. - * - * Note that local_clock() is only defined wrt to the current CPU; - * the comparisons are no longer valid if we switch CPUs. Instead of - * blocking preemption for the entire busywait, we can detect the CPU - * switch and use that as indicator of system load and a reason to - * stop busywaiting, see busywait_stop(). - */ - *cpu = get_cpu(); - t = local_clock() >> 10; - put_cpu(); - - return t; -} - -static bool busywait_stop(unsigned long timeout, unsigned cpu) -{ - unsigned this_cpu; - - if (time_after(local_clock_us(&this_cpu), timeout)) - return true; - - return this_cpu != cpu; -} - -bool __i915_spin_request(const struct drm_i915_gem_request *req, - int state, unsigned long timeout_us) -{ - unsigned cpu; - - /* When waiting for high frequency requests, e.g. during synchronous - * rendering split between the CPU and GPU, the finite amount of time - * required to set up the irq and wait upon it limits the response - * rate. By busywaiting on the request completion for a short while we - * can service the high frequency waits as quick as possible. However, - * if it is a slow request, we want to sleep as quickly as possible. - * The tradeoff between waiting and sleeping is roughly the time it - * takes to sleep on a request, on the order of a microsecond. - */ - - timeout_us += local_clock_us(&cpu); - do { - if (i915_gem_request_completed(req)) - return true; - - if (signal_pending_state(state, current)) - break; - - if (busywait_stop(timeout_us, cpu)) - break; - - cpu_relax_lowlatency(); - } while (!need_resched()); - - return false; -} - -/** - * __i915_wait_request - wait until execution of request has finished - * @req: duh! - * @interruptible: do an interruptible wait (normally yes) - * @timeout: in - how long to wait (NULL forever); out - how much time remaining - * @rps: RPS client - * - * Note: It is of utmost importance that the passed in seqno and reset_counter - * values have been read by the caller in an smp safe manner. Where read-side - * locks are involved, it is sufficient to read the reset_counter before - * unlocking the lock that protects the seqno. For lockless tricks, the - * reset_counter _must_ be read before, and an appropriate smp_rmb must be - * inserted. - * - * Returns 0 if the request was found within the allotted time.
Else returns the - * errno with remaining time filled in timeout argument. - */ -int __i915_wait_request(struct drm_i915_gem_request *req, - bool interruptible, - s64 *timeout, - struct intel_rps_client *rps) -{ - int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; - DEFINE_WAIT(reset); - struct intel_wait wait; - unsigned long timeout_remain; - s64 before = 0; /* Only to silence a compiler warning. */ - int ret = 0; - - might_sleep(); - - if (list_empty(&req->list)) - return 0; - - if (i915_gem_request_completed(req)) - return 0; - - timeout_remain = MAX_SCHEDULE_TIMEOUT; - if (timeout) { - if (WARN_ON(*timeout < 0)) - return -EINVAL; - - if (*timeout == 0) - return -ETIME; - - timeout_remain = nsecs_to_jiffies_timeout(*timeout); - - /* - * Record current time in case interrupted by signal, or wedged. - */ - before = ktime_get_raw_ns(); - } - - trace_i915_gem_request_wait_begin(req); - - /* This client is about to stall waiting for the GPU. In many cases - * this is undesirable and limits the throughput of the system, as - * many clients cannot continue processing user input/output whilst - * blocked. RPS autotuning may take tens of milliseconds to respond - * to the GPU load and thus incurs additional latency for the client. - * We can circumvent that by promoting the GPU frequency to maximum - * before we wait. This makes the GPU throttle up much more quickly - * (good for benchmarks and user experience, e.g. window animations), - * but at a cost of spending more power processing the workload - * (bad for battery). Not all clients even want their results - * immediately and for them we should just let the GPU select its own - * frequency to maximise efficiency. To prevent a single client from - * forcing the clocks too high for the whole system, we only allow - * each client to waitboost once in a busy period. - */ - if (INTEL_INFO(req->i915)->gen >= 6) - gen6_rps_boost(req->i915, rps, req->emitted_jiffies); - - /* Optimistic spin for the next ~jiffie before touching IRQs */ - if (i915_spin_request(req, state, 5)) - goto complete; - - set_current_state(state); - add_wait_queue(&req->i915->gpu_error.wait_queue, &reset); - - intel_wait_init(&wait, req->seqno); - if (intel_engine_add_wait(req->engine, &wait)) - /* In order to check that we haven't missed the interrupt - * as we enabled it, we need to kick ourselves to do a - * coherent check on the seqno before we sleep. - */ - goto wakeup; - - for (;;) { - if (signal_pending_state(state, current)) { - ret = -ERESTARTSYS; - break; - } - - timeout_remain = io_schedule_timeout(timeout_remain); - if (timeout_remain == 0) { - ret = -ETIME; - break; - } - - if (intel_wait_complete(&wait)) - break; - - set_current_state(state); - -wakeup: - /* Carefully check if the request is complete, giving time - * for the seqno to be visible following the interrupt. - * We also have to check in case we are kicked by the GPU - * reset in order to drop the struct_mutex. - */ - if (__i915_request_irq_complete(req)) - break; - - /* Only spin if we know the GPU is processing this request */ - if (i915_spin_request(req, state, 2)) - break; - } - remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset); - - intel_engine_remove_wait(req->engine, &wait); - __set_current_state(TASK_RUNNING); -complete: - trace_i915_gem_request_wait_end(req); - - if (timeout) { - s64 tres = *timeout - (ktime_get_raw_ns() - before); - - *timeout = tres < 0 ? 
0 : tres; - - /* - * Apparently ktime isn't accurate enough and occasionally has a - * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch - * things up to make the test happy. We allow up to 1 jiffy. - * - * This is a regression from the timespec->ktime conversion. - */ - if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000) - *timeout = 0; - } - - if (rps && req->seqno == req->engine->last_submitted_seqno) { - /* The GPU is now idle and this client has stalled. - * Since no other client has submitted a request in the - * meantime, assume that this client is the only one - * supplying work to the GPU but is unable to keep that - * work supplied because it is waiting. Since the GPU is - * then never kept fully busy, RPS autoclocking will - * keep the clocks relatively low, causing further delays. - * Compensate by giving the synchronous client credit for - * a waitboost next time. - */ - spin_lock(&req->i915->rps.client_lock); - list_del_init(&rps->link); - spin_unlock(&req->i915->rps.client_lock); - } - - return ret; -} - -int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, - struct drm_file *file) -{ - struct drm_i915_file_private *file_priv; - - WARN_ON(!req || !file || req->file_priv); - - if (!req || !file) - return -EINVAL; - - if (req->file_priv) - return -EINVAL; - - file_priv = file->driver_priv; - - spin_lock(&file_priv->mm.lock); - req->file_priv = file_priv; - list_add_tail(&req->client_list, &file_priv->mm.request_list); - spin_unlock(&file_priv->mm.lock); - - req->pid = get_pid(task_pid(current)); - - return 0; -} - -static inline void -i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) -{ - struct drm_i915_file_private *file_priv = request->file_priv; - - if (!file_priv) - return; - - spin_lock(&file_priv->mm.lock); - list_del(&request->client_list); - request->file_priv = NULL; - spin_unlock(&file_priv->mm.lock); - - put_pid(request->pid); - request->pid = NULL; -} - -static void i915_gem_request_retire(struct drm_i915_gem_request *request) -{ - trace_i915_gem_request_retire(request); - - /* We know the GPU must have read the request to have - * sent us the seqno + interrupt, so use the position - * of tail of the request to update the last known position - * of the GPU head. - * - * Note this requires that we are always called in request - * completion order. - */ - request->ringbuf->last_retired_head = request->postfix; - - list_del_init(&request->list); - i915_gem_request_remove_from_client(request); - - if (request->previous_context) { - if (i915.enable_execlists) - intel_lr_context_unpin(request->previous_context, - request->engine); - } - - i915_gem_context_unreference(request->ctx); - i915_gem_request_unreference(request); -} - -static void -__i915_gem_request_retire__upto(struct drm_i915_gem_request *req) -{ - struct intel_engine_cs *engine = req->engine; - struct drm_i915_gem_request *tmp; - - lockdep_assert_held(&engine->i915->drm.struct_mutex); - - if (list_empty(&req->list)) - return; - - do { - tmp = list_first_entry(&engine->request_list, - typeof(*tmp), list); - - i915_gem_request_retire(tmp); - } while (tmp != req); - - WARN_ON(i915_verify_lists(engine->dev)); -} - -/** - * Waits for a request to be signaled, and cleans up the - * request and object lists appropriately for that event.
- * @req: request to wait on - */ -int -i915_wait_request(struct drm_i915_gem_request *req) -{ - struct drm_i915_private *dev_priv = req->i915; - bool interruptible; - int ret; - - interruptible = dev_priv->mm.interruptible; - - BUG_ON(!mutex_is_locked(&dev_priv->drm.struct_mutex)); - - ret = __i915_wait_request(req, interruptible, NULL, NULL); - if (ret) - return ret; - - /* If the GPU hung, we want to keep the requests to find the guilty. */ - if (!i915_reset_in_progress(&dev_priv->gpu_error)) - __i915_gem_request_retire__upto(req); - - return 0; -} - -/** - * Ensures that all rendering to the object has completed and the object is - * safe to unbind from the GTT or access from the CPU. - * @obj: i915 gem object - * @readonly: waiting for read access or write - */ -int -i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, - bool readonly) -{ - int ret, i; - - if (!obj->active) - return 0; - - if (readonly) { - if (obj->last_write_req != NULL) { - ret = i915_wait_request(obj->last_write_req); - if (ret) - return ret; - - i = obj->last_write_req->engine->id; - if (obj->last_read_req[i] == obj->last_write_req) - i915_gem_object_retire__read(obj, i); - else - i915_gem_object_retire__write(obj); - } - } else { - for (i = 0; i < I915_NUM_ENGINES; i++) { - if (obj->last_read_req[i] == NULL) - continue; - - ret = i915_wait_request(obj->last_read_req[i]); - if (ret) - return ret; - - i915_gem_object_retire__read(obj, i); - } - GEM_BUG_ON(obj->active); - } - - return 0; -} - -static void -i915_gem_object_retire_request(struct drm_i915_gem_object *obj, - struct drm_i915_gem_request *req) -{ - int ring = req->engine->id; - - if (obj->last_read_req[ring] == req) - i915_gem_object_retire__read(obj, ring); - else if (obj->last_write_req == req) - i915_gem_object_retire__write(obj); - - if (!i915_reset_in_progress(&req->i915->gpu_error)) - __i915_gem_request_retire__upto(req); -} - -/* A nonblocking variant of the above wait. This is a highly dangerous routine - * as the object state may change during this call. 
- */ -static __must_check int -i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, - struct intel_rps_client *rps, - bool readonly) -{ - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_gem_request *requests[I915_NUM_ENGINES]; - int ret, i, n = 0; - - BUG_ON(!mutex_is_locked(&dev->struct_mutex)); - BUG_ON(!dev_priv->mm.interruptible); - - if (!obj->active) - return 0; - - if (readonly) { - struct drm_i915_gem_request *req; - - req = obj->last_write_req; - if (req == NULL) - return 0; - - requests[n++] = i915_gem_request_reference(req); - } else { - for (i = 0; i < I915_NUM_ENGINES; i++) { - struct drm_i915_gem_request *req; - - req = obj->last_read_req[i]; - if (req == NULL) - continue; - - requests[n++] = i915_gem_request_reference(req); - } - } - - mutex_unlock(&dev->struct_mutex); - ret = 0; - for (i = 0; ret == 0 && i < n; i++) - ret = __i915_wait_request(requests[i], true, NULL, rps); - mutex_lock(&dev->struct_mutex); - - for (i = 0; i < n; i++) { - if (ret == 0) - i915_gem_object_retire_request(obj, requests[i]); - i915_gem_request_unreference(requests[i]); - } +err_rpm: + intel_runtime_pm_put(dev_priv); +err: + i915_gem_object_put_unlocked(obj); return ret; } -static struct intel_rps_client *to_rps_client(struct drm_file *file) -{ - struct drm_i915_file_private *fpriv = file->driver_priv; - return &fpriv->rps; -} - -static enum fb_op_origin +static inline enum fb_op_origin write_origin(struct drm_i915_gem_object *obj, unsigned domain) { - return domain == I915_GEM_DOMAIN_GTT && !obj->has_wc_mmap ? - ORIGIN_GTT : ORIGIN_CPU; + return (domain == I915_GEM_DOMAIN_GTT ? + obj->frontbuffer_ggtt_origin : ORIGIN_CPU); } /** @@ -1828,10 +1520,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, int ret; /* Only handle setting domains to types used by the CPU. */ - if (write_domain & I915_GEM_GPU_DOMAINS) - return -EINVAL; - - if (read_domains & I915_GEM_GPU_DOMAINS) + if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS) return -EINVAL; /* Having something in the write domain implies it's in the read @@ -1840,25 +1529,21 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (write_domain != 0 && read_domains != write_domain) return -EINVAL; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; /* Try to flush the object off the GPU without holding the lock. * We will repeat the flush holding the lock in the normal manner * to catch cases where we are gazumped. 
*/ - ret = i915_gem_object_wait_rendering__nonblocking(obj, - to_rps_client(file), - !write_domain); + ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain); + if (ret) + goto err; + + ret = i915_mutex_lock_interruptible(dev); if (ret) - goto unref; + goto err; if (read_domains & I915_GEM_DOMAIN_GTT) ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); @@ -1868,11 +1553,13 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (write_domain != 0) intel_fb_obj_invalidate(obj, write_origin(obj, write_domain)); -unref: - drm_gem_object_unreference(&obj->base); -unlock: + i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); return ret; + +err: + i915_gem_object_put_unlocked(obj); + return ret; } /** @@ -1887,26 +1574,23 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_sw_finish *args = data; struct drm_i915_gem_object *obj; - int ret = 0; - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; + int err = 0; - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; /* Pinned buffers may be scanout, so flush the cache */ - if (obj->pin_display) - i915_gem_object_flush_cpu_write_domain(obj); + if (READ_ONCE(obj->pin_display)) { + err = i915_mutex_lock_interruptible(dev); + if (!err) { + i915_gem_object_flush_cpu_write_domain(obj); + mutex_unlock(&dev->struct_mutex); + } + } - drm_gem_object_unreference(&obj->base); -unlock: - mutex_unlock(&dev->struct_mutex); - return ret; + i915_gem_object_put_unlocked(obj); + return err; } /** @@ -1934,7 +1618,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_mmap *args = data; - struct drm_gem_object *obj; + struct drm_i915_gem_object *obj; unsigned long addr; if (args->flags & ~(I915_MMAP_WC)) @@ -1943,19 +1627,19 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT)) return -ENODEV; - obj = drm_gem_object_lookup(file, args->handle); - if (obj == NULL) + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) return -ENOENT; /* prime objects have no backing filp to GEM mmap * pages from. 
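From userspace, the I915_MMAP_WC path handled in this ioctl is requested through the ordinary GEM mmap call. A minimal sketch against the i915 uapi; fd and handle are assumed to exist and error handling is elided:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Map a GEM object write-combined via DRM_IOCTL_I915_GEM_MMAP */
static void *gem_mmap_wc(int fd, uint32_t handle, uint64_t size)
{
	struct drm_i915_gem_mmap arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	arg.size = size;
	arg.flags = I915_MMAP_WC;	/* fails with -ENODEV without PAT */

	if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg))
		return NULL;

	return (void *)(uintptr_t)arg.addr_ptr;
}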
*/ - if (!obj->filp) { - drm_gem_object_unreference_unlocked(obj); + if (!obj->base.filp) { + i915_gem_object_put_unlocked(obj); return -EINVAL; } - addr = vm_mmap(obj->filp, 0, args->size, + addr = vm_mmap(obj->base.filp, 0, args->size, PROT_READ | PROT_WRITE, MAP_SHARED, args->offset); if (args->flags & I915_MMAP_WC) { @@ -1963,7 +1647,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, struct vm_area_struct *vma; if (down_write_killable(&mm->mmap_sem)) { - drm_gem_object_unreference_unlocked(obj); + i915_gem_object_put_unlocked(obj); return -EINTR; } vma = find_vma(mm, addr); @@ -1975,9 +1659,9 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, up_write(&mm->mmap_sem); /* This may race, but that's ok, it only gets set */ - WRITE_ONCE(to_intel_bo(obj)->has_wc_mmap, true); + WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU); } - drm_gem_object_unreference_unlocked(obj); + i915_gem_object_put_unlocked(obj); if (IS_ERR((void *)addr)) return addr; @@ -1986,9 +1670,69 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, return 0; } +static unsigned int tile_row_pages(struct drm_i915_gem_object *obj) +{ + u64 size; + + size = i915_gem_object_get_stride(obj); + size *= i915_gem_object_get_tiling(obj) == I915_TILING_Y ? 32 : 8; + + return size >> PAGE_SHIFT; +} + +/** + * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps + * + * A history of the GTT mmap interface: + * + * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to be + * aligned and suitable for fencing, and still fit into the available + * mappable space left by the pinned display objects. A classic problem + * we called the page-fault-of-doom where we would ping-pong between + * two objects that could not fit inside the GTT and so the memcpy + * would page one object in at the expense of the other between every + * single byte. + * + * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none + * as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the + * object is too large for the available space (or simply too large + * for the mappable aperture!), a view is created instead and faulted + * into userspace. (This view is aligned and sized appropriately for + * fenced access.) + * + * Restrictions: + * + * * snoopable objects cannot be accessed via the GTT. It can cause machine + * hangs on some architectures, corruption on others. An attempt to service + * a GTT page fault from a snoopable object will generate a SIGBUS. + * + * * the object must be able to fit into RAM (physical memory, though not + * limited to the mappable aperture). + * + * + * Caveats: + * + * * a new GTT page fault will synchronize rendering from the GPU and flush + * all data to system memory. Subsequent access will not be synchronized. + * + * * all mappings are revoked on runtime device suspend. + * + * * there are only 8, 16 or 32 fence registers to share between all users + * (older machines require fence register for display and blitter access + * as well). Contention of the fence registers will cause the previous users + * to be unmapped and any new access will generate new page faults. + * + * * running out of memory while servicing a fault may generate a SIGBUS, + * rather than the expected SIGSEGV.
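Userspace can discover which of the feature levels documented above it is running against via GETPARAM. I915_PARAM_MMAP_GTT_VERSION is introduced alongside this series, so the sketch below assumes a uapi header that already carries the definition:

#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Query the GTT mmap version, treating failure as the legacy level 0 */
static int gtt_mmap_version(int fd)
{
	drm_i915_getparam_t gp;
	int value = 0;

	memset(&gp, 0, sizeof(gp));
	gp.param = I915_PARAM_MMAP_GTT_VERSION;
	gp.value = &value;

	if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
		return 0;

	return value;
}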
+ */ +int i915_gem_mmap_gtt_version(void) +{ + return 1; +} + /** * i915_gem_fault - fault a page into the GTT - * @vma: VMA in question + * @area: CPU VMA in question * @vmf: fault info * * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped @@ -2001,122 +1745,120 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, * from the GTT and/or fence registers to make room. So performance may * suffer if the GTT working set is large or there are few fence registers * left. + * + * The current feature set supported by i915_gem_fault() and thus GTT mmaps + * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version). */ -int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf) { - struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data); +#define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */ + struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data); struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct i915_ggtt_view view = i915_ggtt_view_normal; - pgoff_t page_offset; - unsigned long pfn; - int ret = 0; bool write = !!(vmf->flags & FAULT_FLAG_WRITE); - - intel_runtime_pm_get(dev_priv); + struct i915_vma *vma; + pgoff_t page_offset; + unsigned int flags; + int ret; /* We don't use vmf->pgoff since that has the fake offset */ - page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> + page_offset = ((unsigned long)vmf->virtual_address - area->vm_start) >> PAGE_SHIFT; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - goto out; - trace_i915_gem_object_fault(obj, page_offset, true, write); /* Try to flush the object off the GPU first without holding the lock. - * Upon reacquiring the lock, we will perform our sanity checks and then + * Upon acquiring the lock, we will perform our sanity checks and then * repeat the flush holding the lock in the normal manner to catch cases * where we are gazumped. */ - ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); + ret = __unsafe_wait_rendering(obj, NULL, !write); if (ret) - goto unlock; + goto err; + + intel_runtime_pm_get(dev_priv); + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + goto err_rpm; /* Access to snoopable pages through the GTT is incoherent. */ if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { ret = -EFAULT; - goto unlock; + goto err_unlock; } - /* Use a partial view if the object is bigger than the aperture. */ - if (obj->base.size >= ggtt->mappable_end && - obj->tiling_mode == I915_TILING_NONE) { - static const unsigned int chunk_size = 256; // 1 MiB + /* If the object is smaller than a couple of partial vma, it is + * not worth only creating a single partial vma - we may as well + * clear enough space for the full object. 
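The fault handler below sizes partial views with rounddown() and min_t(); stripped of the kernel helpers, the arithmetic reduces to the following (plain C stand-ins with illustrative names):

/* Compute a partial view: round the faulting page down to a chunk
 * boundary and clamp the view to the tail of the mapping.
 */
static void partial_view(unsigned long page_offset,	/* faulting page */
			 unsigned long vma_pages,	/* CPU VMA size in pages */
			 unsigned long chunk,		/* chunk_size in pages */
			 unsigned long *offset, unsigned long *size)
{
	*offset = page_offset - (page_offset % chunk);	/* rounddown() */
	*size = vma_pages - *offset;
	if (*size > chunk)				/* min_t() */
		*size = chunk;
}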
+ */ + flags = PIN_MAPPABLE; + if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT) + flags |= PIN_NONBLOCK | PIN_NONFAULT; + + /* Now pin it into the GTT as needed */ + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags); + if (IS_ERR(vma)) { + struct i915_ggtt_view view; + unsigned int chunk_size; + + /* Use a partial view if it is bigger than available space */ + chunk_size = MIN_CHUNK_PAGES; + if (i915_gem_object_is_tiled(obj)) + chunk_size = max(chunk_size, tile_row_pages(obj)); memset(&view, 0, sizeof(view)); view.type = I915_GGTT_VIEW_PARTIAL; view.params.partial.offset = rounddown(page_offset, chunk_size); view.params.partial.size = - min_t(unsigned int, - chunk_size, - (vma->vm_end - vma->vm_start)/PAGE_SIZE - + min_t(unsigned int, chunk_size, + (area->vm_end - area->vm_start) / PAGE_SIZE - view.params.partial.offset); - } - /* Now pin it into the GTT if needed */ - ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE); - if (ret) - goto unlock; + /* If the partial covers the entire object, just create a + * normal VMA. + */ + if (chunk_size >= obj->base.size >> PAGE_SHIFT) + view.type = I915_GGTT_VIEW_NORMAL; + + /* Userspace is now writing through an untracked VMA, abandon + * all hope that the hardware is able to track future writes. + */ + obj->frontbuffer_ggtt_origin = ORIGIN_CPU; + + vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); + } + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err_unlock; + } ret = i915_gem_object_set_to_gtt_domain(obj, write); if (ret) - goto unpin; + goto err_unpin; - ret = i915_gem_object_get_fence(obj); + ret = i915_vma_get_fence(vma); if (ret) - goto unpin; + goto err_unpin; /* Finally, remap it using the new GTT offset */ - pfn = ggtt->mappable_base + - i915_gem_obj_ggtt_offset_view(obj, &view); - pfn >>= PAGE_SHIFT; - - if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) { - /* Overriding existing pages in partial view does not cause - * us any trouble as TLBs are still valid because the fault - * is due to userspace losing part of the mapping or never - * having accessed it before (at this partials' range). 
- */ - unsigned long base = vma->vm_start + - (view.params.partial.offset << PAGE_SHIFT); - unsigned int i; - - for (i = 0; i < view.params.partial.size; i++) { - ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i); - if (ret) - break; - } - - obj->fault_mappable = true; - } else { - if (!obj->fault_mappable) { - unsigned long size = min_t(unsigned long, - vma->vm_end - vma->vm_start, - obj->base.size); - int i; - - for (i = 0; i < size >> PAGE_SHIFT; i++) { - ret = vm_insert_pfn(vma, - (unsigned long)vma->vm_start + i * PAGE_SIZE, - pfn + i); - if (ret) - break; - } + ret = remap_io_mapping(area, + area->vm_start + (vma->ggtt_view.params.partial.offset << PAGE_SHIFT), + (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT, + min_t(u64, vma->size, area->vm_end - area->vm_start), + &ggtt->mappable); + if (ret) + goto err_unpin; - obj->fault_mappable = true; - } else - ret = vm_insert_pfn(vma, - (unsigned long)vmf->virtual_address, - pfn + page_offset); - } -unpin: - i915_gem_object_ggtt_unpin_view(obj, &view); -unlock: + obj->fault_mappable = true; +err_unpin: + __i915_vma_unpin(vma); +err_unlock: mutex_unlock(&dev->struct_mutex); -out: +err_rpm: + intel_runtime_pm_put(dev_priv); +err: switch (ret) { case -EIO: /* @@ -2157,8 +1899,6 @@ out: ret = VM_FAULT_SIGBUS; break; } - - intel_runtime_pm_put(dev_priv); return ret; } @@ -2212,46 +1952,58 @@ i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) i915_gem_release_mmap(obj); } -uint32_t -i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) +/** + * i915_gem_get_ggtt_size - return required global GTT size for an object + * @dev_priv: i915 device + * @size: object size + * @tiling_mode: tiling mode + * + * Return the required global GTT size for an object, taking into account + * potential fence register mapping. + */ +u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, + u64 size, int tiling_mode) { - uint32_t gtt_size; + u64 ggtt_size; - if (INTEL_INFO(dev)->gen >= 4 || + GEM_BUG_ON(size == 0); + + if (INTEL_GEN(dev_priv) >= 4 || tiling_mode == I915_TILING_NONE) return size; /* Previous chips need a power-of-two fence region when tiling */ - if (IS_GEN3(dev)) - gtt_size = 1024*1024; + if (IS_GEN3(dev_priv)) + ggtt_size = 1024*1024; else - gtt_size = 512*1024; + ggtt_size = 512*1024; - while (gtt_size < size) - gtt_size <<= 1; + while (ggtt_size < size) + ggtt_size <<= 1; - return gtt_size; + return ggtt_size; } /** - * i915_gem_get_gtt_alignment - return required GTT alignment for an object - * @dev: drm device + * i915_gem_get_ggtt_alignment - return required global GTT alignment + * @dev_priv: i915 device * @size: object size * @tiling_mode: tiling mode - * @fenced: is fenced alignemned required or not + * @fenced: is fenced alignment required or not * - * Return the required GTT alignment for an object, taking into account + * Return the required global GTT alignment for an object, taking into account * potential fence register mapping. */ -uint32_t -i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, - int tiling_mode, bool fenced) +u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size, + int tiling_mode, bool fenced) { + GEM_BUG_ON(size == 0); + /* * Minimum alignment is 4k (GTT page size), but might be greater * if a fence register is needed for the object. 
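To make the fence sizing above concrete: pre-gen4 fence regions must be power-of-two sized, starting from 1 MiB on gen3 (512 KiB on the chips before it), so a 1.5 MiB tiled object ends up occupying a 2 MiB fence region. A tiny worked version:

#include <stdio.h>

/* Round an object size up to the pre-gen4 power-of-two fence size */
static unsigned long long fence_size(unsigned long long size, int is_gen3)
{
	unsigned long long ggtt_size = is_gen3 ? 1024 * 1024 : 512 * 1024;

	while (ggtt_size < size)
		ggtt_size <<= 1;

	return ggtt_size;
}

int main(void)
{
	/* 1.5 MiB tiled object on gen3 -> 2 MiB fence region */
	printf("%llu\n", fence_size(3ull << 19, 1));
	return 0;
}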
*/ - if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || + if (INTEL_GEN(dev_priv) >= 4 || (!fenced && IS_G33(dev_priv)) || tiling_mode == I915_TILING_NONE) return 4096; @@ -2259,42 +2011,34 @@ i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, * Previous chips need to be aligned to the size of the smallest * fence register that can contain the object. */ - return i915_gem_get_gtt_size(dev, size, tiling_mode); + return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode); } static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - int ret; - - dev_priv->mm.shrinker_no_lock_stealing = true; + int err; - ret = drm_gem_create_mmap_offset(&obj->base); - if (ret != -ENOSPC) - goto out; + err = drm_gem_create_mmap_offset(&obj->base); + if (!err) + return 0; - /* Badly fragmented mmap space? The only way we can recover - * space is by destroying unwanted objects. We can't randomly release - * mmap_offsets as userspace expects them to be persistent for the - * lifetime of the objects. The closest we can is to release the - * offsets on purgeable objects by truncating it and marking it purged, - * which prevents userspace from ever using that object again. + /* We can idle the GPU locklessly to flush stale objects, but in order + * to claim that space for ourselves, we need to take the big + * struct_mutex to free the requests+objects and allocate our slot. */ - i915_gem_shrink(dev_priv, - obj->base.size >> PAGE_SHIFT, - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_PURGEABLE); - ret = drm_gem_create_mmap_offset(&obj->base); - if (ret != -ENOSPC) - goto out; + err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE); + if (err) + return err; - i915_gem_shrink_all(dev_priv); - ret = drm_gem_create_mmap_offset(&obj->base); -out: - dev_priv->mm.shrinker_no_lock_stealing = false; + err = i915_mutex_lock_interruptible(&dev_priv->drm); + if (!err) { + i915_gem_retire_requests(dev_priv); + err = drm_gem_create_mmap_offset(&obj->base); + mutex_unlock(&dev_priv->drm.struct_mutex); + } - return ret; + return err; } static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) @@ -2311,32 +2055,15 @@ i915_gem_mmap_gtt(struct drm_file *file, struct drm_i915_gem_object *obj; int ret; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(file, handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } - - if (obj->madv != I915_MADV_WILLNEED) { - DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); - ret = -EFAULT; - goto out; - } + obj = i915_gem_object_lookup(file, handle); + if (!obj) + return -ENOENT; ret = i915_gem_object_create_mmap_offset(obj); - if (ret) - goto out; + if (ret == 0) + *offset = drm_vma_node_offset_addr(&obj->base.vma_node); - *offset = drm_vma_node_offset_addr(&obj->base.vma_node); - -out: - drm_gem_object_unreference(&obj->base); -unlock: - mutex_unlock(&dev->struct_mutex); + i915_gem_object_put_unlocked(obj); return ret; } @@ -2454,7 +2181,7 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj) if (obj->pages_pin_count) return -EBUSY; - BUG_ON(i915_gem_obj_bound_any(obj)); + GEM_BUG_ON(obj->bind_count); /* ->put_pages might need to allocate memory for the bit17 swizzle * array, hence protect them from being reaped by removing them from gtt @@ -2462,10 +2189,14 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj) list_del(&obj->global_list); if 
(obj->mapping) { - if (is_vmalloc_addr(obj->mapping)) - vunmap(obj->mapping); + void *ptr; + + ptr = ptr_mask_bits(obj->mapping); + if (is_vmalloc_addr(ptr)) + vunmap(ptr); else - kunmap(kmap_to_page(obj->mapping)); + kunmap(kmap_to_page(ptr)); + obj->mapping = NULL; } @@ -2574,7 +2305,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_do_bit_17_swizzle(obj); - if (obj->tiling_mode != I915_TILING_NONE && + if (i915_gem_object_is_tiled(obj) && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) i915_gem_object_pin_pages(obj); @@ -2638,7 +2369,8 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj) } /* The 'mapping' part of i915_gem_object_pin_map() below */ -static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) +static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, + enum i915_map_type type) { unsigned long n_pages = obj->base.size >> PAGE_SHIFT; struct sg_table *sgt = obj->pages; @@ -2647,10 +2379,11 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) struct page *stack_pages[32]; struct page **pages = stack_pages; unsigned long i = 0; + pgprot_t pgprot; void *addr; /* A single page can always be kmapped */ - if (n_pages == 1) + if (n_pages == 1 && type == I915_MAP_WB) return kmap(sg_page(sgt->sgl)); if (n_pages > ARRAY_SIZE(stack_pages)) { @@ -2666,7 +2399,15 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) /* Check that we have the expected number of pages */ GEM_BUG_ON(i != n_pages); - addr = vmap(pages, n_pages, 0, PAGE_KERNEL); + switch (type) { + case I915_MAP_WB: + pgprot = PAGE_KERNEL; + break; + case I915_MAP_WC: + pgprot = pgprot_writecombine(PAGE_KERNEL_IO); + break; + } + addr = vmap(pages, n_pages, 0, pgprot); if (pages != stack_pages) drm_free_large(pages); @@ -2675,276 +2416,89 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) } /* get, pin, and map the pages of the object into kernel space */ -void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) +void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, + enum i915_map_type type) { + enum i915_map_type has_type; + bool pinned; + void *ptr; int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); + GEM_BUG_ON(!i915_gem_object_has_struct_page(obj)); ret = i915_gem_object_get_pages(obj); if (ret) return ERR_PTR(ret); i915_gem_object_pin_pages(obj); + pinned = obj->pages_pin_count > 1; - if (!obj->mapping) { - obj->mapping = i915_gem_object_map(obj); - if (!obj->mapping) { - i915_gem_object_unpin_pages(obj); - return ERR_PTR(-ENOMEM); + ptr = ptr_unpack_bits(obj->mapping, has_type); + if (ptr && has_type != type) { + if (pinned) { + ret = -EBUSY; + goto err; } - } - return obj->mapping; -} + if (is_vmalloc_addr(ptr)) + vunmap(ptr); + else + kunmap(kmap_to_page(ptr)); -void i915_vma_move_to_active(struct i915_vma *vma, - struct drm_i915_gem_request *req) -{ - struct drm_i915_gem_object *obj = vma->obj; - struct intel_engine_cs *engine; + ptr = obj->mapping = NULL; + } - engine = i915_gem_request_get_engine(req); + if (!ptr) { + ptr = i915_gem_object_map(obj, type); + if (!ptr) { + ret = -ENOMEM; + goto err; + } - /* Add a reference if we're newly entering the active list. 
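The ptr_pack_bits()/ptr_unpack_bits()/ptr_mask_bits() helpers used on obj->mapping in this hunk rely on kmap()/vmap() returning page-aligned addresses, which leaves the low pointer bits free to carry the i915_map_type tag. A freestanding sketch of the idea; the names and the two-bit mask are illustrative:

#define MAP_TYPE_MASK 0x3ul	/* room for a small enum in the low bits */

static inline void *pack_ptr_bits(void *ptr, unsigned long bits)
{
	return (void *)((unsigned long)ptr | (bits & MAP_TYPE_MASK));
}

static inline void *mask_ptr_bits(void *ptr)	/* recover the pointer */
{
	return (void *)((unsigned long)ptr & ~MAP_TYPE_MASK);
}

static inline unsigned long unpack_ptr_bits(void *ptr)	/* recover the tag */
{
	return (unsigned long)ptr & MAP_TYPE_MASK;
}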
*/ - if (obj->active == 0) - drm_gem_object_reference(&obj->base); - obj->active |= intel_engine_flag(engine); + obj->mapping = ptr_pack_bits(ptr, type); + } - list_move_tail(&obj->engine_list[engine->id], &engine->active_list); - i915_gem_request_assign(&obj->last_read_req[engine->id], req); + return ptr; - list_move_tail(&vma->vm_link, &vma->vm->active_list); +err: + i915_gem_object_unpin_pages(obj); + return ERR_PTR(ret); } static void -i915_gem_object_retire__write(struct drm_i915_gem_object *obj) +i915_gem_object_retire__write(struct i915_gem_active *active, + struct drm_i915_gem_request *request) { - GEM_BUG_ON(obj->last_write_req == NULL); - GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write_req->engine))); + struct drm_i915_gem_object *obj = + container_of(active, struct drm_i915_gem_object, last_write); - i915_gem_request_assign(&obj->last_write_req, NULL); intel_fb_obj_flush(obj, true, ORIGIN_CS); } static void -i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) +i915_gem_object_retire__read(struct i915_gem_active *active, + struct drm_i915_gem_request *request) { - struct i915_vma *vma; - - GEM_BUG_ON(obj->last_read_req[ring] == NULL); - GEM_BUG_ON(!(obj->active & (1 << ring))); - - list_del_init(&obj->engine_list[ring]); - i915_gem_request_assign(&obj->last_read_req[ring], NULL); + int idx = request->engine->id; + struct drm_i915_gem_object *obj = + container_of(active, struct drm_i915_gem_object, last_read[idx]); - if (obj->last_write_req && obj->last_write_req->engine->id == ring) - i915_gem_object_retire__write(obj); + GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, idx)); - obj->active &= ~(1 << ring); - if (obj->active) + i915_gem_object_clear_active(obj, idx); + if (i915_gem_object_is_active(obj)) return; /* Bump our place on the bound list to keep it roughly in LRU order * so that we don't steal from recently used but inactive objects * (unless we are forced to ofc!) 
*/ - list_move_tail(&obj->global_list, - &to_i915(obj->base.dev)->mm.bound_list); - - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (!list_empty(&vma->vm_link)) - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); - } + if (obj->bind_count) + list_move_tail(&obj->global_list, + &request->i915->mm.bound_list); - i915_gem_request_assign(&obj->last_fenced_req, NULL); - drm_gem_object_unreference(&obj->base); -} - -static int -i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno) -{ - struct intel_engine_cs *engine; - int ret; - - /* Carefully retire all requests without writing to the rings */ - for_each_engine(engine, dev_priv) { - ret = intel_engine_idle(engine); - if (ret) - return ret; - } - i915_gem_retire_requests(dev_priv); - - /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ - if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) { - while (intel_kick_waiters(dev_priv) || - intel_kick_signalers(dev_priv)) - yield(); - } - - /* Finally reset hw state */ - for_each_engine(engine, dev_priv) - intel_ring_init_seqno(engine, seqno); - - return 0; -} - -int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - int ret; - - if (seqno == 0) - return -EINVAL; - - /* HWS page needs to be set less than what we - * will inject to ring - */ - ret = i915_gem_init_seqno(dev_priv, seqno - 1); - if (ret) - return ret; - - /* Carefully set the last_seqno value so that wrap - * detection still works - */ - dev_priv->next_seqno = seqno; - dev_priv->last_seqno = seqno - 1; - if (dev_priv->last_seqno == 0) - dev_priv->last_seqno--; - - return 0; -} - -int -i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno) -{ - /* reserve 0 for non-seqno */ - if (dev_priv->next_seqno == 0) { - int ret = i915_gem_init_seqno(dev_priv, 0); - if (ret) - return ret; - - dev_priv->next_seqno = 1; - } - - *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; - return 0; -} - -static void i915_gem_mark_busy(const struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - dev_priv->gt.active_engines |= intel_engine_flag(engine); - if (dev_priv->gt.awake) - return; - - intel_runtime_pm_get_noresume(dev_priv); - dev_priv->gt.awake = true; - - i915_update_gfx_val(dev_priv); - if (INTEL_GEN(dev_priv) >= 6) - gen6_rps_busy(dev_priv); - - queue_delayed_work(dev_priv->wq, - &dev_priv->gt.retire_work, - round_jiffies_up_relative(HZ)); -} - -/* - * NB: This function is not allowed to fail. Doing so would mean the - * request is not being tracked for completion but the work itself is - * going to happen on the hardware. This would be a Bad Thing(tm). - */ -void __i915_add_request(struct drm_i915_gem_request *request, - struct drm_i915_gem_object *obj, - bool flush_caches) -{ - struct intel_engine_cs *engine; - struct intel_ringbuffer *ringbuf; - u32 request_start; - u32 reserved_tail; - int ret; - - if (WARN_ON(request == NULL)) - return; - - engine = request->engine; - ringbuf = request->ringbuf; - - /* - * To ensure that this call will not fail, space for its emissions - * should already have been reserved in the ring buffer. Let the ring - * know that it is time to use that space up. - */ - request_start = intel_ring_get_tail(ringbuf); - reserved_tail = request->reserved_space; - request->reserved_space = 0; - - /* - * Emit any outstanding flushes - execbuf can fail to emit the flush - * after having emitted the batchbuffer command.
Hence we need to fix - * things up similar to emitting the lazy request. The difference here - * is that the flush _must_ happen before the next request, no matter - * what. - */ - if (flush_caches) { - if (i915.enable_execlists) - ret = logical_ring_flush_all_caches(request); - else - ret = intel_ring_flush_all_caches(request); - /* Not allowed to fail! */ - WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); - } - - trace_i915_gem_request_add(request); - - request->head = request_start; - - /* Whilst this request exists, batch_obj will be on the - * active_list, and so will hold the active reference. Only when this - * request is retired will the batch_obj be moved onto the - * inactive_list and lose its active reference. Hence we do not need - * to explicitly hold another reference here. - */ - request->batch_obj = obj; - - /* Seal the request and mark it as pending execution. Note that - * we may inspect this state, without holding any locks, during - * hangcheck. Hence we apply the barrier to ensure that we do not - * see a more recent value in the hws than we are tracking. - */ - request->emitted_jiffies = jiffies; - request->previous_seqno = engine->last_submitted_seqno; - smp_store_mb(engine->last_submitted_seqno, request->seqno); - list_add_tail(&request->list, &engine->request_list); - - /* Record the position of the start of the request so that - * should we detect the updated seqno part-way through the - * GPU processing the request, we never over-estimate the - * position of the head. - */ - request->postfix = intel_ring_get_tail(ringbuf); - - if (i915.enable_execlists) - ret = engine->emit_request(request); - else { - ret = engine->add_request(request); - - request->tail = intel_ring_get_tail(ringbuf); - } - /* Not allowed to fail! */ - WARN(ret, "emit|add_request failed: %d!\n", ret); - /* Sanity check that the reserved size was large enough. */ - ret = intel_ring_get_tail(ringbuf) - request_start; - if (ret < 0) - ret += ringbuf->size; - WARN_ONCE(ret > reserved_tail, - "Not enough space reserved (%d bytes) " - "for adding the request (%d bytes)\n", - reserved_tail, ret); - - i915_gem_mark_busy(engine); + i915_gem_object_put(obj); } static bool i915_context_is_banned(const struct i915_gem_context *ctx) @@ -2978,101 +2532,6 @@ static void i915_set_reset_status(struct i915_gem_context *ctx, } } -void i915_gem_request_free(struct kref *req_ref) -{ - struct drm_i915_gem_request *req = container_of(req_ref, - typeof(*req), ref); - kmem_cache_free(req->i915->requests, req); -} - -static inline int -__i915_gem_request_alloc(struct intel_engine_cs *engine, - struct i915_gem_context *ctx, - struct drm_i915_gem_request **req_out) -{ - struct drm_i915_private *dev_priv = engine->i915; - unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error); - struct drm_i915_gem_request *req; - int ret; - - if (!req_out) - return -EINVAL; - - *req_out = NULL; - - /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report - * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex - * and restart.
- */ - ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible); - if (ret) - return ret; - - req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); - if (req == NULL) - return -ENOMEM; - - ret = i915_gem_get_seqno(engine->i915, &req->seqno); - if (ret) - goto err; - - kref_init(&req->ref); - req->i915 = dev_priv; - req->engine = engine; - req->ctx = ctx; - i915_gem_context_reference(req->ctx); - - /* - * Reserve space in the ring buffer for all the commands required to - * eventually emit this request. This is to guarantee that the - * i915_add_request() call can't fail. Note that the reserve may need - * to be redone if the request is not actually submitted straight - * away, e.g. because a GPU scheduler has deferred it. - */ - req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; - - if (i915.enable_execlists) - ret = intel_logical_ring_alloc_request_extras(req); - else - ret = intel_ring_alloc_request_extras(req); - if (ret) - goto err_ctx; - - *req_out = req; - return 0; - -err_ctx: - i915_gem_context_unreference(ctx); -err: - kmem_cache_free(dev_priv->requests, req); - return ret; -} - -/** - * i915_gem_request_alloc - allocate a request structure - * - * @engine: engine that we wish to issue the request on. - * @ctx: context that the request will be associated with. - * This can be NULL if the request is not directly related to - * any specific user context, in which case this function will - * choose an appropriate context to use. - * - * Returns a pointer to the allocated request if successful, - * or an error code if not. - */ -struct drm_i915_gem_request * -i915_gem_request_alloc(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) -{ - struct drm_i915_gem_request *req; - int err; - - if (ctx == NULL) - ctx = engine->i915->kernel_context; - err = __i915_gem_request_alloc(engine, ctx, &req); - return err ? ERR_PTR(err) : req; -} - struct drm_i915_gem_request * i915_gem_find_active_request(struct intel_engine_cs *engine) { @@ -3086,182 +2545,143 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) * extra delay for a recent interrupt is pointless. Hence, we do * not need an engine->irq_seqno_barrier() before the seqno reads. */ - list_for_each_entry(request, &engine->request_list, list) { + list_for_each_entry(request, &engine->request_list, link) { if (i915_gem_request_completed(request)) continue; + if (!i915_sw_fence_done(&request->submit)) + break; + return request; } return NULL; } -static void i915_gem_reset_engine_status(struct intel_engine_cs *engine) +static void reset_request(struct drm_i915_gem_request *request) +{ + void *vaddr = request->ring->vaddr; + u32 head; + + /* As this request likely depends on state from the lost + * context, clear out all the user operations leaving the + * breadcrumb at the end (so we get the fence notifications). + */ + head = request->head; + if (request->postfix < head) { + memset(vaddr + head, 0, request->ring->size - head); + head = 0; + } + memset(vaddr + head, 0, request->postfix - head); +} + +static void i915_gem_reset_engine(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request; + struct i915_gem_context *incomplete_ctx; bool ring_hung; + /* Ensure the irq handler finishes, and is not run again.
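reset_request() above clears a hung request's payload while leaving its breadcrumb intact, and must cope with the request having wrapped the ring; isolated from the driver structures, the two-span clear is just:

#include <string.h>

/* Zero [head, postfix) in a circular buffer, splitting the memset in two
 * when the span wraps past the end of the ring.
 */
static void clear_ring_span(char *vaddr, unsigned int ring_size,
			    unsigned int head, unsigned int postfix)
{
	if (postfix < head) {			/* the request wrapped */
		memset(vaddr + head, 0, ring_size - head);
		head = 0;
	}
	memset(vaddr + head, 0, postfix - head);
}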
*/ + tasklet_kill(&engine->irq_tasklet); + if (engine->irq_seqno_barrier) + engine->irq_seqno_barrier(engine); + request = i915_gem_find_active_request(engine); - if (request == NULL) + if (!request) return; ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; - i915_set_reset_status(request->ctx, ring_hung); - list_for_each_entry_continue(request, &engine->request_list, list) - i915_set_reset_status(request->ctx, false); -} - -static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) -{ - struct intel_ringbuffer *buffer; - - while (!list_empty(&engine->active_list)) { - struct drm_i915_gem_object *obj; - - obj = list_first_entry(&engine->active_list, - struct drm_i915_gem_object, - engine_list[engine->id]); - - i915_gem_object_retire__read(obj, engine->id); - } - - /* - * Clear the execlists queue up before freeing the requests, as those - * are the ones that keep the context and ringbuffer backing objects - * pinned in place. - */ - - if (i915.enable_execlists) { - /* Ensure irq handler finishes or is cancelled. */ - tasklet_kill(&engine->irq_tasklet); - - intel_execlists_cancel_requests(engine); - } - - /* - * We must free the requests after all the corresponding objects have - * been moved off active lists. Which is the same order as the normal - * retire_requests function does. This is important if object hold - * implicit references on things like e.g. ppgtt address spaces through - * the request. - */ - while (!list_empty(&engine->request_list)) { - struct drm_i915_gem_request *request; + if (!ring_hung) + return; - request = list_first_entry(&engine->request_list, - struct drm_i915_gem_request, - list); + DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", + engine->name, request->fence.seqno); - i915_gem_request_retire(request); - } + /* Setup the CS to resume from the breadcrumb of the hung request */ + engine->reset_hw(engine, request); - /* Having flushed all requests from all queues, we know that all - * ringbuffers must now be empty. However, since we do not reclaim - * all space when retiring the request (to prevent HEADs colliding - * with rapid ringbuffer wraparound) the amount of available space - * upon reset is less than when we start. Do one more pass over - * all the ringbuffers to reset last_retired_head. + /* Users of the default context do not rely on logical state + * preserved between batches. They have to emit full state on + * every batch and so it is safe to execute queued requests following + * the hang. + * + * Other contexts preserve state, now corrupt. We want to skip all + * queued requests that reference the corrupt context. */ - list_for_each_entry(buffer, &engine->buffers, link) { - buffer->last_retired_head = buffer->tail; - intel_ring_update_space(buffer); - } + incomplete_ctx = request->ctx; + if (i915_gem_context_is_default(incomplete_ctx)) + return; - intel_ring_init_seqno(engine, engine->last_submitted_seqno); + list_for_each_entry_continue(request, &engine->request_list, link) + if (request->ctx == incomplete_ctx) + reset_request(request); } -void i915_gem_reset(struct drm_device *dev) +void i915_gem_reset(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_engine_cs *engine; - /* - * Before we free the objects from the requests, we need to inspect - * them for finding the guilty party. As the requests only borrow - * their reference to the objects, the inspection must be done first. 
- */ - for_each_engine(engine, dev_priv) - i915_gem_reset_engine_status(engine); + i915_gem_retire_requests(dev_priv); for_each_engine(engine, dev_priv) - i915_gem_reset_engine_cleanup(engine); + i915_gem_reset_engine(engine); - i915_gem_context_reset(dev); + i915_gem_restore_fences(&dev_priv->drm); - i915_gem_restore_fences(dev); + if (dev_priv->gt.awake) { + intel_sanitize_gt_powersave(dev_priv); + intel_enable_gt_powersave(dev_priv); + if (INTEL_GEN(dev_priv) >= 6) + gen6_rps_busy(dev_priv); + } +} - WARN_ON(i915_verify_lists(dev)); +static void nop_submit_request(struct drm_i915_gem_request *request) +{ } -/** - * This function clears the request list as sequence numbers are passed. - * @engine: engine to retire requests on - */ -void -i915_gem_retire_requests_ring(struct intel_engine_cs *engine) +static void i915_gem_cleanup_engine(struct intel_engine_cs *engine) { - WARN_ON(i915_verify_lists(engine->dev)); + engine->submit_request = nop_submit_request; - /* Retire requests first as we use it above for the early return. - * If we retire requests last, we may use a later seqno and so clear - * the requests lists without clearing the active list, leading to - * confusion. + /* Mark all pending requests as complete so that any concurrent + * (lockless) lookup doesn't try and wait upon the request as we + * reset it. */ - while (!list_empty(&engine->request_list)) { - struct drm_i915_gem_request *request; - - request = list_first_entry(&engine->request_list, - struct drm_i915_gem_request, - list); + intel_engine_init_seqno(engine, engine->last_submitted_seqno); - if (!i915_gem_request_completed(request)) - break; - - i915_gem_request_retire(request); - } - - /* Move any buffers on the active list that are no longer referenced - * by the ringbuffer to the flushing/inactive lists as appropriate, - * before we free the context associated with the requests. + /* + * Clear the execlists queue up before freeing the requests, as those + * are the ones that keep the context and ringbuffer backing objects + * pinned in place. 
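+	 * Both execlist ports below drop their request references and the
+	 * pending execlist_queue is reinitialised, so nothing left in the
+	 * submission backend keeps those objects pinned afterwards.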
*/ - while (!list_empty(&engine->active_list)) { - struct drm_i915_gem_object *obj; - obj = list_first_entry(&engine->active_list, - struct drm_i915_gem_object, - engine_list[engine->id]); - - if (!list_empty(&obj->last_read_req[engine->id]->list)) - break; - - i915_gem_object_retire__read(obj, engine->id); + if (i915.enable_execlists) { + spin_lock(&engine->execlist_lock); + INIT_LIST_HEAD(&engine->execlist_queue); + i915_gem_request_put(engine->execlist_port[0].request); + i915_gem_request_put(engine->execlist_port[1].request); + memset(engine->execlist_port, 0, sizeof(engine->execlist_port)); + spin_unlock(&engine->execlist_lock); } - WARN_ON(i915_verify_lists(engine->dev)); + engine->i915->gt.active_engines &= ~intel_engine_flag(engine); } -void i915_gem_retire_requests(struct drm_i915_private *dev_priv) +void i915_gem_set_wedged(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; lockdep_assert_held(&dev_priv->drm.struct_mutex); + set_bit(I915_WEDGED, &dev_priv->gpu_error.flags); - if (dev_priv->gt.active_engines == 0) - return; - - GEM_BUG_ON(!dev_priv->gt.awake); - - for_each_engine(engine, dev_priv) { - i915_gem_retire_requests_ring(engine); - if (list_empty(&engine->request_list)) - dev_priv->gt.active_engines &= ~intel_engine_flag(engine); - } + i915_gem_context_lost(dev_priv); + for_each_engine(engine, dev_priv) + i915_gem_cleanup_engine(engine); + mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); - if (dev_priv->gt.active_engines == 0) - queue_delayed_work(dev_priv->wq, - &dev_priv->gt.idle_work, - msecs_to_jiffies(100)); + i915_gem_retire_requests(dev_priv); } static void @@ -3281,10 +2701,12 @@ i915_gem_retire_work_handler(struct work_struct *work) * We do not need to do this test under locking as in the worst-case * we queue the retire worker once too often. */ - if (READ_ONCE(dev_priv->gt.awake)) + if (READ_ONCE(dev_priv->gt.awake)) { + i915_queue_hangcheck(dev_priv); queue_delayed_work(dev_priv->wq, &dev_priv->gt.retire_work, round_jiffies_up_relative(HZ)); + } } static void @@ -3294,7 +2716,6 @@ i915_gem_idle_work_handler(struct work_struct *work) container_of(work, typeof(*dev_priv), gt.idle_work.work); struct drm_device *dev = &dev_priv->drm; struct intel_engine_cs *engine; - unsigned int stuck_engines; bool rearm_hangcheck; if (!READ_ONCE(dev_priv->gt.awake)) @@ -3324,12 +2745,6 @@ i915_gem_idle_work_handler(struct work_struct *work) dev_priv->gt.awake = false; rearm_hangcheck = false; - stuck_engines = intel_kick_waiters(dev_priv); - if (unlikely(stuck_engines)) { - DRM_DEBUG_DRIVER("kicked stuck waiters...missed irq\n"); - dev_priv->gpu_error.missed_irq_rings |= stuck_engines; - } - if (INTEL_GEN(dev_priv) >= 6) gen6_rps_idle(dev_priv); intel_runtime_pm_put(dev_priv); @@ -3343,32 +2758,17 @@ out_rearm: } } -/** - * Ensures that an object will eventually get non-busy by flushing any required - * write domains, emitting any outstanding lazy request and retiring and - * completed requests. 
- * @obj: object to flush - */ -static int -i915_gem_object_flush_active(struct drm_i915_gem_object *obj) +void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) { - int i; - - if (!obj->active) - return 0; - - for (i = 0; i < I915_NUM_ENGINES; i++) { - struct drm_i915_gem_request *req; - - req = obj->last_read_req[i]; - if (req == NULL) - continue; - - if (i915_gem_request_completed(req)) - i915_gem_object_retire__read(obj, i); - } + struct drm_i915_gem_object *obj = to_intel_bo(gem); + struct drm_i915_file_private *fpriv = file->driver_priv; + struct i915_vma *vma, *vn; - return 0; + mutex_lock(&obj->base.dev->struct_mutex); + list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link) + if (vma->vm->file == fpriv) + i915_vma_close(vma); + mutex_unlock(&obj->base.dev->struct_mutex); } /** @@ -3399,219 +2799,35 @@ int i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_wait *args = data; + struct intel_rps_client *rps = to_rps_client(file); struct drm_i915_gem_object *obj; - struct drm_i915_gem_request *req[I915_NUM_ENGINES]; - int i, n = 0; - int ret; + unsigned long active; + int idx, ret = 0; if (args->flags != 0) return -EINVAL; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(file, args->bo_handle)); - if (&obj->base == NULL) { - mutex_unlock(&dev->struct_mutex); + obj = i915_gem_object_lookup(file, args->bo_handle); + if (!obj) return -ENOENT; - } - - /* Need to make sure the object gets inactive eventually. */ - ret = i915_gem_object_flush_active(obj); - if (ret) - goto out; - - if (!obj->active) - goto out; - - /* Do this after OLR check to make sure we make forward progress polling - * on this IOCTL with a timeout == 0 (like busy ioctl) - */ - if (args->timeout_ns == 0) { - ret = -ETIME; - goto out; - } - drm_gem_object_unreference(&obj->base); - - for (i = 0; i < I915_NUM_ENGINES; i++) { - if (obj->last_read_req[i] == NULL) - continue; - - req[n++] = i915_gem_request_reference(obj->last_read_req[i]); - } - - mutex_unlock(&dev->struct_mutex); - - for (i = 0; i < n; i++) { - if (ret == 0) - ret = __i915_wait_request(req[i], true, - args->timeout_ns > 0 ? 
&args->timeout_ns : NULL, - to_rps_client(file)); - i915_gem_request_unreference(req[i]); - } - return ret; - -out: - drm_gem_object_unreference(&obj->base); - mutex_unlock(&dev->struct_mutex); - return ret; -} - -static int -__i915_gem_object_sync(struct drm_i915_gem_object *obj, - struct intel_engine_cs *to, - struct drm_i915_gem_request *from_req, - struct drm_i915_gem_request **to_req) -{ - struct intel_engine_cs *from; - int ret; - - from = i915_gem_request_get_engine(from_req); - if (to == from) - return 0; - - if (i915_gem_request_completed(from_req)) - return 0; - - if (!i915_semaphore_is_enabled(to_i915(obj->base.dev))) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - ret = __i915_wait_request(from_req, - i915->mm.interruptible, - NULL, - &i915->rps.semaphores); - if (ret) - return ret; - - i915_gem_object_retire_request(obj, from_req); - } else { - int idx = intel_ring_sync_index(from, to); - u32 seqno = i915_gem_request_get_seqno(from_req); - - WARN_ON(!to_req); - - if (seqno <= from->semaphore.sync_seqno[idx]) - return 0; - - if (*to_req == NULL) { - struct drm_i915_gem_request *req; - - req = i915_gem_request_alloc(to, NULL); - if (IS_ERR(req)) - return PTR_ERR(req); - - *to_req = req; - } - - trace_i915_gem_ring_sync_to(*to_req, from, from_req); - ret = to->semaphore.sync_to(*to_req, from, seqno); - if (ret) - return ret; - - /* We use last_read_req because sync_to() - * might have just caused seqno wrap under - * the radar. - */ - from->semaphore.sync_seqno[idx] = - i915_gem_request_get_seqno(obj->last_read_req[from->id]); - } - - return 0; -} - -/** - * i915_gem_object_sync - sync an object to a ring. - * - * @obj: object which may be in use on another ring. - * @to: ring we wish to use the object on. May be NULL. - * @to_req: request we wish to use the object for. See below. - * This will be allocated and returned if a request is - * required but not passed in. - * - * This code is meant to abstract object synchronization with the GPU. - * Calling with NULL implies synchronizing the object with the CPU - * rather than a particular GPU ring. Conceptually we serialise writes - * between engines inside the GPU. We only allow one engine to write - * into a buffer at any time, but multiple readers. To ensure each has - * a coherent view of memory, we must: - * - * - If there is an outstanding write request to the object, the new - * request must wait for it to complete (either CPU or in hw, requests - * on the same ring will be naturally ordered). - * - * - If we are a write request (pending_write_domain is set), the new - * request must wait for outstanding read requests to complete. - * - * For CPU synchronisation (NULL to) no request is required. For syncing with - * rings to_req must be non-NULL. However, a request does not have to be - * pre-allocated. If *to_req is NULL and sync commands will be emitted then a - * request will be allocated automatically and returned through *to_req. Note - * that it is not guaranteed that commands will be emitted (because the system - * might already be idle). Hence there is no need to create a request that - * might never have any work submitted. Note further that if a request is - * returned in *to_req, it is the responsibility of the caller to submit - * that request (after potentially adding more work to it). - * - * Returns 0 if successful, else propagates up the lower layer error. 
- */ -int -i915_gem_object_sync(struct drm_i915_gem_object *obj, - struct intel_engine_cs *to, - struct drm_i915_gem_request **to_req) -{ - const bool readonly = obj->base.pending_write_domain == 0; - struct drm_i915_gem_request *req[I915_NUM_ENGINES]; - int ret, i, n; - - if (!obj->active) - return 0; - - if (to == NULL) - return i915_gem_object_wait_rendering(obj, readonly); - - n = 0; - if (readonly) { - if (obj->last_write_req) - req[n++] = obj->last_write_req; - } else { - for (i = 0; i < I915_NUM_ENGINES; i++) - if (obj->last_read_req[i]) - req[n++] = obj->last_read_req[i]; - } - for (i = 0; i < n; i++) { - ret = __i915_gem_object_sync(obj, to, req[i], to_req); + active = __I915_BO_ACTIVE(obj); + for_each_active(active, idx) { + s64 *timeout = args->timeout_ns >= 0 ? &args->timeout_ns : NULL; + ret = i915_gem_active_wait_unlocked(&obj->last_read[idx], + I915_WAIT_INTERRUPTIBLE, + timeout, rps); if (ret) - return ret; + break; } - return 0; -} - -static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) -{ - u32 old_write_domain, old_read_domains; - - /* Force a pagefault for domain tracking on next user access */ - i915_gem_release_mmap(obj); - - if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) - return; - - old_read_domains = obj->base.read_domains; - old_write_domain = obj->base.write_domain; - - obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; - obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; - - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); + i915_gem_object_put_unlocked(obj); + return ret; } static void __i915_vma_iounmap(struct i915_vma *vma) { - GEM_BUG_ON(vma->pin_count); + GEM_BUG_ON(i915_vma_is_pinned(vma)); if (vma->iomap == NULL) return; @@ -3620,65 +2836,83 @@ static void __i915_vma_iounmap(struct i915_vma *vma) vma->iomap = NULL; } -static int __i915_vma_unbind(struct i915_vma *vma, bool wait) +int i915_vma_unbind(struct i915_vma *vma) { struct drm_i915_gem_object *obj = vma->obj; - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + unsigned long active; int ret; - if (list_empty(&vma->obj_link)) - return 0; - - if (!drm_mm_node_allocated(&vma->node)) { - i915_gem_vma_destroy(vma); - return 0; - } - - if (vma->pin_count) - return -EBUSY; + /* First wait upon any activity as retiring the request may + * have side-effects such as unpinning or even unbinding this vma. + */ + active = i915_vma_get_active(vma); + if (active) { + int idx; + + /* When a closed VMA is retired, it is unbound - eek. + * In order to prevent it from being recursively closed, + * take a pin on the vma so that the second unbind is + * aborted. 
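+		 * (With the pin held, the nested i915_vma_unbind() hits the
+		 *  i915_vma_is_pinned() check below and bails out with
+		 *  -EBUSY instead of recursing.)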
+ */ + __i915_vma_pin(vma); - BUG_ON(obj->pages == NULL); + for_each_active(active, idx) { + ret = i915_gem_active_retire(&vma->last_read[idx], + &vma->vm->dev->struct_mutex); + if (ret) + break; + } - if (wait) { - ret = i915_gem_object_wait_rendering(obj, false); + __i915_vma_unpin(vma); if (ret) return ret; + + GEM_BUG_ON(i915_vma_is_active(vma)); } - if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { - i915_gem_object_finish_gtt(obj); + if (i915_vma_is_pinned(vma)) + return -EBUSY; + + if (!drm_mm_node_allocated(&vma->node)) + goto destroy; + + GEM_BUG_ON(obj->bind_count == 0); + GEM_BUG_ON(!obj->pages); + if (i915_vma_is_map_and_fenceable(vma)) { /* release the fence reg _after_ flushing */ - ret = i915_gem_object_put_fence(obj); + ret = i915_vma_put_fence(vma); if (ret) return ret; + /* Force a pagefault for domain tracking on next user access */ + i915_gem_release_mmap(obj); + __i915_vma_iounmap(vma); + vma->flags &= ~I915_VMA_CAN_FENCE; } - trace_i915_vma_unbind(vma); - - vma->vm->unbind_vma(vma); - vma->bound = 0; - - list_del_init(&vma->vm_link); - if (vma->is_ggtt) { - if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { - obj->map_and_fenceable = false; - } else if (vma->ggtt_view.pages) { - sg_free_table(vma->ggtt_view.pages); - kfree(vma->ggtt_view.pages); - } - vma->ggtt_view.pages = NULL; + if (likely(!vma->vm->closed)) { + trace_i915_vma_unbind(vma); + vma->vm->unbind_vma(vma); } + vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); drm_mm_remove_node(&vma->node); - i915_gem_vma_destroy(vma); + list_move_tail(&vma->vm_link, &vma->vm->unbound_list); + + if (vma->pages != obj->pages) { + GEM_BUG_ON(!vma->pages); + sg_free_table(vma->pages); + kfree(vma->pages); + } + vma->pages = NULL; /* Since the unbound list is global, only move to that list if * no more VMAs exist. */ - if (list_empty(&obj->vma_list)) - list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); + if (--obj->bind_count == 0) + list_move_tail(&obj->global_list, + &to_i915(obj->base.dev)->mm.unbound_list); /* And finally now the object is completely decoupled from this vma, * we can drop its hold on the backing storage and allow it to be @@ -3686,36 +2920,28 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait) */ i915_gem_object_unpin_pages(obj); - return 0; -} +destroy: + if (unlikely(i915_vma_is_closed(vma))) + i915_vma_destroy(vma); -int i915_vma_unbind(struct i915_vma *vma) -{ - return __i915_vma_unbind(vma, true); -} - -int __i915_vma_unbind_no_wait(struct i915_vma *vma) -{ - return __i915_vma_unbind(vma, false); + return 0; } -int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv) +int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, + unsigned int flags) { struct intel_engine_cs *engine; int ret; - lockdep_assert_held(&dev_priv->drm.struct_mutex); - for_each_engine(engine, dev_priv) { if (engine->last_context == NULL) continue; - ret = intel_engine_idle(engine); + ret = intel_engine_idle(engine, flags); if (ret) return ret; } - WARN_ON(i915_verify_lists(dev)); return 0; } @@ -3753,128 +2979,87 @@ static bool i915_gem_valid_gtt_space(struct i915_vma *vma, } /** - * Finds free space in the GTT aperture and binds the object or a view of it - * there. 
- * @obj: object to bind
- * @vm: address space to bind into
- * @ggtt_view: global gtt view if applicable
- * @alignment: requested alignment
+ * i915_vma_insert - finds a slot for the vma in its address space
+ * @vma: the vma
+ * @size: requested size in bytes (can be larger than the VMA)
+ * @alignment: required alignment
 * @flags: mask of PIN_* flags to use
+ *
+ * First we try to allocate some free space that meets the requirements for
+ * the VMA. Failing that, if the flags permit, it will evict an old VMA,
+ * preferably the oldest idle entry to make room for the new VMA.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise.
 */
-static struct i915_vma *
-i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
-			   struct i915_address_space *vm,
-			   const struct i915_ggtt_view *ggtt_view,
-			   unsigned alignment,
-			   uint64_t flags)
+static int
+i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	u32 fence_alignment, unfenced_alignment;
-	u32 search_flag, alloc_flag;
+	struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
+	struct drm_i915_gem_object *obj = vma->obj;
 	u64 start, end;
-	u64 size, fence_size;
-	struct i915_vma *vma;
 	int ret;
 
-	if (i915_is_ggtt(vm)) {
-		u32 view_size;
-
-		if (WARN_ON(!ggtt_view))
-			return ERR_PTR(-EINVAL);
-
-		view_size = i915_ggtt_view_size(obj, ggtt_view);
-
-		fence_size = i915_gem_get_gtt_size(dev,
-						   view_size,
-						   obj->tiling_mode);
-		fence_alignment = i915_gem_get_gtt_alignment(dev,
-							     view_size,
-							     obj->tiling_mode,
-							     true);
-		unfenced_alignment = i915_gem_get_gtt_alignment(dev,
-								view_size,
-								obj->tiling_mode,
-								false);
-		size = flags & PIN_MAPPABLE ? fence_size : view_size;
-	} else {
-		fence_size = i915_gem_get_gtt_size(dev,
-						   obj->base.size,
-						   obj->tiling_mode);
-		fence_alignment = i915_gem_get_gtt_alignment(dev,
-							     obj->base.size,
-							     obj->tiling_mode,
-							     true);
-		unfenced_alignment =
-			i915_gem_get_gtt_alignment(dev,
-						   obj->base.size,
-						   obj->tiling_mode,
-						   false);
-		size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
-	}
+	GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
+	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
+
+	size = max(size, vma->size);
+	if (flags & PIN_MAPPABLE)
+		size = i915_gem_get_ggtt_size(dev_priv, size,
+					      i915_gem_object_get_tiling(obj));
+
+	alignment = max(max(alignment, vma->display_alignment),
+			i915_gem_get_ggtt_alignment(dev_priv, size,
						    i915_gem_object_get_tiling(obj),
						    flags & PIN_MAPPABLE));
 
 	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
-	end = vm->total;
+
+	end = vma->vm->total;
 	if (flags & PIN_MAPPABLE)
-		end = min_t(u64, end, ggtt->mappable_end);
+		end = min_t(u64, end, dev_priv->ggtt.mappable_end);
 	if (flags & PIN_ZONE_4G)
 		end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
 
-	if (alignment == 0)
-		alignment = flags & PIN_MAPPABLE ? fence_alignment :
-						unfenced_alignment;
-	if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
-		DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
-			  ggtt_view ? ggtt_view->type : 0,
-			  alignment);
-		return ERR_PTR(-EINVAL);
-	}
-
 	/* If binding the object/GGTT view requires more space than the entire
 	 * aperture has, reject it early before evicting everything in a vain
 	 * attempt to find space.
 	 */
 	if (size > end) {
-		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
-			  ggtt_view ?
ggtt_view->type : 0, - size, + DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n", + size, obj->base.size, flags & PIN_MAPPABLE ? "mappable" : "total", end); - return ERR_PTR(-E2BIG); + return -E2BIG; } ret = i915_gem_object_get_pages(obj); if (ret) - return ERR_PTR(ret); + return ret; i915_gem_object_pin_pages(obj); - vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : - i915_gem_obj_lookup_or_create_vma(obj, vm); - - if (IS_ERR(vma)) - goto err_unpin; - if (flags & PIN_OFFSET_FIXED) { - uint64_t offset = flags & PIN_OFFSET_MASK; - - if (offset & (alignment - 1) || offset + size > end) { + u64 offset = flags & PIN_OFFSET_MASK; + if (offset & (alignment - 1) || offset > end - size) { ret = -EINVAL; - goto err_free_vma; + goto err_unpin; } + vma->node.start = offset; vma->node.size = size; vma->node.color = obj->cache_level; - ret = drm_mm_reserve_node(&vm->mm, &vma->node); + ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); if (ret) { ret = i915_gem_evict_for_vma(vma); if (ret == 0) - ret = drm_mm_reserve_node(&vm->mm, &vma->node); + ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); + if (ret) + goto err_unpin; } - if (ret) - goto err_free_vma; } else { + u32 search_flag, alloc_flag; + if (flags & PIN_HIGH) { search_flag = DRM_MM_SEARCH_BELOW; alloc_flag = DRM_MM_CREATE_TOP; @@ -3883,47 +3068,45 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, alloc_flag = DRM_MM_CREATE_DEFAULT; } + /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks, + * so we know that we always have a minimum alignment of 4096. + * The drm_mm range manager is optimised to return results + * with zero alignment, so where possible use the optimal + * path. + */ + if (alignment <= 4096) + alignment = 0; + search_free: - ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, + ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm, + &vma->node, size, alignment, obj->cache_level, start, end, search_flag, alloc_flag); if (ret) { - ret = i915_gem_evict_something(dev, vm, size, alignment, + ret = i915_gem_evict_something(vma->vm, size, alignment, obj->cache_level, start, end, flags); if (ret == 0) goto search_free; - goto err_free_vma; + goto err_unpin; } } - if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { - ret = -EINVAL; - goto err_remove_node; - } - - trace_i915_vma_bind(vma, flags); - ret = i915_vma_bind(vma, obj->cache_level, flags); - if (ret) - goto err_remove_node; + GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level)); list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); - list_add_tail(&vma->vm_link, &vm->inactive_list); + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + obj->bind_count++; - return vma; + return 0; -err_remove_node: - drm_mm_remove_node(&vma->node); -err_free_vma: - i915_gem_vma_destroy(vma); - vma = ERR_PTR(ret); err_unpin: i915_gem_object_unpin_pages(obj); - return vma; + return ret; } bool @@ -3968,51 +3151,72 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj, static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) { - uint32_t old_write_domain; + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) return; /* No actual flushing is required for the GTT write domain. Writes - * to it immediately go to main memory as far as we know, so there's + * to it "immediately" go to main memory as far as we know, so there's * no chipset flush. 
It also doesn't land in render cache. * * However, we do have to enforce the order so that all writes through * the GTT land before any writes to the device, such as updates to * the GATT itself. + * + * We also have to wait a bit for the writes to land from the GTT. + * An uncached read (i.e. mmio) seems to be ideal for the round-trip + * timing. This issue has only been observed when switching quickly + * between GTT writes and CPU reads from inside the kernel on recent hw, + * and it appears to only affect discrete GTT blocks (i.e. on LLC + * system agents we cannot reproduce this behaviour). */ wmb(); + if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) + POSTING_READ(RING_ACTHD(dev_priv->engine[RCS].mmio_base)); - old_write_domain = obj->base.write_domain; - obj->base.write_domain = 0; - - intel_fb_obj_flush(obj, false, ORIGIN_GTT); + intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT)); + obj->base.write_domain = 0; trace_i915_gem_object_change_domain(obj, obj->base.read_domains, - old_write_domain); + I915_GEM_DOMAIN_GTT); } /** Flushes the CPU write domain for the object if it's dirty. */ static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) { - uint32_t old_write_domain; - if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) return; if (i915_gem_clflush_object(obj, obj->pin_display)) i915_gem_chipset_flush(to_i915(obj->base.dev)); - old_write_domain = obj->base.write_domain; - obj->base.write_domain = 0; - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + obj->base.write_domain = 0; trace_i915_gem_object_change_domain(obj, obj->base.read_domains, - old_write_domain); + I915_GEM_DOMAIN_CPU); +} + +static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) +{ + struct i915_vma *vma; + + list_for_each_entry(vma, &obj->vma_list, obj_link) { + if (!i915_vma_is_ggtt(vma)) + continue; + + if (i915_vma_is_active(vma)) + continue; + + if (!drm_mm_node_allocated(&vma->node)) + continue; + + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + } } /** @@ -4026,20 +3230,16 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) int i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; uint32_t old_write_domain, old_read_domains; - struct i915_vma *vma; int ret; - if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) - return 0; - ret = i915_gem_object_wait_rendering(obj, !write); if (ret) return ret; + if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) + return 0; + /* Flush and acquire obj->pages so that we are coherent through * direct access in memory with previous cached writes through * shmemfs and that our cache domain tracking remains valid. 
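(Editorial aside, not part of the patch: the wmb() plus uncached register read above is the kernel-side way of draining CPU write-combining buffers before anything else samples the data. A minimal user-space sketch of the same ordering idea, assuming a write-combining mapping wc_ptr obtained elsewhere; the mapping itself is hypothetical here:)

#include <emmintrin.h>	/* _mm_sfence(), SSE2 */
#include <stdint.h>

/* Store through a hypothetical write-combining mapping, then fence so the
 * write is globally visible before any later access depends on it; this is
 * the user-space cousin of the wmb()/POSTING_READ pair in the hunk above.
 */
static void wc_store_fenced(volatile uint32_t *wc_ptr, uint32_t value)
{
	*wc_ptr = value;	/* buffered in a WC fill buffer, not yet visible */
	_mm_sfence();		/* drain WC buffers to memory */
}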
@@ -4080,10 +3280,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) old_write_domain); /* And bump the LRU for this access */ - vma = i915_gem_obj_to_ggtt(obj); - if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) - list_move_tail(&vma->vm_link, - &ggtt->base.inactive_list); + i915_gem_object_bump_inactive_ggtt(obj); return 0; } @@ -4106,9 +3303,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level) { - struct drm_device *dev = obj->base.dev; - struct i915_vma *vma, *next; - bool bound = false; + struct i915_vma *vma; int ret = 0; if (obj->cache_level == cache_level) @@ -4119,21 +3314,28 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, * catch the issue of the CS prefetch crossing page boundaries and * reading an invalid PTE on older architectures. */ - list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { +restart: + list_for_each_entry(vma, &obj->vma_list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; - if (vma->pin_count) { + if (i915_vma_is_pinned(vma)) { DRM_DEBUG("can not change the cache level of pinned objects\n"); return -EBUSY; } - if (!i915_gem_valid_gtt_space(vma, cache_level)) { - ret = i915_vma_unbind(vma); - if (ret) - return ret; - } else - bound = true; + if (i915_gem_valid_gtt_space(vma, cache_level)) + continue; + + ret = i915_vma_unbind(vma); + if (ret) + return ret; + + /* As unbinding may affect other elements in the + * obj->vma_list (due to side-effects from retiring + * an active vma), play safe and restart the iterator. + */ + goto restart; } /* We can reuse the existing drm_mm nodes but need to change the @@ -4143,7 +3345,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, * rewrite the PTE in the belief that doing so tramples upon less * state and so involves less work. */ - if (bound) { + if (obj->bind_count) { /* Before we change the PTE, the GPU must not be accessing it. * If we wait upon the object, we know that all the bound * VMA are no longer active. @@ -4152,7 +3354,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, if (ret) return ret; - if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) { + if (!HAS_LLC(obj->base.dev) && cache_level != I915_CACHE_NONE) { /* Access to snoopable pages through the GTT is * incoherent and on some machines causes a hard * lockup. Relinquish the CPU mmaping to force @@ -4169,9 +3371,11 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, * dropped the fence as all snoopable access is * supposed to be linear. 
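+			 * (Hence the loop below now drops the fence on every
+			 *  vma of the object, rather than the single
+			 *  object-wide fence that the old
+			 *  i915_gem_object_put_fence() covered.)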
*/ - ret = i915_gem_object_put_fence(obj); - if (ret) - return ret; + list_for_each_entry(vma, &obj->vma_list, obj_link) { + ret = i915_vma_put_fence(vma); + if (ret) + return ret; + } } else { /* We either have incoherent backing store and * so no GTT access or the architecture is fully @@ -4215,8 +3419,8 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_caching *args = data; struct drm_i915_gem_object *obj; - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) return -ENOENT; switch (obj->cache_level) { @@ -4234,7 +3438,7 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, break; } - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); return 0; } @@ -4276,15 +3480,15 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, if (ret) goto rpm_put; - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) { + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) { ret = -ENOENT; goto unlock; } ret = i915_gem_object_set_cache_level(obj, level); - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); unlock: mutex_unlock(&dev->struct_mutex); rpm_put: @@ -4298,11 +3502,12 @@ rpm_put: * Can be called from an uninterruptible phase (modesetting) and allows * any flushes to be pipelined (for pageflips). */ -int +struct i915_vma * i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, u32 alignment, const struct i915_ggtt_view *view) { + struct i915_vma *vma; u32 old_read_domains, old_write_domain; int ret; @@ -4322,19 +3527,31 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, */ ret = i915_gem_object_set_cache_level(obj, HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); - if (ret) + if (ret) { + vma = ERR_PTR(ret); goto err_unpin_display; + } /* As the user may map the buffer once pinned in the display plane * (e.g. libkms for the bootup splash), we have to ensure that we - * always use map_and_fenceable for all scanout buffers. + * always use map_and_fenceable for all scanout buffers. However, + * it may simply be too big to fit into mappable, in which case + * put it anyway and hope that userspace can cope (but always first + * try to preserve the existing ABI). */ - ret = i915_gem_object_ggtt_pin(obj, view, alignment, - view->type == I915_GGTT_VIEW_NORMAL ? 
- PIN_MAPPABLE : 0); - if (ret) + vma = ERR_PTR(-ENOSPC); + if (view->type == I915_GGTT_VIEW_NORMAL) + vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, + PIN_MAPPABLE | PIN_NONBLOCK); + if (IS_ERR(vma)) + vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0); + if (IS_ERR(vma)) goto err_unpin_display; + vma->display_alignment = max_t(u64, vma->display_alignment, alignment); + + WARN_ON(obj->pin_display > i915_vma_pin_count(vma)); + i915_gem_object_flush_cpu_write_domain(obj); old_write_domain = obj->base.write_domain; @@ -4350,23 +3567,28 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, old_read_domains, old_write_domain); - return 0; + return vma; err_unpin_display: obj->pin_display--; - return ret; + return vma; } void -i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view) +i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) { - if (WARN_ON(obj->pin_display == 0)) + if (WARN_ON(vma->obj->pin_display == 0)) return; - i915_gem_object_ggtt_unpin_view(obj, view); + if (--vma->obj->pin_display == 0) + vma->display_alignment = 0; - obj->pin_display--; + /* Bump the LRU to try and avoid premature eviction whilst flipping */ + if (!i915_vma_is_active(vma)) + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + + i915_vma_unpin(vma); + WARN_ON(vma->obj->pin_display > i915_vma_pin_count(vma)); } /** @@ -4383,13 +3605,13 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) uint32_t old_write_domain, old_read_domains; int ret; - if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) - return 0; - ret = i915_gem_object_wait_rendering(obj, !write); if (ret) return ret; + if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) + return 0; + i915_gem_object_flush_gtt_write_domain(obj); old_write_domain = obj->base.write_domain; @@ -4464,28 +3686,31 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) target = request; } if (target) - i915_gem_request_reference(target); + i915_gem_request_get(target); spin_unlock(&file_priv->mm.lock); if (target == NULL) return 0; - ret = __i915_wait_request(target, true, NULL, NULL); - i915_gem_request_unreference(target); + ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE, NULL, NULL); + i915_gem_request_put(target); return ret; } static bool -i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) +i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { - struct drm_i915_gem_object *obj = vma->obj; + if (!drm_mm_node_allocated(&vma->node)) + return false; - if (alignment && - vma->node.start & (alignment - 1)) + if (vma->node.size < size) return true; - if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) + if (alignment && vma->node.start & (alignment - 1)) + return true; + + if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma)) return true; if (flags & PIN_OFFSET_BIAS && @@ -4502,135 +3727,208 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) { struct drm_i915_gem_object *obj = vma->obj; + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); bool mappable, fenceable; u32 fence_size, fence_alignment; - fence_size = i915_gem_get_gtt_size(obj->base.dev, - obj->base.size, - obj->tiling_mode); - fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, - obj->base.size, - obj->tiling_mode, - true); + fence_size = i915_gem_get_ggtt_size(dev_priv, + vma->size, + 
i915_gem_object_get_tiling(obj)); + fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, + vma->size, + i915_gem_object_get_tiling(obj), + true); fenceable = (vma->node.size == fence_size && (vma->node.start & (fence_alignment - 1)) == 0); mappable = (vma->node.start + fence_size <= - to_i915(obj->base.dev)->ggtt.mappable_end); + dev_priv->ggtt.mappable_end); - obj->map_and_fenceable = mappable && fenceable; + if (mappable && fenceable) + vma->flags |= I915_VMA_CAN_FENCE; + else + vma->flags &= ~I915_VMA_CAN_FENCE; } -static int -i915_gem_object_do_pin(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *ggtt_view, - uint32_t alignment, - uint64_t flags) +int __i915_vma_do_pin(struct i915_vma *vma, + u64 size, u64 alignment, u64 flags) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - struct i915_vma *vma; - unsigned bound; + unsigned int bound = vma->flags; int ret; - if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) - return -ENODEV; + GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0); + GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma)); - if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) - return -EINVAL; + if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { + ret = -EBUSY; + goto err; + } - if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE)) - return -EINVAL; + if ((bound & I915_VMA_BIND_MASK) == 0) { + ret = i915_vma_insert(vma, size, alignment, flags); + if (ret) + goto err; + } - if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) - return -EINVAL; + ret = i915_vma_bind(vma, vma->obj->cache_level, flags); + if (ret) + goto err; - vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) : - i915_gem_obj_to_vma(obj, vm); + if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) + __i915_vma_set_map_and_fenceable(vma); - if (vma) { - if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) - return -EBUSY; + GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); + return 0; - if (i915_vma_misplaced(vma, alignment, flags)) { - WARN(vma->pin_count, - "bo is already pinned in %s with incorrect alignment:" - " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d," - " obj->map_and_fenceable=%d\n", - ggtt_view ? "ggtt" : "ppgtt", - upper_32_bits(vma->node.start), - lower_32_bits(vma->node.start), - alignment, - !!(flags & PIN_MAPPABLE), - obj->map_and_fenceable); - ret = i915_vma_unbind(vma); - if (ret) - return ret; +err: + __i915_vma_unpin(vma); + return ret; +} - vma = NULL; - } - } +struct i915_vma * +i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, + const struct i915_ggtt_view *view, + u64 size, + u64 alignment, + u64 flags) +{ + struct i915_address_space *vm = &to_i915(obj->base.dev)->ggtt.base; + struct i915_vma *vma; + int ret; - bound = vma ? 
vma->bound : 0; - if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { - vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, - flags); - if (IS_ERR(vma)) - return PTR_ERR(vma); - } else { - ret = i915_vma_bind(vma, obj->cache_level, flags); + vma = i915_gem_obj_lookup_or_create_vma(obj, vm, view); + if (IS_ERR(vma)) + return vma; + + if (i915_vma_misplaced(vma, size, alignment, flags)) { + if (flags & PIN_NONBLOCK && + (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))) + return ERR_PTR(-ENOSPC); + + WARN(i915_vma_is_pinned(vma), + "bo is already pinned in ggtt with incorrect alignment:" + " offset=%08x, req.alignment=%llx," + " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n", + i915_ggtt_offset(vma), alignment, + !!(flags & PIN_MAPPABLE), + i915_vma_is_map_and_fenceable(vma)); + ret = i915_vma_unbind(vma); if (ret) - return ret; + return ERR_PTR(ret); } - if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && - (bound ^ vma->bound) & GLOBAL_BIND) { - __i915_vma_set_map_and_fenceable(vma); - WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); - } + ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); + if (ret) + return ERR_PTR(ret); - vma->pin_count++; - return 0; + return vma; } -int -i915_gem_object_pin(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - uint32_t alignment, - uint64_t flags) +static __always_inline unsigned int __busy_read_flag(unsigned int id) { - return i915_gem_object_do_pin(obj, vm, - i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, - alignment, flags); + /* Note that we could alias engines in the execbuf API, but + * that would be very unwise as it prevents userspace from + * fine control over engine selection. Ahem. + * + * This should be something like EXEC_MAX_ENGINE instead of + * I915_NUM_ENGINES. + */ + BUILD_BUG_ON(I915_NUM_ENGINES > 16); + return 0x10000 << id; } -int -i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view, - uint32_t alignment, - uint64_t flags) +static __always_inline unsigned int __busy_write_id(unsigned int id) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; + /* The uABI guarantees an active writer is also amongst the read + * engines. This would be true if we accessed the activity tracking + * under the lock, but as we perform the lookup of the object and + * its activity locklessly we can not guarantee that the last_write + * being active implies that we have set the same engine flag from + * last_read - hence we always set both read and write busy for + * last_write. + */ + return id | __busy_read_flag(id); +} - BUG_ON(!view); +static __always_inline unsigned int +__busy_set_if_active(const struct i915_gem_active *active, + unsigned int (*flag)(unsigned int id)) +{ + struct drm_i915_gem_request *request; + + request = rcu_dereference(active->request); + if (!request || i915_gem_request_completed(request)) + return 0; - return i915_gem_object_do_pin(obj, &ggtt->base, view, - alignment, flags | PIN_GLOBAL); + /* This is racy. See __i915_gem_active_get_rcu() for an in detail + * discussion of how to handle the race correctly, but for reporting + * the busy state we err on the side of potentially reporting the + * wrong engine as being busy (but we guarantee that the result + * is at least self-consistent). 
+ * + * As we use SLAB_DESTROY_BY_RCU, the request may be reallocated + * whilst we are inspecting it, even under the RCU read lock as we are. + * This means that there is a small window for the engine and/or the + * seqno to have been overwritten. The seqno will always be in the + * future compared to the intended, and so we know that if that + * seqno is idle (on whatever engine) our request is idle and the + * return 0 above is correct. + * + * The issue is that if the engine is switched, it is just as likely + * to report that it is busy (but since the switch happened, we know + * the request should be idle). So there is a small chance that a busy + * result is actually the wrong engine. + * + * So why don't we care? + * + * For starters, the busy ioctl is a heuristic that is by definition + * racy. Even with perfect serialisation in the driver, the hardware + * state is constantly advancing - the state we report to the user + * is stale. + * + * The critical information for the busy-ioctl is whether the object + * is idle as userspace relies on that to detect whether its next + * access will stall, or if it has missed submitting commands to + * the hardware allowing the GPU to stall. We never generate a + * false-positive for idleness, thus busy-ioctl is reliable at the + * most fundamental level, and we maintain the guarantee that a + * busy object left to itself will eventually become idle (and stay + * idle!). + * + * We allow ourselves the leeway of potentially misreporting the busy + * state because that is an optimisation heuristic that is constantly + * in flux. Being quickly able to detect the busy/idle state is much + * more important than accurate logging of exactly which engines were + * busy. + * + * For accuracy in reporting the engine, we could use + * + * result = 0; + * request = __i915_gem_active_get_rcu(active); + * if (request) { + * if (!i915_gem_request_completed(request)) + * result = flag(request->engine->exec_id); + * i915_gem_request_put(request); + * } + * + * but that still remains susceptible to both hardware and userspace + * races. So we accept making the result of that race slightly worse, + * given the rarity of the race and its low impact on the result. + */ + return flag(READ_ONCE(request->engine->exec_id)); } -void -i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view) +static __always_inline unsigned int +busy_check_reader(const struct i915_gem_active *active) { - struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); - - WARN_ON(vma->pin_count == 0); - WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view)); + return __busy_set_if_active(active, __busy_read_flag); +} - --vma->pin_count; +static __always_inline unsigned int +busy_check_writer(const struct i915_gem_active *active) +{ + return __busy_set_if_active(active, __busy_write_id); } int @@ -4639,47 +3937,64 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; - int ret; + unsigned long active; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } + args->busy = 0; + active = __I915_BO_ACTIVE(obj); + if (active) { + int idx; - /* Count all active objects as busy, even if they are currently not used - * by the gpu. 
Users of this interface expect objects to eventually - * become non-busy without any further actions, therefore emit any - * necessary flushes here. - */ - ret = i915_gem_object_flush_active(obj); - if (ret) - goto unref; + /* Yes, the lookups are intentionally racy. + * + * First, we cannot simply rely on __I915_BO_ACTIVE. We have + * to regard the value as stale and as our ABI guarantees + * forward progress, we confirm the status of each active + * request with the hardware. + * + * Even though we guard the pointer lookup by RCU, that only + * guarantees that the pointer and its contents remain + * dereferencable and does *not* mean that the request we + * have is the same as the one being tracked by the object. + * + * Consider that we lookup the request just as it is being + * retired and freed. We take a local copy of the pointer, + * but before we add its engine into the busy set, the other + * thread reallocates it and assigns it to a task on another + * engine with a fresh and incomplete seqno. Guarding against + * that requires careful serialisation and reference counting, + * i.e. using __i915_gem_active_get_request_rcu(). We don't, + * instead we expect that if the result is busy, which engines + * are busy is not completely reliable - we only guarantee + * that the object was busy. + */ + rcu_read_lock(); - args->busy = 0; - if (obj->active) { - int i; + for_each_active(active, idx) + args->busy |= busy_check_reader(&obj->last_read[idx]); - for (i = 0; i < I915_NUM_ENGINES; i++) { - struct drm_i915_gem_request *req; + /* For ABI sanity, we only care that the write engine is in + * the set of read engines. This should be ensured by the + * ordering of setting last_read/last_write in + * i915_vma_move_to_active(), and then in reverse in retire. + * However, for good measure, we always report the last_write + * request as a busy read as well as being a busy write. + * + * We don't care that the set of active read/write engines + * may change during construction of the result, as it is + * equally liable to change before userspace can inspect + * the result. 
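+		 * (For reference, the resulting encoding: the busy write
+		 *  engine's exec_id occupies the low 16 bits of args->busy,
+		 *  and each busy read engine sets bit 16 + exec_id, per
+		 *  __busy_write_id() and __busy_read_flag() above. A caller
+		 *  that only cares about idleness can simply test
+		 *  args->busy == 0.)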
+	 */
+	args->busy |= busy_check_writer(&obj->last_write);
 
-			req = obj->last_read_req[i];
-			if (req)
-				args->busy |= 1 << (16 + req->engine->exec_id);
-		}
-		if (obj->last_write_req)
-			args->busy |= obj->last_write_req->engine->exec_id;
+		rcu_read_unlock();
 	}
 
-unref:
-	drm_gem_object_unreference(&obj->base);
-unlock:
-	mutex_unlock(&dev->struct_mutex);
-	return ret;
+	i915_gem_object_put_unlocked(obj);
+	return 0;
 }
 
 int
@@ -4710,19 +4025,14 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		return ret;
 
-	obj = to_intel_bo(drm_gem_object_lookup(file_priv, args->handle));
-	if (&obj->base == NULL) {
+	obj = i915_gem_object_lookup(file_priv, args->handle);
+	if (!obj) {
 		ret = -ENOENT;
 		goto unlock;
 	}
 
-	if (i915_gem_obj_is_pinned(obj)) {
-		ret = -EINVAL;
-		goto out;
-	}
-
 	if (obj->pages &&
-	    obj->tiling_mode != I915_TILING_NONE &&
+	    i915_gem_object_is_tiled(obj) &&
 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
 		if (obj->madv == I915_MADV_WILLNEED)
 			i915_gem_object_unpin_pages(obj);
@@ -4739,8 +4049,7 @@
 	args->retained = obj->madv != __I915_MADV_PURGED;
 
-out:
-	drm_gem_object_unreference(&obj->base);
+	i915_gem_object_put(obj);
 unlock:
 	mutex_unlock(&dev->struct_mutex);
 	return ret;
@@ -4753,14 +4062,17 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 	INIT_LIST_HEAD(&obj->global_list);
 	for (i = 0; i < I915_NUM_ENGINES; i++)
-		INIT_LIST_HEAD(&obj->engine_list[i]);
+		init_request_active(&obj->last_read[i],
+				    i915_gem_object_retire__read);
+	init_request_active(&obj->last_write,
+			    i915_gem_object_retire__write);
 	INIT_LIST_HEAD(&obj->obj_exec_link);
 	INIT_LIST_HEAD(&obj->vma_list);
 	INIT_LIST_HEAD(&obj->batch_pool_link);
 
 	obj->ops = ops;
 
-	obj->fence_reg = I915_FENCE_REG_NONE;
+	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
 	obj->madv = I915_MADV_WILLNEED;
 
 	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
@@ -4865,33 +4177,31 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 
 	trace_i915_gem_object_destroy(obj);
 
+	/* All file-owned VMA should have been released by this point through
+	 * i915_gem_close_object(), or earlier by i915_gem_context_close().
+	 * However, the object may also be bound into the global GTT (e.g.
+	 * older GPUs without per-process support, or for direct access through
+	 * the GTT either for the user or for scanout). Those VMA still need to
+	 * be unbound now.
+	 */
 	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
-		int ret;
-
-		vma->pin_count = 0;
-		ret = i915_vma_unbind(vma);
-		if (WARN_ON(ret == -ERESTARTSYS)) {
-			bool was_interruptible;
-
-			was_interruptible = dev_priv->mm.interruptible;
-			dev_priv->mm.interruptible = false;
-
-			WARN_ON(i915_vma_unbind(vma));
-
-			dev_priv->mm.interruptible = was_interruptible;
-		}
+		GEM_BUG_ON(!i915_vma_is_ggtt(vma));
+		GEM_BUG_ON(i915_vma_is_active(vma));
+		vma->flags &= ~I915_VMA_PIN_MASK;
+		i915_vma_close(vma);
 	}
+	GEM_BUG_ON(obj->bind_count);
 
 	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
	 * before progressing. 
*/
 	if (obj->stolen)
 		i915_gem_object_unpin_pages(obj);
 
-	WARN_ON(obj->frontbuffer_bits);
+	WARN_ON(atomic_read(&obj->frontbuffer_bits));
 
 	if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
-	    obj->tiling_mode != I915_TILING_NONE)
+	    i915_gem_object_is_tiled(obj))
 		i915_gem_object_unpin_pages(obj);
 
 	if (WARN_ON(obj->pages_pin_count))
@@ -4899,7 +4209,6 @@
 	if (discard_backing_storage(obj))
 		obj->madv = I915_MADV_DONTNEED;
 	i915_gem_object_put_pages(obj);
-	i915_gem_object_free_mmap_offset(obj);
 
 	BUG_ON(obj->pages);
@@ -4918,71 +4227,35 @@
 	intel_runtime_pm_put(dev_priv);
 }
 
-struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
-				     struct i915_address_space *vm)
-{
-	struct i915_vma *vma;
-	list_for_each_entry(vma, &obj->vma_list, obj_link) {
-		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
-		    vma->vm == vm)
-			return vma;
-	}
-	return NULL;
-}
-
-struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
-					   const struct i915_ggtt_view *view)
-{
-	struct i915_vma *vma;
-
-	GEM_BUG_ON(!view);
-
-	list_for_each_entry(vma, &obj->vma_list, obj_link)
-		if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view))
-			return vma;
-	return NULL;
-}
-
-void i915_gem_vma_destroy(struct i915_vma *vma)
-{
-	WARN_ON(vma->node.allocated);
-
-	/* Keep the vma as a placeholder in the execbuffer reservation lists */
-	if (!list_empty(&vma->exec_list))
-		return;
-
-	if (!vma->is_ggtt)
-		i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
-
-	list_del(&vma->obj_link);
-
-	kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
-}
-
-static void
-i915_gem_stop_engines(struct drm_device *dev)
+int i915_gem_suspend(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_engine_cs *engine;
+	int ret;
 
-	for_each_engine(engine, dev_priv)
-		dev_priv->gt.stop_engine(engine);
-}
-
-int
-i915_gem_suspend(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	int ret = 0;
+	intel_suspend_gt_powersave(dev_priv);
 
 	mutex_lock(&dev->struct_mutex);
-	ret = i915_gem_wait_for_idle(dev_priv);
+
+	/* We have to flush all the executing contexts to main memory so
+	 * that they can be saved in the hibernation image. To ensure the last
+	 * context image is coherent, we have to switch away from it. That
+	 * leaves the dev_priv->kernel_context still active when
+	 * we actually suspend, and its image in memory may not match the GPU
+	 * state. Fortunately, the kernel_context is disposable and we do
+	 * not rely on its state.
+	 */
+	ret = i915_gem_switch_to_kernel_context(dev_priv);
+	if (ret)
+		goto err;
+
+	ret = i915_gem_wait_for_idle(dev_priv,
+				     I915_WAIT_INTERRUPTIBLE |
+				     I915_WAIT_LOCKED);
 	if (ret)
 		goto err;
 
 	i915_gem_retire_requests(dev_priv);
-	i915_gem_stop_engines(dev);
 
 	i915_gem_context_lost(dev_priv);
 	mutex_unlock(&dev->struct_mutex);
@@ -5002,6 +4275,22 @@ err:
 	return ret;
 }
 
+void i915_gem_resume(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+
+	mutex_lock(&dev->struct_mutex);
+	i915_gem_restore_gtt_mappings(dev);
+
+	/* As we didn't flush the kernel context before suspend, we cannot
+	 * guarantee that the context image is complete. So let's just reset
+	 * it and start again. 
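+	 * (dev_priv->gt.resume is intel_legacy_submission_resume or
+	 *  intel_lr_context_resume, depending on execlists; see the
+	 *  assignments in i915_gem_init() below.)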
+ */ + dev_priv->gt.resume(dev_priv); + + mutex_unlock(&dev->struct_mutex); +} + void i915_gem_init_swizzling(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -5054,53 +4343,6 @@ static void init_unused_rings(struct drm_device *dev) } } -int i915_gem_init_engines(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - int ret; - - ret = intel_init_render_ring_buffer(dev); - if (ret) - return ret; - - if (HAS_BSD(dev)) { - ret = intel_init_bsd_ring_buffer(dev); - if (ret) - goto cleanup_render_ring; - } - - if (HAS_BLT(dev)) { - ret = intel_init_blt_ring_buffer(dev); - if (ret) - goto cleanup_bsd_ring; - } - - if (HAS_VEBOX(dev)) { - ret = intel_init_vebox_ring_buffer(dev); - if (ret) - goto cleanup_blt_ring; - } - - if (HAS_BSD2(dev)) { - ret = intel_init_bsd2_ring_buffer(dev); - if (ret) - goto cleanup_vebox_ring; - } - - return 0; - -cleanup_vebox_ring: - intel_cleanup_engine(&dev_priv->engine[VECS]); -cleanup_blt_ring: - intel_cleanup_engine(&dev_priv->engine[BCS]); -cleanup_bsd_ring: - intel_cleanup_engine(&dev_priv->engine[VCS]); -cleanup_render_ring: - intel_cleanup_engine(&dev_priv->engine[RCS]); - - return ret; -} - int i915_gem_init_hw(struct drm_device *dev) { @@ -5167,6 +4409,27 @@ out: return ret; } +bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value) +{ + if (INTEL_INFO(dev_priv)->gen < 6) + return false; + + /* TODO: make semaphores and Execlists play nicely together */ + if (i915.enable_execlists) + return false; + + if (value >= 0) + return value; + +#ifdef CONFIG_INTEL_IOMMU + /* Enable semaphores on SNB when IO remapping is off */ + if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped) + return false; +#endif + + return true; +} + int i915_gem_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -5175,15 +4438,11 @@ int i915_gem_init(struct drm_device *dev) mutex_lock(&dev->struct_mutex); if (!i915.enable_execlists) { - dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission; - dev_priv->gt.init_engines = i915_gem_init_engines; - dev_priv->gt.cleanup_engine = intel_cleanup_engine; - dev_priv->gt.stop_engine = intel_stop_engine; + dev_priv->gt.resume = intel_legacy_submission_resume; + dev_priv->gt.cleanup_engine = intel_engine_cleanup; } else { - dev_priv->gt.execbuf_submit = intel_execlists_submission; - dev_priv->gt.init_engines = intel_logical_rings_init; + dev_priv->gt.resume = intel_lr_context_resume; dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; - dev_priv->gt.stop_engine = intel_logical_ring_stop; } /* This is just a security blanket to placate dragons. @@ -5195,24 +4454,27 @@ int i915_gem_init(struct drm_device *dev) intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); i915_gem_init_userptr(dev_priv); - i915_gem_init_ggtt(dev); + + ret = i915_gem_init_ggtt(dev_priv); + if (ret) + goto out_unlock; ret = i915_gem_context_init(dev); if (ret) goto out_unlock; - ret = dev_priv->gt.init_engines(dev); + ret = intel_engines_init(dev); if (ret) goto out_unlock; ret = i915_gem_init_hw(dev); if (ret == -EIO) { - /* Allow ring initialisation to fail by marking the GPU as + /* Allow engine initialisation to fail by marking the GPU as * wedged. But we only want to do this where the GPU is angry, * for all other failure, such as an allocation failure, bail. 
*/ DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); - atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter); + i915_gem_set_wedged(dev_priv); ret = 0; } @@ -5236,7 +4498,6 @@ i915_gem_cleanup_engines(struct drm_device *dev) static void init_engine_lists(struct intel_engine_cs *engine) { - INIT_LIST_HEAD(&engine->active_list); INIT_LIST_HEAD(&engine->request_list); } @@ -5244,6 +4505,7 @@ void i915_gem_load_init_fences(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; + int i; if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) @@ -5259,6 +4521,13 @@ i915_gem_load_init_fences(struct drm_i915_private *dev_priv) I915_READ(vgtif_reg(avail_rs.fence_num)); /* Initialize fence registers to zero */ + for (i = 0; i < dev_priv->num_fence_regs; i++) { + struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i]; + + fence->i915 = dev_priv; + fence->id = i; + list_add_tail(&fence->link, &dev_priv->mm.fence_list); + } i915_gem_restore_fences(dev); i915_gem_detect_bit_6_swizzle(dev); @@ -5283,18 +4552,17 @@ i915_gem_load_init(struct drm_device *dev) dev_priv->requests = kmem_cache_create("i915_gem_request", sizeof(struct drm_i915_gem_request), 0, - SLAB_HWCACHE_ALIGN, + SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT | + SLAB_DESTROY_BY_RCU, NULL); - INIT_LIST_HEAD(&dev_priv->vm_list); INIT_LIST_HEAD(&dev_priv->context_list); INIT_LIST_HEAD(&dev_priv->mm.unbound_list); INIT_LIST_HEAD(&dev_priv->mm.bound_list); INIT_LIST_HEAD(&dev_priv->mm.fence_list); for (i = 0; i < I915_NUM_ENGINES; i++) init_engine_lists(&dev_priv->engine[i]); - for (i = 0; i < I915_MAX_NUM_FENCES; i++) - INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); INIT_DELAYED_WORK(&dev_priv->gt.retire_work, i915_gem_retire_work_handler); INIT_DELAYED_WORK(&dev_priv->gt.idle_work, @@ -5304,13 +4572,13 @@ i915_gem_load_init(struct drm_device *dev) dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; - INIT_LIST_HEAD(&dev_priv->mm.fence_list); - init_waitqueue_head(&dev_priv->pending_flip_queue); dev_priv->mm.interruptible = true; - mutex_init(&dev_priv->fb_tracking.lock); + atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); + + spin_lock_init(&dev_priv->fb_tracking.lock); } void i915_gem_load_cleanup(struct drm_device *dev) @@ -5320,11 +4588,32 @@ void i915_gem_load_cleanup(struct drm_device *dev) kmem_cache_destroy(dev_priv->requests); kmem_cache_destroy(dev_priv->vmas); kmem_cache_destroy(dev_priv->objects); + + /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ + rcu_barrier(); +} + +int i915_gem_freeze(struct drm_i915_private *dev_priv) +{ + intel_runtime_pm_get(dev_priv); + + mutex_lock(&dev_priv->drm.struct_mutex); + i915_gem_shrink_all(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); + + intel_runtime_pm_put(dev_priv); + + return 0; } int i915_gem_freeze_late(struct drm_i915_private *dev_priv) { struct drm_i915_gem_object *obj; + struct list_head *phases[] = { + &dev_priv->mm.unbound_list, + &dev_priv->mm.bound_list, + NULL + }, **p; /* Called just before we write the hibernation image. * @@ -5335,17 +4624,21 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv) * * To make sure the hibernation image contains the latest state, * we update that state just before writing out the image. + * + * To try and reduce the hibernation image, we manually shrink + * the objects as well. 
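+	 *
+	 * (Moving every object to the CPU domain below should mean that the
+	 * first GPU use after resume flushes and invalidates caches as
+	 * needed, instead of trusting stale pre-hibernation domain state.)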
 	 */
-	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
-		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
-		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
-	}
+	mutex_lock(&dev_priv->drm.struct_mutex);
+	i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND);
 
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
-		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
-		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+	for (p = phases; *p; p++) {
+		list_for_each_entry(obj, *p, global_list) {
+			obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+			obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+		}
 	}
+	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	return 0;
 }
@@ -5353,21 +4646,15 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct drm_i915_gem_request *request;
 
 	/* Clean up our request list when the client is going away, so that
 	 * later retire_requests won't dereference our soon-to-be-gone
 	 * file_priv.
 	 */
 	spin_lock(&file_priv->mm.lock);
-	while (!list_empty(&file_priv->mm.request_list)) {
-		struct drm_i915_gem_request *request;
-
-		request = list_first_entry(&file_priv->mm.request_list,
-					   struct drm_i915_gem_request,
-					   client_list);
-		list_del(&request->client_list);
+	list_for_each_entry(request, &file_priv->mm.request_list, client_list)
 		request->file_priv = NULL;
-	}
 	spin_unlock(&file_priv->mm.lock);
 
 	if (!list_empty(&file_priv->rps.link)) {
@@ -5396,7 +4683,7 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file)
 	spin_lock_init(&file_priv->mm.lock);
 	INIT_LIST_HEAD(&file_priv->mm.request_list);
 
-	file_priv->bsd_ring = -1;
+	file_priv->bsd_engine = -1;
 
 	ret = i915_gem_context_open(dev, file);
 	if (ret)
@@ -5418,120 +4705,26 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 		       struct drm_i915_gem_object *new,
 		       unsigned frontbuffer_bits)
 {
+	/* Control of individual bits within the mask is guarded by
+	 * the owning plane->mutex, i.e. we can never see concurrent
+	 * manipulation of individual bits. But since the bitfield as a whole
+	 * is updated using RMW, we need to use atomics in order to update
+	 * the bits.
+	 */
+	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
+		     sizeof(atomic_t) * BITS_PER_BYTE);
+
 	if (old) {
-		WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex));
-		WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits));
-		old->frontbuffer_bits &= ~frontbuffer_bits;
+		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
+		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
 	}
 
 	if (new) {
-		WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex));
-		WARN_ON(new->frontbuffer_bits & frontbuffer_bits);
-		new->frontbuffer_bits |= frontbuffer_bits;
+		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
+		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
 	}
 }
 
-/* All the new VM stuff */
-u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
-			struct i915_address_space *vm)
-{
-	struct drm_i915_private *dev_priv = to_i915(o->base.dev);
-	struct i915_vma *vma;
-
-	WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
-
-	list_for_each_entry(vma, &o->vma_list, obj_link) {
-		if (vma->is_ggtt &&
-		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
-			continue;
-		if (vma->vm == vm)
-			return vma->node.start;
-	}
-
-	WARN(1, "%s vma for this object not found.\n",
-	     i915_is_ggtt(vm) ?
"global" : "ppgtt"); - return -1; -} - -u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, - const struct i915_ggtt_view *view) -{ - struct i915_vma *vma; - - list_for_each_entry(vma, &o->vma_list, obj_link) - if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view)) - return vma->node.start; - - WARN(1, "global vma for this object not found. (view=%u)\n", view->type); - return -1; -} - -bool i915_gem_obj_bound(struct drm_i915_gem_object *o, - struct i915_address_space *vm) -{ - struct i915_vma *vma; - - list_for_each_entry(vma, &o->vma_list, obj_link) { - if (vma->is_ggtt && - vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) - continue; - if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) - return true; - } - - return false; -} - -bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, - const struct i915_ggtt_view *view) -{ - struct i915_vma *vma; - - list_for_each_entry(vma, &o->vma_list, obj_link) - if (vma->is_ggtt && - i915_ggtt_view_equal(&vma->ggtt_view, view) && - drm_mm_node_allocated(&vma->node)) - return true; - - return false; -} - -bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) -{ - struct i915_vma *vma; - - list_for_each_entry(vma, &o->vma_list, obj_link) - if (drm_mm_node_allocated(&vma->node)) - return true; - - return false; -} - -unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o) -{ - struct i915_vma *vma; - - GEM_BUG_ON(list_empty(&o->vma_list)); - - list_for_each_entry(vma, &o->vma_list, obj_link) { - if (vma->is_ggtt && - vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) - return vma->node.size; - } - - return 0; -} - -bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) -{ - struct i915_vma *vma; - list_for_each_entry(vma, &obj->vma_list, obj_link) - if (vma->pin_count > 0) - return true; - - return false; -} - /* Like i915_gem_object_get_page(), but mark the returned page dirty */ struct page * i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n) @@ -5584,6 +4777,6 @@ i915_gem_object_create_from_data(struct drm_device *dev, return obj; fail: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index 3752d5daa4b2..ed989596d9a3 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -41,15 +41,15 @@ /** * i915_gem_batch_pool_init() - initialize a batch buffer pool - * @dev: the drm device + * @engine: the associated request submission engine * @pool: the batch buffer pool */ -void i915_gem_batch_pool_init(struct drm_device *dev, +void i915_gem_batch_pool_init(struct intel_engine_cs *engine, struct i915_gem_batch_pool *pool) { int n; - pool->dev = dev; + pool->engine = engine; for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) INIT_LIST_HEAD(&pool->cache_list[n]); @@ -65,18 +65,17 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool) { int n; - WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex)); + lockdep_assert_held(&pool->engine->i915->drm.struct_mutex); for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) { - while (!list_empty(&pool->cache_list[n])) { - struct drm_i915_gem_object *obj = - list_first_entry(&pool->cache_list[n], - struct drm_i915_gem_object, - batch_pool_link); - - list_del(&obj->batch_pool_link); - drm_gem_object_unreference(&obj->base); - } + struct drm_i915_gem_object *obj, *next; + + list_for_each_entry_safe(obj, next, + &pool->cache_list[n], + batch_pool_link) + 
i915_gem_object_put(obj);
+
+		INIT_LIST_HEAD(&pool->cache_list[n]);
 	}
 }
 
@@ -102,7 +101,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 	struct list_head *list;
 	int n;
 
-	WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
+	lockdep_assert_held(&pool->engine->i915->drm.struct_mutex);
 
 	/* Compute a power-of-two bucket, but throw everything greater than
 	 * 16KiB into the same bucket: i.e. the buckets hold objects of
@@ -115,13 +114,14 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 
 	list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
 		/* The batches are strictly LRU ordered */
-		if (tmp->active)
+		if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id],
+					     &tmp->base.dev->struct_mutex))
 			break;
 
 		/* While we're looping, do some clean up */
 		if (tmp->madv == __I915_MADV_PURGED) {
 			list_del(&tmp->batch_pool_link);
-			drm_gem_object_unreference(&tmp->base);
+			i915_gem_object_put(tmp);
 			continue;
 		}
 
@@ -134,7 +134,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 	if (obj == NULL) {
 		int ret;
 
-		obj = i915_gem_object_create(pool->dev, size);
+		obj = i915_gem_object_create(&pool->engine->i915->drm, size);
 		if (IS_ERR(obj))
 			return obj;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
index 848e90703eed..10d5ac4c00d3 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
@@ -27,13 +27,15 @@
 
 #include "i915_drv.h"
 
+struct intel_engine_cs;
+
 struct i915_gem_batch_pool {
-	struct drm_device *dev;
+	struct intel_engine_cs *engine;
 	struct list_head cache_list[4];
 };
 
 /* i915_gem_batch_pool.c */
-void i915_gem_batch_pool_init(struct drm_device *dev,
+void i915_gem_batch_pool_init(struct intel_engine_cs *engine,
 			      struct i915_gem_batch_pool *pool);
 void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
 struct drm_i915_gem_object*
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 3c97f0e7a003..df10f4e95736 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -134,21 +134,6 @@ static int get_context_size(struct drm_i915_private *dev_priv)
 	return ret;
 }
 
-static void i915_gem_context_clean(struct i915_gem_context *ctx)
-{
-	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
-	struct i915_vma *vma, *next;
-
-	if (!ppgtt)
-		return;
-
-	list_for_each_entry_safe(vma, next, &ppgtt->base.inactive_list,
-				 vm_link) {
-		if (WARN_ON(__i915_vma_unbind_no_wait(vma)))
-			break;
-	}
-}
-
 void i915_gem_context_free(struct kref *ctx_ref)
 {
 	struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
@@ -156,13 +141,7 @@ void i915_gem_context_free(struct kref *ctx_ref)
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	trace_i915_context_free(ctx);
-
-	/*
-	 * This context is going away and we need to remove all VMAs still
-	 * around. This is to handle imported shared objects for which
-	 * destructor did not run when their handles were closed.
- */ - i915_gem_context_clean(ctx); + GEM_BUG_ON(!ctx->closed); i915_ppgtt_put(ctx->ppgtt); @@ -173,12 +152,13 @@ void i915_gem_context_free(struct kref *ctx_ref) continue; WARN_ON(ce->pin_count); - if (ce->ringbuf) - intel_ringbuffer_free(ce->ringbuf); + if (ce->ring) + intel_ring_free(ce->ring); - drm_gem_object_unreference(&ce->state->base); + i915_vma_put(ce->state); } + put_pid(ctx->pid); list_del(&ctx->link); ida_simple_remove(&ctx->i915->context_hw_ida, ctx->hw_id); @@ -216,7 +196,7 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size) ret = i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC); /* Failure shouldn't ever happen this early */ if (WARN_ON(ret)) { - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); return ERR_PTR(ret); } } @@ -224,6 +204,37 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size) return obj; } +static void i915_ppgtt_close(struct i915_address_space *vm) +{ + struct list_head *phases[] = { + &vm->active_list, + &vm->inactive_list, + &vm->unbound_list, + NULL, + }, **phase; + + GEM_BUG_ON(vm->closed); + vm->closed = true; + + for (phase = phases; *phase; phase++) { + struct i915_vma *vma, *vn; + + list_for_each_entry_safe(vma, vn, *phase, vm_link) + if (!i915_vma_is_closed(vma)) + i915_vma_close(vma); + } +} + +static void context_close(struct i915_gem_context *ctx) +{ + GEM_BUG_ON(ctx->closed); + ctx->closed = true; + if (ctx->ppgtt) + i915_ppgtt_close(&ctx->ppgtt->base); + ctx->file_priv = ERR_PTR(-EBADF); + i915_gem_context_put(ctx); +} + static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out) { int ret; @@ -271,13 +282,24 @@ __create_hw_context(struct drm_device *dev, ctx->ggtt_alignment = get_context_alignment(dev_priv); if (dev_priv->hw_context_size) { - struct drm_i915_gem_object *obj = - i915_gem_alloc_context_obj(dev, dev_priv->hw_context_size); + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + obj = i915_gem_alloc_context_obj(dev, + dev_priv->hw_context_size); if (IS_ERR(obj)) { ret = PTR_ERR(obj); goto err_out; } - ctx->engine[RCS].state = obj; + + vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + ret = PTR_ERR(vma); + goto err_out; + } + + ctx->engine[RCS].state = vma; } /* Default context will never have a file_priv */ @@ -290,6 +312,9 @@ __create_hw_context(struct drm_device *dev, ret = DEFAULT_CONTEXT_HANDLE; ctx->file_priv = file_priv; + if (file_priv) + ctx->pid = get_task_pid(current, PIDTYPE_PID); + ctx->user_handle = ret; /* NB: Mark all slices as needing a remap so that when the context first * loads it will restore whatever remap state already exists. 
If there @@ -305,7 +330,7 @@ __create_hw_context(struct drm_device *dev, return ctx; err_out: - i915_gem_context_unreference(ctx); + context_close(ctx); return ERR_PTR(ret); } @@ -327,13 +352,14 @@ i915_gem_create_context(struct drm_device *dev, return ctx; if (USES_FULL_PPGTT(dev)) { - struct i915_hw_ppgtt *ppgtt = i915_ppgtt_create(dev, file_priv); + struct i915_hw_ppgtt *ppgtt = + i915_ppgtt_create(to_i915(dev), file_priv); if (IS_ERR(ppgtt)) { DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); idr_remove(&file_priv->context_idr, ctx->user_handle); - i915_gem_context_unreference(ctx); + context_close(ctx); return ERR_CAST(ppgtt); } @@ -388,28 +414,12 @@ static void i915_gem_context_unpin(struct i915_gem_context *ctx, struct intel_context *ce = &ctx->engine[engine->id]; if (ce->state) - i915_gem_object_ggtt_unpin(ce->state); + i915_vma_unpin(ce->state); - i915_gem_context_unreference(ctx); + i915_gem_context_put(ctx); } } -void i915_gem_context_reset(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - - lockdep_assert_held(&dev->struct_mutex); - - if (i915.enable_execlists) { - struct i915_gem_context *ctx; - - list_for_each_entry(ctx, &dev_priv->context_list, link) - intel_lr_context_reset(dev_priv, ctx); - } - - i915_gem_context_lost(dev_priv); -} - int i915_gem_context_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -504,7 +514,7 @@ void i915_gem_context_fini(struct drm_device *dev) lockdep_assert_held(&dev->struct_mutex); - i915_gem_context_unreference(dctx); + context_close(dctx); dev_priv->kernel_context = NULL; ida_destroy(&dev_priv->context_hw_ida); @@ -514,8 +524,7 @@ static int context_idr_cleanup(int id, void *p, void *data) { struct i915_gem_context *ctx = p; - ctx->file_priv = ERR_PTR(-EBADF); - i915_gem_context_unreference(ctx); + context_close(ctx); return 0; } @@ -552,12 +561,13 @@ static inline int mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) { struct drm_i915_private *dev_priv = req->i915; + struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; u32 flags = hw_flags | MI_MM_SPACE_GTT; const int num_rings = /* Use an extended w/a on ivb+ if signalling from other rings */ - i915_semaphore_is_enabled(dev_priv) ? - hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1 : + i915.semaphores ? + INTEL_INFO(dev_priv)->num_rings - 1 : 0; int len, ret; @@ -567,7 +577,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) * itlb_before_ctx_switch. 
*/ if (IS_GEN6(dev_priv)) { - ret = engine->flush(req, I915_GEM_GPU_DOMAINS, 0); + ret = engine->emit_flush(req, EMIT_INVALIDATE); if (ret) return ret; } @@ -589,64 +599,64 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ if (INTEL_GEN(dev_priv) >= 7) { - intel_ring_emit(engine, MI_ARB_ON_OFF | MI_ARB_DISABLE); + intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE); if (num_rings) { struct intel_engine_cs *signaller; - intel_ring_emit(engine, + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings)); for_each_engine(signaller, dev_priv) { if (signaller == engine) continue; - intel_ring_emit_reg(engine, + intel_ring_emit_reg(ring, RING_PSMI_CTL(signaller->mmio_base)); - intel_ring_emit(engine, + intel_ring_emit(ring, _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE)); } } } - intel_ring_emit(engine, MI_NOOP); - intel_ring_emit(engine, MI_SET_CONTEXT); - intel_ring_emit(engine, - i915_gem_obj_ggtt_offset(req->ctx->engine[RCS].state) | - flags); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_SET_CONTEXT); + intel_ring_emit(ring, + i915_ggtt_offset(req->ctx->engine[RCS].state) | flags); /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv */ - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); if (INTEL_GEN(dev_priv) >= 7) { if (num_rings) { struct intel_engine_cs *signaller; i915_reg_t last_reg = {}; /* keep gcc quiet */ - intel_ring_emit(engine, + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings)); for_each_engine(signaller, dev_priv) { if (signaller == engine) continue; last_reg = RING_PSMI_CTL(signaller->mmio_base); - intel_ring_emit_reg(engine, last_reg); - intel_ring_emit(engine, + intel_ring_emit_reg(ring, last_reg); + intel_ring_emit(ring, _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE)); } /* Insert a delay before the next switch! */ - intel_ring_emit(engine, + intel_ring_emit(ring, MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT); - intel_ring_emit_reg(engine, last_reg); - intel_ring_emit(engine, engine->scratch.gtt_offset); - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit_reg(ring, last_reg); + intel_ring_emit(ring, + i915_ggtt_offset(engine->scratch)); + intel_ring_emit(ring, MI_NOOP); } - intel_ring_emit(engine, MI_ARB_ON_OFF | MI_ARB_ENABLE); + intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE); } - intel_ring_advance(engine); + intel_ring_advance(ring); return ret; } @@ -654,7 +664,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) static int remap_l3(struct drm_i915_gem_request *req, int slice) { u32 *remap_info = req->i915->l3_parity.remap_info[slice]; - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; int i, ret; if (!remap_info) @@ -669,13 +679,13 @@ static int remap_l3(struct drm_i915_gem_request *req, int slice) * here because no other code should access these registers other than * at initialization time. 
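 	 * (What is emitted below is a single MI_LOAD_REGISTER_IMM packet
 	 * carrying GEN7_L3LOG_SIZE/4 register/value pairs, i.e. one write
 	 * per remap entry of the slice.)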
*/ - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4)); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4)); for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) { - intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i)); - intel_ring_emit(engine, remap_info[i]); + intel_ring_emit_reg(ring, GEN7_L3LOG(slice, i)); + intel_ring_emit(ring, remap_info[i]); } - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -744,6 +754,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) struct i915_gem_context *to = req->ctx; struct intel_engine_cs *engine = req->engine; struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt; + struct i915_vma *vma = to->engine[RCS].state; struct i915_gem_context *from; u32 hw_flags; int ret, i; @@ -751,10 +762,15 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) if (skip_rcs_switch(ppgtt, engine, to)) return 0; + /* Clear this page out of any CPU caches for coherent swap-in/out. */ + if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { + ret = i915_gem_object_set_to_gtt_domain(vma->obj, false); + if (ret) + return ret; + } + /* Trying to pin first makes error handling easier. */ - ret = i915_gem_obj_ggtt_pin(to->engine[RCS].state, - to->ggtt_alignment, - 0); + ret = i915_vma_pin(vma, 0, to->ggtt_alignment, PIN_GLOBAL); if (ret) return ret; @@ -767,18 +783,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) */ from = engine->last_context; - /* - * Clear this page out of any CPU caches for coherent swap-in/out. Note - * that thanks to write = false in this call and us not setting any gpu - * write domains when putting a context object onto the active list - * (when switching away from it), this won't block. - * - * XXX: We need a real interface to do this instead of trickery. - */ - ret = i915_gem_object_set_to_gtt_domain(to->engine[RCS].state, false); - if (ret) - goto unpin_out; - if (needs_pd_load_pre(ppgtt, engine, to)) { /* Older GENs and non render rings still want the load first, * "PP_DCLV followed by PP_DIR_BASE register through Load @@ -787,7 +791,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) trace_switch_mm(engine, to); ret = ppgtt->switch_mm(ppgtt, req); if (ret) - goto unpin_out; + goto err; } if (!to->engine[RCS].initialised || i915_gem_context_is_default(to)) @@ -804,7 +808,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) if (to != from || (hw_flags & MI_FORCE_RESTORE)) { ret = mi_set_context(req, hw_flags); if (ret) - goto unpin_out; + goto err; } /* The backing object for the context is done after switching to the @@ -814,8 +818,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) * MI_SET_CONTEXT instead of when the next seqno has completed. */ if (from != NULL) { - from->engine[RCS].state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION; - i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->engine[RCS].state), req); /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the * whole damn pipeline, we don't need to explicitly mark the * object dirty. The only exception is that the context must be @@ -823,14 +825,12 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) * able to defer doing this until we know the object would be * swapped, but there is no way to do that yet. 
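		 * (The lifetime guarantee comes from the i915_vma_move_to_active()
		 * call below: the pin on the old context image is dropped
		 * immediately, but its active reference keeps the backing
		 * storage alive until this request is retired.)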
*/ - from->engine[RCS].state->dirty = 1; - - /* obj is kept alive until the next request by its active ref */ - i915_gem_object_ggtt_unpin(from->engine[RCS].state); - i915_gem_context_unreference(from); + i915_vma_move_to_active(from->engine[RCS].state, req, 0); + /* state is kept alive until the next request */ + i915_vma_unpin(from->engine[RCS].state); + i915_gem_context_put(from); } - i915_gem_context_reference(to); - engine->last_context = to; + engine->last_context = i915_gem_context_get(to); /* GEN8 does *not* require an explicit reload if the PDPs have been * setup, and we do not wish to move them. @@ -872,8 +872,8 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) return 0; -unpin_out: - i915_gem_object_ggtt_unpin(to->engine[RCS].state); +err: + i915_vma_unpin(vma); return ret; } @@ -894,8 +894,9 @@ int i915_switch_context(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; - WARN_ON(i915.enable_execlists); lockdep_assert_held(&req->i915->drm.struct_mutex); + if (i915.enable_execlists) + return 0; if (!req->ctx->engine[engine->id].state) { struct i915_gem_context *to = req->ctx; @@ -914,10 +915,9 @@ int i915_switch_context(struct drm_i915_gem_request *req) } if (to != engine->last_context) { - i915_gem_context_reference(to); if (engine->last_context) - i915_gem_context_unreference(engine->last_context); - engine->last_context = to; + i915_gem_context_put(engine->last_context); + engine->last_context = i915_gem_context_get(to); } return 0; @@ -926,6 +926,33 @@ int i915_switch_context(struct drm_i915_gem_request *req) return do_rcs_switch(req); } +int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + + for_each_engine(engine, dev_priv) { + struct drm_i915_gem_request *req; + int ret; + + if (engine->last_context == NULL) + continue; + + if (engine->last_context == dev_priv->kernel_context) + continue; + + req = i915_gem_request_alloc(engine, dev_priv->kernel_context); + if (IS_ERR(req)) + return PTR_ERR(req); + + ret = i915_switch_context(req); + i915_add_request_no_flush(req); + if (ret) + return ret; + } + + return 0; +} + static bool contexts_enabled(struct drm_device *dev) { return i915.enable_execlists || to_i915(dev)->hw_context_size; @@ -985,7 +1012,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, } idr_remove(&file_priv->context_idr, ctx->user_handle); - i915_gem_context_unreference(ctx); + context_close(ctx); mutex_unlock(&dev->struct_mutex); DRM_DEBUG_DRIVER("HW context %d destroyed\n", args->ctx_id); diff --git a/drivers/gpu/drm/i915/i915_gem_debug.c b/drivers/gpu/drm/i915/i915_gem_debug.c deleted file mode 100644 index a56516482394..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_debug.c +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright © 2008 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Keith Packard <keithp@keithp.com> - * - */ - -#include <drm/drmP.h> -#include <drm/i915_drm.h> -#include "i915_drv.h" - -#if WATCH_LISTS -int -i915_verify_lists(struct drm_device *dev) -{ - static int warned; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_gem_object *obj; - struct intel_engine_cs *engine; - int err = 0; - - if (warned) - return 0; - - for_each_engine(engine, dev_priv) { - list_for_each_entry(obj, &engine->active_list, - engine_list[engine->id]) { - if (obj->base.dev != dev || - !atomic_read(&obj->base.refcount.refcount)) { - DRM_ERROR("%s: freed active obj %p\n", - engine->name, obj); - err++; - break; - } else if (!obj->active || - obj->last_read_req[engine->id] == NULL) { - DRM_ERROR("%s: invalid active obj %p\n", - engine->name, obj); - err++; - } else if (obj->base.write_domain) { - DRM_ERROR("%s: invalid write obj %p (w %x)\n", - engine->name, - obj, obj->base.write_domain); - err++; - } - } - } - - return warned = err; -} -#endif /* WATCH_LIST */ diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 80bbe43a2e92..97c9d68b45df 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -23,9 +23,13 @@ * Authors: * Dave Airlie <airlied@redhat.com> */ + +#include <linux/dma-buf.h> +#include <linux/reservation.h> + #include <drm/drmP.h> + #include "i915_drv.h" -#include <linux/dma-buf.h> static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf) { @@ -115,7 +119,7 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf) if (ret) return ERR_PTR(ret); - addr = i915_gem_object_pin_map(obj); + addr = i915_gem_object_pin_map(obj, I915_MAP_WB); mutex_unlock(&dev->struct_mutex); return addr; @@ -218,25 +222,73 @@ static const struct dma_buf_ops i915_dmabuf_ops = { .end_cpu_access = i915_gem_end_cpu_access, }; +static void export_fences(struct drm_i915_gem_object *obj, + struct dma_buf *dma_buf) +{ + struct reservation_object *resv = dma_buf->resv; + struct drm_i915_gem_request *req; + unsigned long active; + int idx; + + active = __I915_BO_ACTIVE(obj); + if (!active) + return; + + /* Serialise with execbuf to prevent concurrent fence-loops */ + mutex_lock(&obj->base.dev->struct_mutex); + + /* Mark the object for future fences before racily adding old fences */ + obj->base.dma_buf = dma_buf; + + ww_mutex_lock(&resv->lock, NULL); + + for_each_active(active, idx) { + req = i915_gem_active_get(&obj->last_read[idx], + &obj->base.dev->struct_mutex); + if (!req) + continue; + + if (reservation_object_reserve_shared(resv) == 0) + reservation_object_add_shared_fence(resv, &req->fence); + + i915_gem_request_put(req); + } + + req = i915_gem_active_get(&obj->last_write, + &obj->base.dev->struct_mutex); + if (req) { + reservation_object_add_excl_fence(resv, &req->fence); + i915_gem_request_put(req); + } + + ww_mutex_unlock(&resv->lock); + mutex_unlock(&obj->base.dev->struct_mutex); +} + struct dma_buf *i915_gem_prime_export(struct drm_device *dev, struct 
drm_gem_object *gem_obj, int flags)
 {
 	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
 	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+	struct dma_buf *dma_buf;
 
 	exp_info.ops = &i915_dmabuf_ops;
 	exp_info.size = gem_obj->size;
 	exp_info.flags = flags;
 	exp_info.priv = gem_obj;
 
-
 	if (obj->ops->dmabuf_export) {
 		int ret = obj->ops->dmabuf_export(obj);
 		if (ret)
 			return ERR_PTR(ret);
 	}
 
-	return dma_buf_export(&exp_info);
+	dma_buf = drm_gem_dmabuf_export(dev, &exp_info);
+	if (IS_ERR(dma_buf))
+		return dma_buf;
+
+	export_fences(obj, dma_buf);
+	return dma_buf;
 }
 
 static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
@@ -278,8 +330,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 		 * Importing dmabuf exported from our own gem increases
 		 * refcount on gem itself instead of f_count of dmabuf.
 		 */
-		drm_gem_object_reference(&obj->base);
-		return &obj->base;
+		return &i915_gem_object_get(obj)->base;
 	}
 }
 
@@ -300,6 +351,16 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 	i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops);
 	obj->base.import_attach = attach;
 
+	/* We use GTT as shorthand for a coherent domain, one that is
+	 * neither in the GPU cache nor in the CPU cache, where all
+	 * writes are immediately visible in memory. (That's not strictly
+	 * true, but it's close! There are internal buffers such as the
+	 * write-combined buffer or a delay through the chipset for GTT
+	 * writes that do require us to treat GTT as a separate cache domain.)
+	 */
+	obj->base.read_domains = I915_GEM_DOMAIN_GTT;
+	obj->base.write_domain = 0;
+
 	return &obj->base;
 
 fail_detach:
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 3c1280ec7ff6..5b6f81c1dbca 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -33,53 +33,37 @@
 #include "intel_drv.h"
 #include "i915_trace.h"
 
-static int switch_to_pinned_context(struct drm_i915_private *dev_priv)
+static bool
+gpu_is_idle(struct drm_i915_private *dev_priv)
 {
 	struct intel_engine_cs *engine;
 
-	if (i915.enable_execlists)
-		return 0;
-
 	for_each_engine(engine, dev_priv) {
-		struct drm_i915_gem_request *req;
-		int ret;
-
-		if (engine->last_context == NULL)
-			continue;
-
-		if (engine->last_context == dev_priv->kernel_context)
-			continue;
-
-		req = i915_gem_request_alloc(engine, dev_priv->kernel_context);
-		if (IS_ERR(req))
-			return PTR_ERR(req);
-
-		ret = i915_switch_context(req);
-		i915_add_request_no_flush(req);
-		if (ret)
-			return ret;
+		if (intel_engine_is_active(engine))
+			return false;
 	}
 
-	return 0;
+	return true;
 }
 
-
 static bool
-mark_free(struct i915_vma *vma, struct list_head *unwind)
+mark_free(struct i915_vma *vma, unsigned int flags, struct list_head *unwind)
 {
-	if (vma->pin_count)
+	if (i915_vma_is_pinned(vma))
 		return false;
 
 	if (WARN_ON(!list_empty(&vma->exec_list)))
 		return false;
 
+	if (flags & PIN_NONFAULT && vma->obj->fault_mappable)
+		return false;
+
 	list_add(&vma->exec_list, unwind);
 	return drm_mm_scan_add_block(&vma->node);
 }
 
 /**
  * i915_gem_evict_something - Evict vmas to make room for binding a new one
- * @dev: drm_device
  * @vm: address space to evict from
  * @min_size: size of the desired free space
  * @alignment: alignment constraint of the desired free space
@@ -102,42 +86,37 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
 * memory in e.g. the shrinker.
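 *
 * A call might look like the sketch below (the size, alignment and flags
 * values are assumed for illustration; the signature is the one declared
 * here):
 *
 *	ret = i915_gem_evict_something(&ggtt->base, size, alignment,
 *				       I915_CACHE_NONE,
 *				       0, ggtt->base.total, flags);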
 */
 int
-i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
-			 int min_size, unsigned alignment, unsigned cache_level,
-			 unsigned long start, unsigned long end,
+i915_gem_evict_something(struct i915_address_space *vm,
+			 u64 min_size, u64 alignment,
+			 unsigned cache_level,
+			 u64 start, u64 end,
 			 unsigned flags)
 {
-	struct list_head eviction_list, unwind_list;
-	struct i915_vma *vma;
-	int ret = 0;
-	int pass = 0;
+	struct drm_i915_private *dev_priv = to_i915(vm->dev);
+	struct list_head eviction_list;
+	struct list_head *phases[] = {
+		&vm->inactive_list,
+		&vm->active_list,
+		NULL,
+	}, **phase;
+	struct i915_vma *vma, *next;
+	int ret;
 
-	trace_i915_gem_evict(dev, min_size, alignment, flags);
+	trace_i915_gem_evict(vm, min_size, alignment, flags);
 
 	/*
 	 * The goal is to evict objects and amalgamate space in LRU order.
 	 * The oldest idle objects reside on the inactive list, which is in
-	 * retirement order. The next objects to retire are those on the (per
-	 * ring) active list that do not have an outstanding flush. Once the
-	 * hardware reports completion (the seqno is updated after the
-	 * batchbuffer has been finished) the clean buffer objects would
-	 * be retired to the inactive list. Any dirty objects would be added
-	 * to the tail of the flushing list. So after processing the clean
-	 * active objects we need to emit a MI_FLUSH to retire the flushing
-	 * list, hence the retirement order of the flushing list is in
-	 * advance of the dirty objects on the active lists.
+	 * retirement order. The next objects to retire are those in flight,
+	 * on the active list, again in retirement order.
 	 *
 	 * The retirement sequence is thus:
 	 * 1. Inactive objects (already retired)
-	 * 2. Clean active objects
-	 * 3. Flushing list
-	 * 4. Dirty active objects.
+	 * 2. Active objects (will stall on unbinding)
 	 *
 	 * On each list, the oldest objects lie at the HEAD with the freshest
 	 * object on the TAIL.
 	 */
-
-	INIT_LIST_HEAD(&unwind_list);
 	if (start != 0 || end != vm->total) {
 		drm_mm_init_scan_with_range(&vm->mm, min_size,
 					    alignment, cache_level,
@@ -145,96 +124,86 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
 	} else
 		drm_mm_init_scan(&vm->mm, min_size, alignment, cache_level);
 
-search_again:
-	/* First see if there is a large enough contiguous idle region... */
-	list_for_each_entry(vma, &vm->inactive_list, vm_link) {
-		if (mark_free(vma, &unwind_list))
-			goto found;
-	}
-
 	if (flags & PIN_NONBLOCK)
-		goto none;
+		phases[1] = NULL;
 
-	/* Now merge in the soon-to-be-expired objects... */
-	list_for_each_entry(vma, &vm->active_list, vm_link) {
-		if (mark_free(vma, &unwind_list))
-			goto found;
-	}
+search_again:
+	INIT_LIST_HEAD(&eviction_list);
+	phase = phases;
+	do {
+		list_for_each_entry(vma, *phase, vm_link)
+			if (mark_free(vma, flags, &eviction_list))
+				goto found;
+	} while (*++phase);
 
-none:
 	/* Nothing found, clean up and bail out! */
-	while (!list_empty(&unwind_list)) {
-		vma = list_first_entry(&unwind_list,
-				       struct i915_vma,
-				       exec_list);
+	list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
 		ret = drm_mm_scan_remove_block(&vma->node);
 		BUG_ON(ret);
 
-		list_del_init(&vma->exec_list);
+		INIT_LIST_HEAD(&vma->exec_list);
 	}
 
 	/* Can we unpin some objects such as idle hw contents,
-	 * or pending flips?
+	 * or pending flips? But since only the GGTT has global entries
+	 * such as scanouts, ringbuffers and contexts, we can skip the
+	 * purge when inspecting per-process local address spaces.
 	 */
-	if (flags & PIN_NONBLOCK)
+	if (!i915_is_ggtt(vm) || flags & PIN_NONBLOCK)
 		return -ENOSPC;
 
-	/* Only idle the GPU and repeat the search once */
-	if (pass++ == 0) {
-		struct drm_i915_private *dev_priv = to_i915(dev);
-
-		if (i915_is_ggtt(vm)) {
-			ret = switch_to_pinned_context(dev_priv);
-			if (ret)
-				return ret;
-		}
-
-		ret = i915_gem_wait_for_idle(dev_priv);
-		if (ret)
-			return ret;
-
-		i915_gem_retire_requests(dev_priv);
-		goto search_again;
+	if (gpu_is_idle(dev_priv)) {
+		/* If we still have pending pageflip completions, drop
+		 * back to userspace to give our workqueues time to
+		 * acquire our locks and unpin the old scanouts.
+		 */
+		return intel_has_pending_fb_unpin(vm->dev) ? -EAGAIN : -ENOSPC;
 	}
 
-	/* If we still have pending pageflip completions, drop
-	 * back to userspace to give our workqueues time to
-	 * acquire our locks and unpin the old scanouts.
+	/* Not everything in the GGTT is tracked via vma (otherwise we
+	 * could evict as required with minimal stalling) so we are forced
+	 * to idle the GPU and explicitly retire outstanding requests in
+	 * the hopes that we can then remove contexts and the like only
+	 * bound by their active reference.
 	 */
-	return intel_has_pending_fb_unpin(dev) ? -EAGAIN : -ENOSPC;
+	ret = i915_gem_switch_to_kernel_context(dev_priv);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_wait_for_idle(dev_priv,
+				     I915_WAIT_INTERRUPTIBLE |
+				     I915_WAIT_LOCKED);
+	if (ret)
+		return ret;
+
+	i915_gem_retire_requests(dev_priv);
+	goto search_again;
 
 found:
 	/* drm_mm doesn't allow any other operations while
-	 * scanning, therefore store to be evicted objects on a
-	 * temporary list. */
-	INIT_LIST_HEAD(&eviction_list);
-	while (!list_empty(&unwind_list)) {
-		vma = list_first_entry(&unwind_list,
-				       struct i915_vma,
-				       exec_list);
-		if (drm_mm_scan_remove_block(&vma->node)) {
-			list_move(&vma->exec_list, &eviction_list);
-			drm_gem_object_reference(&vma->obj->base);
-			continue;
-		}
-		list_del_init(&vma->exec_list);
+	 * scanning, therefore store to-be-evicted objects on a
+	 * temporary list and take a reference for all before
+	 * calling unbind (which may remove the active reference
+	 * of any of our objects, thus corrupting the list).
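+	 * (Here the reference is taken by pinning: the __i915_vma_pin()
+	 * below is paired with the __i915_vma_unpin() performed just
+	 * before each i915_vma_unbind() call.)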
+ */ + list_for_each_entry_safe(vma, next, &eviction_list, exec_list) { + if (drm_mm_scan_remove_block(&vma->node)) + __i915_vma_pin(vma); + else + list_del_init(&vma->exec_list); } /* Unbinding will emit any required flushes */ while (!list_empty(&eviction_list)) { - struct drm_gem_object *obj; vma = list_first_entry(&eviction_list, struct i915_vma, exec_list); - obj = &vma->obj->base; list_del_init(&vma->exec_list); + __i915_vma_unpin(vma); if (ret == 0) ret = i915_vma_unbind(vma); - - drm_gem_object_unreference(obj); } - return ret; } @@ -256,8 +225,8 @@ i915_gem_evict_for_vma(struct i915_vma *target) vma = container_of(node, typeof(*vma), node); - if (vma->pin_count) { - if (!vma->exec_entry || (vma->pin_count > 1)) + if (i915_vma_is_pinned(vma)) { + if (!vma->exec_entry || i915_vma_pin_count(vma) > 1) /* Object is pinned for some other use */ return -EBUSY; @@ -303,22 +272,23 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle) struct drm_i915_private *dev_priv = to_i915(vm->dev); if (i915_is_ggtt(vm)) { - ret = switch_to_pinned_context(dev_priv); + ret = i915_gem_switch_to_kernel_context(dev_priv); if (ret) return ret; } - ret = i915_gem_wait_for_idle(dev_priv); + ret = i915_gem_wait_for_idle(dev_priv, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED); if (ret) return ret; i915_gem_retire_requests(dev_priv); - WARN_ON(!list_empty(&vm->active_list)); } list_for_each_entry_safe(vma, next, &vm->inactive_list, vm_link) - if (vma->pin_count == 0) + if (!i915_vma_is_pinned(vma)) WARN_ON(i915_vma_unbind(vma)); return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 1978633e7549..7adb4c77cc7f 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -26,22 +26,42 @@ * */ +#include <linux/dma_remapping.h> +#include <linux/reservation.h> +#include <linux/uaccess.h> + #include <drm/drmP.h> #include <drm/i915_drm.h> + #include "i915_drv.h" +#include "i915_gem_dmabuf.h" #include "i915_trace.h" #include "intel_drv.h" -#include <linux/dma_remapping.h> -#include <linux/uaccess.h> +#include "intel_frontbuffer.h" + +#define DBG_USE_CPU_RELOC 0 /* -1 force GTT relocs; 1 force CPU relocs */ -#define __EXEC_OBJECT_HAS_PIN (1<<31) -#define __EXEC_OBJECT_HAS_FENCE (1<<30) -#define __EXEC_OBJECT_NEEDS_MAP (1<<29) -#define __EXEC_OBJECT_NEEDS_BIAS (1<<28) +#define __EXEC_OBJECT_HAS_PIN (1<<31) +#define __EXEC_OBJECT_HAS_FENCE (1<<30) +#define __EXEC_OBJECT_NEEDS_MAP (1<<29) +#define __EXEC_OBJECT_NEEDS_BIAS (1<<28) +#define __EXEC_OBJECT_INTERNAL_FLAGS (0xf<<28) /* all of the above */ #define BATCH_OFFSET_BIAS (256*1024) +struct i915_execbuffer_params { + struct drm_device *dev; + struct drm_file *file; + struct i915_vma *batch; + u32 dispatch_flags; + u32 args_batch_start_offset; + struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + struct drm_i915_gem_request *request; +}; + struct eb_vmas { + struct drm_i915_private *i915; struct list_head vmas; int and; union { @@ -51,7 +71,8 @@ struct eb_vmas { }; static struct eb_vmas * -eb_create(struct drm_i915_gem_execbuffer2 *args) +eb_create(struct drm_i915_private *i915, + struct drm_i915_gem_execbuffer2 *args) { struct eb_vmas *eb = NULL; @@ -78,6 +99,7 @@ eb_create(struct drm_i915_gem_execbuffer2 *args) } else eb->and = -args->buffer_count; + eb->i915 = i915; INIT_LIST_HEAD(&eb->vmas); return eb; } @@ -89,6 +111,26 @@ eb_reset(struct eb_vmas *eb) memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head)); } +static 
struct i915_vma *
+eb_get_batch(struct eb_vmas *eb)
+{
+	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
+
+	/*
+	 * SNA is doing fancy tricks with compressing batch buffers, which leads
+	 * to negative relocation deltas. Usually that works out ok since the
+	 * relocate address is still positive, except when the batch is placed
+	 * very low in the GTT. Ensure this doesn't happen.
+	 *
+	 * Note that actual hangs have only been observed on gen7, but for
+	 * paranoia do it everywhere.
+	 */
+	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
+		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+
+	return vma;
+}
+
 static int
 eb_lookup_vmas(struct eb_vmas *eb,
 	       struct drm_i915_gem_exec_object2 *exec,
@@ -122,7 +164,7 @@ eb_lookup_vmas(struct eb_vmas *eb,
 			goto err;
 		}
 
-		drm_gem_object_reference(&obj->base);
+		i915_gem_object_get(obj);
 		list_add_tail(&obj->obj_exec_link, &objects);
 	}
 	spin_unlock(&file->table_lock);
@@ -143,8 +185,8 @@ eb_lookup_vmas(struct eb_vmas *eb,
 		 * from the (obj, vm) we don't run the risk of creating
 		 * duplicated vmas for the same vm.
 		 */
-		vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
-		if (IS_ERR(vma)) {
+		vma = i915_gem_obj_lookup_or_create_vma(obj, vm, NULL);
+		if (unlikely(IS_ERR(vma))) {
 			DRM_DEBUG("Failed to lookup VMA\n");
 			ret = PTR_ERR(vma);
 			goto err;
@@ -175,7 +217,7 @@ err:
 				       struct drm_i915_gem_object,
 				       obj_exec_link);
 		list_del_init(&obj->obj_exec_link);
-		drm_gem_object_unreference(&obj->base);
+		i915_gem_object_put(obj);
 	}
 	/*
 	 * Objects already transferred to the vmas list will be unreferenced by
@@ -208,7 +250,6 @@ static void
 i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
 {
 	struct drm_i915_gem_exec_object2 *entry;
-	struct drm_i915_gem_object *obj = vma->obj;
 
 	if (!drm_mm_node_allocated(&vma->node))
 		return;
@@ -216,10 +257,10 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
 	entry = vma->exec_entry;
 
 	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
-		i915_gem_object_unpin_fence(obj);
+		i915_vma_unpin_fence(vma);
 
 	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
-		vma->pin_count--;
+		__i915_vma_unpin(vma);
 
 	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
 }
@@ -234,13 +275,19 @@ static void eb_destroy(struct eb_vmas *eb)
 				       exec_list);
 		list_del_init(&vma->exec_list);
 		i915_gem_execbuffer_unreserve_vma(vma);
-		drm_gem_object_unreference(&vma->obj->base);
+		i915_vma_put(vma);
 	}
 	kfree(eb);
 }
 
 static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
 {
+	if (!i915_gem_object_has_struct_page(obj))
+		return false;
+
+	if (DBG_USE_CPU_RELOC)
+		return DBG_USE_CPU_RELOC > 0;
+
 	return (HAS_LLC(obj->base.dev) ||
 		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
 		obj->cache_level != I915_CACHE_NONE);
@@ -265,144 +312,265 @@ static inline uint64_t gen8_noncanonical_addr(uint64_t address)
 }
 
 static inline uint64_t
-relocation_target(struct drm_i915_gem_relocation_entry *reloc,
+relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
 		  uint64_t target_offset)
 {
 	return gen8_canonical_addr((int)reloc->delta + target_offset);
 }
 
-static int
-relocate_entry_cpu(struct drm_i915_gem_object *obj,
-		   struct drm_i915_gem_relocation_entry *reloc,
-		   uint64_t target_offset)
+struct reloc_cache {
+	struct drm_i915_private *i915;
+	struct drm_mm_node node;
+	unsigned long vaddr;
+	unsigned int page;
+	bool use_64bit_reloc;
+};
+
+static void reloc_cache_init(struct reloc_cache *cache,
+			     struct drm_i915_private *i915)
 {
-	struct drm_device *dev = obj->base.dev;
-	uint32_t page_offset = offset_in_page(reloc->offset);
-	uint64_t delta =
relocation_target(reloc, target_offset); - char *vaddr; - int ret; + cache->page = -1; + cache->vaddr = 0; + cache->i915 = i915; + cache->use_64bit_reloc = INTEL_GEN(cache->i915) >= 8; + cache->node.allocated = false; +} - ret = i915_gem_object_set_to_cpu_domain(obj, true); - if (ret) - return ret; +static inline void *unmask_page(unsigned long p) +{ + return (void *)(uintptr_t)(p & PAGE_MASK); +} - vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, - reloc->offset >> PAGE_SHIFT)); - *(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta); +static inline unsigned int unmask_flags(unsigned long p) +{ + return p & ~PAGE_MASK; +} + +#define KMAP 0x4 /* after CLFLUSH_FLAGS */ + +static void reloc_cache_fini(struct reloc_cache *cache) +{ + void *vaddr; + + if (!cache->vaddr) + return; - if (INTEL_INFO(dev)->gen >= 8) { - page_offset = offset_in_page(page_offset + sizeof(uint32_t)); + vaddr = unmask_page(cache->vaddr); + if (cache->vaddr & KMAP) { + if (cache->vaddr & CLFLUSH_AFTER) + mb(); - if (page_offset == 0) { - kunmap_atomic(vaddr); - vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, - (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT)); + kunmap_atomic(vaddr); + i915_gem_obj_finish_shmem_access((struct drm_i915_gem_object *)cache->node.mm); + } else { + wmb(); + io_mapping_unmap_atomic((void __iomem *)vaddr); + if (cache->node.allocated) { + struct i915_ggtt *ggtt = &cache->i915->ggtt; + + ggtt->base.clear_range(&ggtt->base, + cache->node.start, + cache->node.size, + true); + drm_mm_remove_node(&cache->node); + } else { + i915_vma_unpin((struct i915_vma *)cache->node.mm); } + } +} + +static void *reloc_kmap(struct drm_i915_gem_object *obj, + struct reloc_cache *cache, + int page) +{ + void *vaddr; + + if (cache->vaddr) { + kunmap_atomic(unmask_page(cache->vaddr)); + } else { + unsigned int flushes; + int ret; - *(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta); + ret = i915_gem_obj_prepare_shmem_write(obj, &flushes); + if (ret) + return ERR_PTR(ret); + + BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS); + BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK); + + cache->vaddr = flushes | KMAP; + cache->node.mm = (void *)obj; + if (flushes) + mb(); } - kunmap_atomic(vaddr); + vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page)); + cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr; + cache->page = page; - return 0; + return vaddr; } -static int -relocate_entry_gtt(struct drm_i915_gem_object *obj, - struct drm_i915_gem_relocation_entry *reloc, - uint64_t target_offset) +static void *reloc_iomap(struct drm_i915_gem_object *obj, + struct reloc_cache *cache, + int page) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - uint64_t delta = relocation_target(reloc, target_offset); - uint64_t offset; - void __iomem *reloc_page; - int ret; + struct i915_ggtt *ggtt = &cache->i915->ggtt; + unsigned long offset; + void *vaddr; - ret = i915_gem_object_set_to_gtt_domain(obj, true); - if (ret) - return ret; + if (cache->node.allocated) { + wmb(); + ggtt->base.insert_page(&ggtt->base, + i915_gem_object_get_dma_address(obj, page), + cache->node.start, I915_CACHE_NONE, 0); + cache->page = page; + return unmask_page(cache->vaddr); + } - ret = i915_gem_object_put_fence(obj); - if (ret) - return ret; + if (cache->vaddr) { + io_mapping_unmap_atomic(unmask_page(cache->vaddr)); + } else { + struct i915_vma *vma; + int ret; - /* Map the page containing the relocation we're going to perform. 
 */
-	offset = i915_gem_obj_ggtt_offset(obj);
-	offset += reloc->offset;
-	reloc_page = io_mapping_map_atomic_wc(ggtt->mappable,
-					      offset & PAGE_MASK);
-	iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));
-
-	if (INTEL_INFO(dev)->gen >= 8) {
-		offset += sizeof(uint32_t);
-
-		if (offset_in_page(offset) == 0) {
-			io_mapping_unmap_atomic(reloc_page);
-			reloc_page =
-				io_mapping_map_atomic_wc(ggtt->mappable,
-							 offset);
+		if (use_cpu_reloc(obj))
+			return NULL;
+
+		ret = i915_gem_object_set_to_gtt_domain(obj, true);
+		if (ret)
+			return ERR_PTR(ret);
+
+		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+					       PIN_MAPPABLE | PIN_NONBLOCK);
+		if (IS_ERR(vma)) {
+			memset(&cache->node, 0, sizeof(cache->node));
+			ret = drm_mm_insert_node_in_range_generic
+				(&ggtt->base.mm, &cache->node,
+				 4096, 0, 0,
+				 0, ggtt->mappable_end,
+				 DRM_MM_SEARCH_DEFAULT,
+				 DRM_MM_CREATE_DEFAULT);
+			if (ret) /* no inactive aperture space, use cpu reloc */
+				return NULL;
+		} else {
+			ret = i915_vma_put_fence(vma);
+			if (ret) {
+				i915_vma_unpin(vma);
+				return ERR_PTR(ret);
+			}
+
+			cache->node.start = vma->node.start;
+			cache->node.mm = (void *)vma;
 		}
+	}
 
-		iowrite32(upper_32_bits(delta),
-			  reloc_page + offset_in_page(offset));
+	offset = cache->node.start;
+	if (cache->node.allocated) {
+		ggtt->base.insert_page(&ggtt->base,
+				       i915_gem_object_get_dma_address(obj, page),
+				       offset, I915_CACHE_NONE, 0);
+	} else {
+		offset += page << PAGE_SHIFT;
 	}
 
-	io_mapping_unmap_atomic(reloc_page);
+	vaddr = io_mapping_map_atomic_wc(&cache->i915->ggtt.mappable, offset);
+	cache->page = page;
+	cache->vaddr = (unsigned long)vaddr;
 
-	return 0;
+	return vaddr;
 }
 
-static void
-clflush_write32(void *addr, uint32_t value)
+static void *reloc_vaddr(struct drm_i915_gem_object *obj,
+			 struct reloc_cache *cache,
+			 int page)
 {
-	/* This is not a fast path, so KISS. */
-	drm_clflush_virt_range(addr, sizeof(uint32_t));
-	*(uint32_t *)addr = value;
-	drm_clflush_virt_range(addr, sizeof(uint32_t));
+	void *vaddr;
+
+	if (cache->page == page) {
+		vaddr = unmask_page(cache->vaddr);
+	} else {
+		vaddr = NULL;
+		if ((cache->vaddr & KMAP) == 0)
+			vaddr = reloc_iomap(obj, cache, page);
+		if (!vaddr)
+			vaddr = reloc_kmap(obj, cache, page);
+	}
+
+	return vaddr;
 }
 
-static int
-relocate_entry_clflush(struct drm_i915_gem_object *obj,
-		       struct drm_i915_gem_relocation_entry *reloc,
-		       uint64_t target_offset)
+static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
 {
-	struct drm_device *dev = obj->base.dev;
-	uint32_t page_offset = offset_in_page(reloc->offset);
-	uint64_t delta = relocation_target(reloc, target_offset);
-	char *vaddr;
-	int ret;
+	if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
+		if (flushes & CLFLUSH_BEFORE) {
+			clflushopt(addr);
+			mb();
+		}
 
-	ret = i915_gem_object_set_to_gtt_domain(obj, true);
-	if (ret)
-		return ret;
+		*addr = value;
 
-	vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
-				reloc->offset >> PAGE_SHIFT));
-	clflush_write32(vaddr + page_offset, lower_32_bits(delta));
+		/* Writes to the same cacheline are serialised by the CPU
+		 * (including clflush). On the write path, we only require
+		 * that it hits memory in an orderly fashion and place
+		 * mb barriers at the start and end of the relocation phase
+		 * to ensure ordering of clflush wrt the system.
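+		 * (clflushopt, unlike clflush, is only ordered against
+		 * accesses to the cacheline being flushed, hence the
+		 * explicit barriers around the flushes here.)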
+ */ + if (flushes & CLFLUSH_AFTER) + clflushopt(addr); + } else + *addr = value; +} - if (INTEL_INFO(dev)->gen >= 8) { - page_offset = offset_in_page(page_offset + sizeof(uint32_t)); +static int +relocate_entry(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_relocation_entry *reloc, + struct reloc_cache *cache, + u64 target_offset) +{ + u64 offset = reloc->offset; + bool wide = cache->use_64bit_reloc; + void *vaddr; + + target_offset = relocation_target(reloc, target_offset); +repeat: + vaddr = reloc_vaddr(obj, cache, offset >> PAGE_SHIFT); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + + clflush_write32(vaddr + offset_in_page(offset), + lower_32_bits(target_offset), + cache->vaddr); + + if (wide) { + offset += sizeof(u32); + target_offset >>= 32; + wide = false; + goto repeat; + } - if (page_offset == 0) { - kunmap_atomic(vaddr); - vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, - (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT)); - } + return 0; +} - clflush_write32(vaddr + page_offset, upper_32_bits(delta)); - } +static bool object_is_idle(struct drm_i915_gem_object *obj) +{ + unsigned long active = i915_gem_object_get_active(obj); + int idx; - kunmap_atomic(vaddr); + for_each_active(active, idx) { + if (!i915_gem_active_is_idle(&obj->last_read[idx], + &obj->base.dev->struct_mutex)) + return false; + } - return 0; + return true; } static int i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, struct eb_vmas *eb, - struct drm_i915_gem_relocation_entry *reloc) + struct drm_i915_gem_relocation_entry *reloc, + struct reloc_cache *cache) { struct drm_device *dev = obj->base.dev; struct drm_gem_object *target_obj; @@ -465,7 +633,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, /* Check that the relocation address is valid... */ if (unlikely(reloc->offset > - obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) { + obj->base.size - (cache->use_64bit_reloc ? 
8 : 4))) { DRM_DEBUG("Relocation beyond object bounds: " "obj %p target %d offset %d size %d.\n", obj, reloc->target_handle, @@ -482,26 +650,15 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, } /* We can't wait for rendering with pagefaults disabled */ - if (obj->active && pagefault_disabled()) + if (pagefault_disabled() && !object_is_idle(obj)) return -EFAULT; - if (use_cpu_reloc(obj)) - ret = relocate_entry_cpu(obj, reloc, target_offset); - else if (obj->map_and_fenceable) - ret = relocate_entry_gtt(obj, reloc, target_offset); - else if (static_cpu_has(X86_FEATURE_CLFLUSH)) - ret = relocate_entry_clflush(obj, reloc, target_offset); - else { - WARN_ONCE(1, "Impossible case in relocation handling\n"); - ret = -ENODEV; - } - + ret = relocate_entry(obj, reloc, cache, target_offset); if (ret) return ret; /* and update the user's relocation entry */ reloc->presumed_offset = target_offset; - return 0; } @@ -513,9 +670,11 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)]; struct drm_i915_gem_relocation_entry __user *user_relocs; struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - int remain, ret; + struct reloc_cache cache; + int remain, ret = 0; user_relocs = u64_to_user_ptr(entry->relocs_ptr); + reloc_cache_init(&cache, eb->i915); remain = entry->relocation_count; while (remain) { @@ -525,19 +684,23 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, count = ARRAY_SIZE(stack_reloc); remain -= count; - if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]))) - return -EFAULT; + if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]))) { + ret = -EFAULT; + goto out; + } do { u64 offset = r->presumed_offset; - ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r); + ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, &cache); if (ret) - return ret; + goto out; if (r->presumed_offset != offset && - __put_user(r->presumed_offset, &user_relocs->presumed_offset)) { - return -EFAULT; + __put_user(r->presumed_offset, + &user_relocs->presumed_offset)) { + ret = -EFAULT; + goto out; } user_relocs++; @@ -545,7 +708,9 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, } while (--count); } - return 0; +out: + reloc_cache_fini(&cache); + return ret; #undef N_RELOC } @@ -555,15 +720,18 @@ i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma, struct drm_i915_gem_relocation_entry *relocs) { const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - int i, ret; + struct reloc_cache cache; + int i, ret = 0; + reloc_cache_init(&cache, eb->i915); for (i = 0; i < entry->relocation_count; i++) { - ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]); + ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache); if (ret) - return ret; + break; } + reloc_cache_fini(&cache); - return 0; + return ret; } static int @@ -626,23 +794,27 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, flags |= PIN_HIGH; } - ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); - if ((ret == -ENOSPC || ret == -E2BIG) && + ret = i915_vma_pin(vma, + entry->pad_to_size, + entry->alignment, + flags); + if ((ret == -ENOSPC || ret == -E2BIG) && only_mappable_for_reloc(entry->flags)) - ret = i915_gem_object_pin(obj, vma->vm, - entry->alignment, - flags & ~PIN_MAPPABLE); + ret = i915_vma_pin(vma, + entry->pad_to_size, + entry->alignment, + flags & ~PIN_MAPPABLE); if (ret) return ret; entry->flags |= __EXEC_OBJECT_HAS_PIN; if (entry->flags & 
EXEC_OBJECT_NEEDS_FENCE) { - ret = i915_gem_object_get_fence(obj); + ret = i915_vma_get_fence(vma); if (ret) return ret; - if (i915_gem_object_pin_fence(obj)) + if (i915_vma_pin_fence(vma)) entry->flags |= __EXEC_OBJECT_HAS_FENCE; } @@ -667,7 +839,7 @@ need_reloc_mappable(struct i915_vma *vma) if (entry->relocation_count == 0) return false; - if (!vma->is_ggtt) + if (!i915_vma_is_ggtt(vma)) return false; /* See also use_cpu_reloc() */ @@ -684,14 +856,17 @@ static bool eb_vma_misplaced(struct i915_vma *vma) { struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - struct drm_i915_gem_object *obj = vma->obj; - WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && !vma->is_ggtt); + WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && + !i915_vma_is_ggtt(vma)); if (entry->alignment && vma->node.start & (entry->alignment - 1)) return true; + if (vma->node.size < entry->pad_to_size) + return true; + if (entry->flags & EXEC_OBJECT_PINNED && vma->node.start != entry->offset) return true; @@ -701,7 +876,8 @@ eb_vma_misplaced(struct i915_vma *vma) return true; /* avoid costly ping-pong once a batch bo ended up non-mappable */ - if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable) + if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && + !i915_vma_is_map_and_fenceable(vma)) return !only_mappable_for_reloc(entry->flags); if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 && @@ -725,8 +901,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine, bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4; int retry; - i915_gem_retire_requests_ring(engine); - vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; INIT_LIST_HEAD(&ordered_vmas); @@ -746,7 +920,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine, entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; need_fence = entry->flags & EXEC_OBJECT_NEEDS_FENCE && - obj->tiling_mode != I915_TILING_NONE; + i915_gem_object_is_tiled(obj); need_mappable = need_fence || need_reloc_mappable(vma); if (entry->flags & EXEC_OBJECT_PINNED) @@ -843,7 +1017,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list); list_del_init(&vma->exec_list); i915_gem_execbuffer_unreserve_vma(vma); - drm_gem_object_unreference(&vma->obj->base); + i915_vma_put(vma); } mutex_unlock(&dev->struct_mutex); @@ -937,41 +1111,54 @@ err: return ret; } +static unsigned int eb_other_engines(struct drm_i915_gem_request *req) +{ + unsigned int mask; + + mask = ~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK; + mask <<= I915_BO_ACTIVE_SHIFT; + + return mask; +} + static int i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, struct list_head *vmas) { - const unsigned other_rings = ~intel_engine_flag(req->engine); + const unsigned int other_rings = eb_other_engines(req); struct i915_vma *vma; - uint32_t flush_domains = 0; - bool flush_chipset = false; int ret; list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; + struct reservation_object *resv; - if (obj->active & other_rings) { - ret = i915_gem_object_sync(obj, req->engine, &req); + if (obj->flags & other_rings) { + ret = i915_gem_request_await_object + (req, obj, obj->base.pending_write_domain); if (ret) return ret; } - if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) - flush_chipset |= i915_gem_clflush_object(obj, false); + resv = i915_gem_object_get_dmabuf_resv(obj); + if (resv) { + ret = i915_sw_fence_await_reservation + (&req->submit, resv, &i915_fence_ops, + 
obj->base.pending_write_domain, 10*HZ, + GFP_KERNEL | __GFP_NOWARN); + if (ret < 0) + return ret; + } - flush_domains |= obj->base.write_domain; + if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) + i915_gem_clflush_object(obj, false); } - if (flush_chipset) - i915_gem_chipset_flush(req->engine->i915); - - if (flush_domains & I915_GEM_DOMAIN_GTT) - wmb(); + /* Unconditionally flush any chipset caches (for streaming writes). */ + i915_gem_chipset_flush(req->engine->i915); - /* Unconditionally invalidate gpu caches and ensure that we do flush - * any residual writes from the previous batch. - */ - return intel_ring_invalidate_all_caches(req); + /* Unconditionally invalidate GPU caches and TLBs. */ + return req->engine->emit_flush(req, EMIT_INVALIDATE); } static bool @@ -1007,6 +1194,9 @@ validate_exec_list(struct drm_device *dev, unsigned invalid_flags; int i; + /* INTERNAL flags must not overlap with external ones */ + BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS); + invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; if (USES_FULL_PPGTT(dev)) invalid_flags |= EXEC_OBJECT_NEEDS_GTT; @@ -1036,6 +1226,14 @@ validate_exec_list(struct drm_device *dev, if (exec[i].alignment && !is_power_of_2(exec[i].alignment)) return -EINVAL; + /* pad_to_size was once a reserved field, so sanitize it */ + if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) { + if (offset_in_page(exec[i].pad_to_size)) + return -EINVAL; + } else { + exec[i].pad_to_size = 0; + } + /* First check for malicious input causing overflow in * the worst case where we need to allocate the entire * relocation tree as a single array. @@ -1055,7 +1253,7 @@ validate_exec_list(struct drm_device *dev, return -EFAULT; if (likely(!i915.prefault_disable)) { - if (fault_in_multipages_readable(ptr, length)) + if (fault_in_pages_readable(ptr, length)) return -EFAULT; } } @@ -1067,12 +1265,9 @@ static struct i915_gem_context * i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, struct intel_engine_cs *engine, const u32 ctx_id) { - struct i915_gem_context *ctx = NULL; + struct i915_gem_context *ctx; struct i915_ctx_hang_stats *hs; - if (engine->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE) - return ERR_PTR(-EINVAL); - ctx = i915_gem_context_lookup(file->driver_priv, ctx_id); if (IS_ERR(ctx)) return ctx; @@ -1086,66 +1281,99 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, return ctx; } -void +void i915_vma_move_to_active(struct i915_vma *vma, + struct drm_i915_gem_request *req, + unsigned int flags) +{ + struct drm_i915_gem_object *obj = vma->obj; + const unsigned int idx = req->engine->id; + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + + obj->dirty = 1; /* be paranoid */ + + /* Add a reference if we're newly entering the active list. + * The order in which we add operations to the retirement queue is + * vital here: mark_active adds to the start of the callback list, + * such that subsequent callbacks are called first. Therefore we + * add the active reference first and queue for it to be dropped + * *last*. 
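
The ordering rule in the comment above boils down to plain reference bookkeeping. A toy model with stand-in types (not the driver's structures) showing why the reference taken on first activation can only be dropped by the last retiring engine:

#include <stdint.h>

struct toy_obj {
	unsigned int refcount;
	uint16_t active_mask;	/* one bit per engine with outstanding work */
};

static void toy_mark_active(struct toy_obj *obj, unsigned int engine)
{
	if (!obj->active_mask)
		obj->refcount++;	/* newly active: pin the object */
	obj->active_mask |= 1u << engine;
}

static void toy_retire(struct toy_obj *obj, unsigned int engine)
{
	obj->active_mask &= ~(1u << engine);
	if (!obj->active_mask)
		obj->refcount--;	/* last engine done: ref dropped *last* */
}
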
+ */ + if (!i915_gem_object_is_active(obj)) + i915_gem_object_get(obj); + i915_gem_object_set_active(obj, idx); + i915_gem_active_set(&obj->last_read[idx], req); + + if (flags & EXEC_OBJECT_WRITE) { + i915_gem_active_set(&obj->last_write, req); + + intel_fb_obj_invalidate(obj, ORIGIN_CS); + + /* update for the implicit flush after a batch */ + obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; + } + + if (flags & EXEC_OBJECT_NEEDS_FENCE) + i915_gem_active_set(&vma->last_fence, req); + + i915_vma_set_active(vma, idx); + i915_gem_active_set(&vma->last_read[idx], req); + list_move_tail(&vma->vm_link, &vma->vm->active_list); +} + +static void eb_export_fence(struct drm_i915_gem_object *obj, + struct drm_i915_gem_request *req, + unsigned int flags) +{ + struct reservation_object *resv; + + resv = i915_gem_object_get_dmabuf_resv(obj); + if (!resv) + return; + + /* Ignore errors from failing to allocate the new fence, we can't + * handle an error right now. Worst case should be missed + * synchronisation leading to rendering corruption. + */ + ww_mutex_lock(&resv->lock, NULL); + if (flags & EXEC_OBJECT_WRITE) + reservation_object_add_excl_fence(resv, &req->fence); + else if (reservation_object_reserve_shared(resv) == 0) + reservation_object_add_shared_fence(resv, &req->fence); + ww_mutex_unlock(&resv->lock); +} + +static void i915_gem_execbuffer_move_to_active(struct list_head *vmas, struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = i915_gem_request_get_engine(req); struct i915_vma *vma; list_for_each_entry(vma, vmas, exec_list) { - struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; struct drm_i915_gem_object *obj = vma->obj; u32 old_read = obj->base.read_domains; u32 old_write = obj->base.write_domain; - obj->dirty = 1; /* be paranoid */ obj->base.write_domain = obj->base.pending_write_domain; - if (obj->base.write_domain == 0) + if (obj->base.write_domain) + vma->exec_entry->flags |= EXEC_OBJECT_WRITE; + else obj->base.pending_read_domains |= obj->base.read_domains; obj->base.read_domains = obj->base.pending_read_domains; - i915_vma_move_to_active(vma, req); - if (obj->base.write_domain) { - i915_gem_request_assign(&obj->last_write_req, req); - - intel_fb_obj_invalidate(obj, ORIGIN_CS); - - /* update for the implicit flush after a batch */ - obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; - } - if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { - i915_gem_request_assign(&obj->last_fenced_req, req); - if (entry->flags & __EXEC_OBJECT_HAS_FENCE) { - struct drm_i915_private *dev_priv = engine->i915; - list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list, - &dev_priv->mm.fence_list); - } - } - + i915_vma_move_to_active(vma, req, vma->exec_entry->flags); + eb_export_fence(obj, req, vma->exec_entry->flags); trace_i915_gem_object_change_domain(obj, old_read, old_write); } } -static void -i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params) -{ - /* Unconditionally force add_request to emit a full flush. 
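
eb_export_fence() above encodes a simple rule: a write installs the exclusive fence, a read publishes a shared one. A schematic model with stand-in types (the real reservation_object API lives in the dma-buf core, not here):

struct toy_resv {
	const void *excl;	/* fence all later access must wait on */
	const void *shared[8];	/* fences published by readers */
	unsigned int shared_count;
};

static void toy_export_fence(struct toy_resv *resv,
			     const void *fence, int is_write)
{
	if (is_write) {
		resv->excl = fence;	/* a write orders everything after it */
		resv->shared_count = 0;	/* earlier readers are superseded */
	} else if (resv->shared_count < 8) {
		resv->shared[resv->shared_count++] = fence; /* reads overlap */
	}
}
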
*/ - params->engine->gpu_caches_dirty = true; - - /* Add a breadcrumb for the completion of the batch buffer */ - __i915_add_request(params->request, params->batch_obj, true); -} - static int -i915_reset_gen7_sol_offsets(struct drm_device *dev, - struct drm_i915_gem_request *req) +i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = req->engine; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_ring *ring = req->ring; int ret, i; - if (!IS_GEN7(dev) || engine != &dev_priv->engine[RCS]) { + if (!IS_GEN7(req->i915) || req->engine->id != RCS) { DRM_DEBUG("sol reset is gen7/rcs only\n"); return -EINVAL; } @@ -1155,21 +1383,21 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, return ret; for (i = 0; i < 4; i++) { - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(engine, GEN7_SO_WRITE_OFFSET(i)); - intel_ring_emit(engine, 0); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i)); + intel_ring_emit(ring, 0); } - intel_ring_advance(engine); + intel_ring_advance(ring); return 0; } -static struct drm_i915_gem_object* +static struct i915_vma * i915_gem_execbuffer_parse(struct intel_engine_cs *engine, struct drm_i915_gem_exec_object2 *shadow_exec_entry, - struct eb_vmas *eb, struct drm_i915_gem_object *batch_obj, + struct eb_vmas *eb, u32 batch_start_offset, u32 batch_len, bool is_master) @@ -1181,51 +1409,44 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine, shadow_batch_obj = i915_gem_batch_pool_get(&engine->batch_pool, PAGE_ALIGN(batch_len)); if (IS_ERR(shadow_batch_obj)) - return shadow_batch_obj; - - ret = i915_parse_cmds(engine, - batch_obj, - shadow_batch_obj, - batch_start_offset, - batch_len, - is_master); - if (ret) - goto err; - - ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 0, 0); - if (ret) - goto err; + return ERR_CAST(shadow_batch_obj); + + ret = intel_engine_cmd_parser(engine, + batch_obj, + shadow_batch_obj, + batch_start_offset, + batch_len, + is_master); + if (ret) { + if (ret == -EACCES) /* unhandled chained batch */ + vma = NULL; + else + vma = ERR_PTR(ret); + goto out; + } - i915_gem_object_unpin_pages(shadow_batch_obj); + vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) + goto out; memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry)); - vma = i915_gem_obj_to_ggtt(shadow_batch_obj); vma->exec_entry = shadow_exec_entry; vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN; - drm_gem_object_reference(&shadow_batch_obj->base); + i915_gem_object_get(shadow_batch_obj); list_add_tail(&vma->exec_list, &eb->vmas); - shadow_batch_obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND; - - return shadow_batch_obj; - -err: +out: i915_gem_object_unpin_pages(shadow_batch_obj); - if (ret == -EACCES) /* unhandled chained batch */ - return batch_obj; - else - return ERR_PTR(ret); + return vma; } -int -i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas) +static int +execbuf_submit(struct i915_execbuffer_params *params, + struct drm_i915_gem_execbuffer2 *args, + struct list_head *vmas) { - struct drm_device *dev = params->dev; - struct intel_engine_cs *engine = params->engine; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = params->request->i915; u64 exec_start, exec_len; int instp_mode; u32 instp_mask; @@ -1239,34 +1460,31 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params 
*params, if (ret) return ret; - WARN(params->ctx->ppgtt && params->ctx->ppgtt->pd_dirty_rings & (1<<engine->id), - "%s didn't clear reload\n", engine->name); - instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; instp_mask = I915_EXEC_CONSTANTS_MASK; switch (instp_mode) { case I915_EXEC_CONSTANTS_REL_GENERAL: case I915_EXEC_CONSTANTS_ABSOLUTE: case I915_EXEC_CONSTANTS_REL_SURFACE: - if (instp_mode != 0 && engine != &dev_priv->engine[RCS]) { + if (instp_mode != 0 && params->engine->id != RCS) { DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); return -EINVAL; } if (instp_mode != dev_priv->relative_constants_mode) { - if (INTEL_INFO(dev)->gen < 4) { + if (INTEL_INFO(dev_priv)->gen < 4) { DRM_DEBUG("no rel constants on pre-gen4\n"); return -EINVAL; } - if (INTEL_INFO(dev)->gen > 5 && + if (INTEL_INFO(dev_priv)->gen > 5 && instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { DRM_DEBUG("rel surface constants mode invalid on gen5+\n"); return -EINVAL; } /* The HW changed the meaning on this bit on gen6 */ - if (INTEL_INFO(dev)->gen >= 6) + if (INTEL_INFO(dev_priv)->gen >= 6) instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; } break; @@ -1275,37 +1493,39 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, return -EINVAL; } - if (engine == &dev_priv->engine[RCS] && + if (params->engine->id == RCS && instp_mode != dev_priv->relative_constants_mode) { + struct intel_ring *ring = params->request->ring; + ret = intel_ring_begin(params->request, 4); if (ret) return ret; - intel_ring_emit(engine, MI_NOOP); - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(engine, INSTPM); - intel_ring_emit(engine, instp_mask << 16 | instp_mode); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit_reg(ring, INSTPM); + intel_ring_emit(ring, instp_mask << 16 | instp_mode); + intel_ring_advance(ring); dev_priv->relative_constants_mode = instp_mode; } if (args->flags & I915_EXEC_GEN7_SOL_RESET) { - ret = i915_reset_gen7_sol_offsets(dev, params->request); + ret = i915_reset_gen7_sol_offsets(params->request); if (ret) return ret; } exec_len = args->batch_len; - exec_start = params->batch_obj_vm_offset + + exec_start = params->batch->node.start + params->args_batch_start_offset; if (exec_len == 0) - exec_len = params->batch_obj->base.size; + exec_len = params->batch->size - params->args_batch_start_offset; - ret = engine->dispatch_execbuffer(params->request, - exec_start, exec_len, - params->dispatch_flags); + ret = params->engine->emit_bb_start(params->request, + exec_start, exec_len, + params->dispatch_flags); if (ret) return ret; @@ -1318,43 +1538,20 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, /** * Find one BSD ring to dispatch the corresponding BSD command. - * The ring index is returned. + * The engine index is returned. */ static unsigned int -gen8_dispatch_bsd_ring(struct drm_i915_private *dev_priv, struct drm_file *file) +gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, + struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; /* Check whether the file_priv has already selected one ring. */ - if ((int)file_priv->bsd_ring < 0) { - /* If not, use the ping-pong mechanism to select one. 
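
The lockless replacement for that ping-pong (in the lines that follow) can be expressed with portable C11 atomics; note the kernel's atomic_fetch_xor() takes its operands in the opposite order:

#include <stdatomic.h>

static atomic_int bsd_dispatch_index;	/* flips between 0 and 1 */

/* First call latches an engine for this client and advances the
 * shared toggle, spreading clients across the two BSD engines. */
static int pick_bsd_engine(int *client_cache)
{
	if (*client_cache < 0)
		*client_cache = atomic_fetch_xor(&bsd_dispatch_index, 1);
	return *client_cache;
}
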
*/ - mutex_lock(&dev_priv->drm.struct_mutex); - file_priv->bsd_ring = dev_priv->mm.bsd_ring_dispatch_index; - dev_priv->mm.bsd_ring_dispatch_index ^= 1; - mutex_unlock(&dev_priv->drm.struct_mutex); - } + if ((int)file_priv->bsd_engine < 0) + file_priv->bsd_engine = atomic_fetch_xor(1, + &dev_priv->mm.bsd_engine_dispatch_index); - return file_priv->bsd_ring; -} - -static struct drm_i915_gem_object * -eb_get_batch(struct eb_vmas *eb) -{ - struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list); - - /* - * SNA is doing fancy tricks with compressing batch buffers, which leads - * to negative relocation deltas. Usually that works out ok since the - * relocate address is still positive, except when the batch is placed - * very low in the GTT. Ensure this doesn't happen. - * - * Note that actual hangs have only been observed on gen7, but for - * paranoia do it everywhere. - */ - if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) - vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; - - return vma->obj; + return file_priv->bsd_engine; } #define I915_USER_RINGS (4) @@ -1367,31 +1564,31 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = { [I915_EXEC_VEBOX] = VECS }; -static int -eb_select_ring(struct drm_i915_private *dev_priv, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args, - struct intel_engine_cs **ring) +static struct intel_engine_cs * +eb_select_engine(struct drm_i915_private *dev_priv, + struct drm_file *file, + struct drm_i915_gem_execbuffer2 *args) { unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; + struct intel_engine_cs *engine; if (user_ring_id > I915_USER_RINGS) { DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id); - return -EINVAL; + return NULL; } if ((user_ring_id != I915_EXEC_BSD) && ((args->flags & I915_EXEC_BSD_MASK) != 0)) { DRM_DEBUG("execbuf with non bsd ring but with invalid " "bsd dispatch flags: %d\n", (int)(args->flags)); - return -EINVAL; + return NULL; } if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) { unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; if (bsd_idx == I915_EXEC_BSD_DEFAULT) { - bsd_idx = gen8_dispatch_bsd_ring(dev_priv, file); + bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file); } else if (bsd_idx >= I915_EXEC_BSD_RING1 && bsd_idx <= I915_EXEC_BSD_RING2) { bsd_idx >>= I915_EXEC_BSD_SHIFT; @@ -1399,20 +1596,20 @@ eb_select_ring(struct drm_i915_private *dev_priv, } else { DRM_DEBUG("execbuf with unknown bsd ring: %u\n", bsd_idx); - return -EINVAL; + return NULL; } - *ring = &dev_priv->engine[_VCS(bsd_idx)]; + engine = &dev_priv->engine[_VCS(bsd_idx)]; } else { - *ring = &dev_priv->engine[user_ring_map[user_ring_id]]; + engine = &dev_priv->engine[user_ring_map[user_ring_id]]; } - if (!intel_engine_initialized(*ring)) { + if (!intel_engine_initialized(engine)) { DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id); - return -EINVAL; + return NULL; } - return 0; + return engine; } static int @@ -1423,9 +1620,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, { struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct drm_i915_gem_request *req = NULL; struct eb_vmas *eb; - struct drm_i915_gem_object *batch_obj; struct drm_i915_gem_exec_object2 shadow_exec_entry; struct intel_engine_cs *engine; struct i915_gem_context *ctx; @@ -1454,9 +1649,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (args->flags & I915_EXEC_IS_PINNED) dispatch_flags |= I915_DISPATCH_PINNED; - ret = 
eb_select_ring(dev_priv, file, args, &engine); - if (ret) - return ret; + engine = eb_select_engine(dev_priv, file, args); + if (!engine) + return -EINVAL; if (args->buffer_count < 1) { DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); @@ -1496,7 +1691,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto pre_mutex_err; } - i915_gem_context_reference(ctx); + i915_gem_context_get(ctx); if (ctx->ppgtt) vm = &ctx->ppgtt->base; @@ -1505,9 +1700,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, memset(¶ms_master, 0x00, sizeof(params_master)); - eb = eb_create(args); + eb = eb_create(dev_priv, args); if (eb == NULL) { - i915_gem_context_unreference(ctx); + i915_gem_context_put(ctx); mutex_unlock(&dev->struct_mutex); ret = -ENOMEM; goto pre_mutex_err; @@ -1519,7 +1714,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err; /* take note of the batch buffer before we might reorder the lists */ - batch_obj = eb_get_batch(eb); + params->batch = eb_get_batch(eb); /* Move the objects en-masse into the GTT, evicting if necessary. */ need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; @@ -1543,34 +1738,34 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } /* Set the pending read domains for the batch buffer to COMMAND */ - if (batch_obj->base.pending_write_domain) { + if (params->batch->obj->base.pending_write_domain) { DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); ret = -EINVAL; goto err; } + if (args->batch_start_offset > params->batch->size || + args->batch_len > params->batch->size - args->batch_start_offset) { + DRM_DEBUG("Attempting to use out-of-bounds batch\n"); + ret = -EINVAL; + goto err; + } params->args_batch_start_offset = args->batch_start_offset; - if (i915_needs_cmd_parser(engine) && args->batch_len) { - struct drm_i915_gem_object *parsed_batch_obj; - - parsed_batch_obj = i915_gem_execbuffer_parse(engine, - &shadow_exec_entry, - eb, - batch_obj, - args->batch_start_offset, - args->batch_len, - drm_is_current_master(file)); - if (IS_ERR(parsed_batch_obj)) { - ret = PTR_ERR(parsed_batch_obj); + if (intel_engine_needs_cmd_parser(engine) && args->batch_len) { + struct i915_vma *vma; + + vma = i915_gem_execbuffer_parse(engine, &shadow_exec_entry, + params->batch->obj, + eb, + args->batch_start_offset, + args->batch_len, + drm_is_current_master(file)); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); goto err; } - /* - * parsed_batch_obj == batch_obj means batch not fully parsed: - * Accept, but don't promote to secure. - */ - - if (parsed_batch_obj != batch_obj) { + if (vma) { /* * Batch parsed and accepted: * @@ -1582,16 +1777,19 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, */ dispatch_flags |= I915_DISPATCH_SECURE; params->args_batch_start_offset = 0; - batch_obj = parsed_batch_obj; + params->batch = vma; } } - batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; + params->batch->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. * hsw should have this fixed, but bdw mucks it up again. */ if (dispatch_flags & I915_DISPATCH_SECURE) { + struct drm_i915_gem_object *obj = params->batch->obj; + struct i915_vma *vma; + /* * So on first glance it looks freaky that we pin the batch here * outside of the reservation loop. But: @@ -1602,22 +1800,31 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, * fitting due to fragmentation. 
* So this is actually safe. */ - ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0); - if (ret) + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); goto err; + } - params->batch_obj_vm_offset = i915_gem_obj_ggtt_offset(batch_obj); - } else - params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm); + params->batch = vma; + } /* Allocate a request for this batch buffer nice and early. */ - req = i915_gem_request_alloc(engine, ctx); - if (IS_ERR(req)) { - ret = PTR_ERR(req); + params->request = i915_gem_request_alloc(engine, ctx); + if (IS_ERR(params->request)) { + ret = PTR_ERR(params->request); goto err_batch_unpin; } - ret = i915_gem_request_add_to_client(req, file); + /* Whilst this request exists, batch_obj will be on the + * active_list, and so will hold the active reference. Only when this + * request is retired will the batch_obj be moved onto the + * inactive_list and lose its active reference. Hence we do not need + * to explicitly hold another reference here. + */ + params->request->batch = params->batch; + + ret = i915_gem_request_add_to_client(params->request, file); if (ret) goto err_request; @@ -1631,13 +1838,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, params->file = file; params->engine = engine; params->dispatch_flags = dispatch_flags; - params->batch_obj = batch_obj; params->ctx = ctx; - params->request = req; - ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas); + ret = execbuf_submit(params, args, &eb->vmas); err_request: - i915_gem_execbuffer_retire_commands(params); + __i915_add_request(params->request, ret == 0); err_batch_unpin: /* @@ -1647,11 +1852,10 @@ err_batch_unpin: * active. */ if (dispatch_flags & I915_DISPATCH_SECURE) - i915_gem_object_ggtt_unpin(batch_obj); - + i915_vma_unpin(params->batch); err: /* the request owns the ref now */ - i915_gem_context_unreference(ctx); + i915_gem_context_put(ctx); eb_destroy(eb); mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c index 251d7a95af89..8df1fa7234e8 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/i915_gem_fence.c @@ -55,226 +55,228 @@ * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed. */ -static void i965_write_fence_reg(struct drm_device *dev, int reg, - struct drm_i915_gem_object *obj) +#define pipelined 0 + +static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, + struct i915_vma *vma) { - struct drm_i915_private *dev_priv = to_i915(dev); i915_reg_t fence_reg_lo, fence_reg_hi; int fence_pitch_shift; + u64 val; - if (INTEL_INFO(dev)->gen >= 6) { - fence_reg_lo = FENCE_REG_GEN6_LO(reg); - fence_reg_hi = FENCE_REG_GEN6_HI(reg); + if (INTEL_INFO(fence->i915)->gen >= 6) { + fence_reg_lo = FENCE_REG_GEN6_LO(fence->id); + fence_reg_hi = FENCE_REG_GEN6_HI(fence->id); fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT; + } else { - fence_reg_lo = FENCE_REG_965_LO(reg); - fence_reg_hi = FENCE_REG_965_HI(reg); + fence_reg_lo = FENCE_REG_965_LO(fence->id); + fence_reg_hi = FENCE_REG_965_HI(fence->id); fence_pitch_shift = I965_FENCE_PITCH_SHIFT; } - /* To w/a incoherency with non-atomic 64-bit register updates, - * we split the 64-bit update into two 32-bit writes. In order - * for a partial fence not to be evaluated between writes, we - * precede the update with write to turn off the fence register, - * and only enable the fence as the last step.
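
That workaround (carried over into the rewritten i965_write_fence_reg() below) is a short, self-contained sequence; a kernel-style sketch, with writel()/readl() standing in for I915_WRITE/POSTING_READ:

#include <linux/io.h>
#include <linux/kernel.h>

/* Update a split 64-bit fence without a half-written value ever
 * looking valid: disable first, write the high half, then the low
 * half (which carries the valid bit), posting the critical steps. */
static void write_fence64(void __iomem *lo, void __iomem *hi, u64 val)
{
	writel(0, lo);			/* fence off: no partial decode */
	readl(lo);			/* post the disable first */

	writel(upper_32_bits(val), hi);
	writel(lower_32_bits(val), lo);	/* valid bit lands last */
	readl(lo);			/* post before any fenced access */
}
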
- * - * For extra levels of paranoia, we make sure each step lands - * before applying the next step. - */ - I915_WRITE(fence_reg_lo, 0); - POSTING_READ(fence_reg_lo); - - if (obj) { - u32 size = i915_gem_obj_ggtt_size(obj); - uint64_t val; - - /* Adjust fence size to match tiled area */ - if (obj->tiling_mode != I915_TILING_NONE) { - uint32_t row_size = obj->stride * - (obj->tiling_mode == I915_TILING_Y ? 32 : 8); - size = (size / row_size) * row_size; - } - - val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) & - 0xfffff000) << 32; - val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000; - val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift; - if (obj->tiling_mode == I915_TILING_Y) - val |= 1 << I965_FENCE_TILING_Y_SHIFT; + val = 0; + if (vma) { + unsigned int tiling = i915_gem_object_get_tiling(vma->obj); + bool is_y_tiled = tiling == I915_TILING_Y; + unsigned int stride = i915_gem_object_get_stride(vma->obj); + u32 row_size = stride * (is_y_tiled ? 32 : 8); + u32 size = rounddown((u32)vma->node.size, row_size); + + val = ((vma->node.start + size - 4096) & 0xfffff000) << 32; + val |= vma->node.start & 0xfffff000; + val |= (u64)((stride / 128) - 1) << fence_pitch_shift; + if (is_y_tiled) + val |= BIT(I965_FENCE_TILING_Y_SHIFT); val |= I965_FENCE_REG_VALID; + } - I915_WRITE(fence_reg_hi, val >> 32); - POSTING_READ(fence_reg_hi); + if (!pipelined) { + struct drm_i915_private *dev_priv = fence->i915; - I915_WRITE(fence_reg_lo, val); + /* To w/a incoherency with non-atomic 64-bit register updates, + * we split the 64-bit update into two 32-bit writes. In order + * for a partial fence not to be evaluated between writes, we + * precede the update with write to turn off the fence register, + * and only enable the fence as the last step. + * + * For extra levels of paranoia, we make sure each step lands + * before applying the next step. + */ + I915_WRITE(fence_reg_lo, 0); + POSTING_READ(fence_reg_lo); + + I915_WRITE(fence_reg_hi, upper_32_bits(val)); + I915_WRITE(fence_reg_lo, lower_32_bits(val)); POSTING_READ(fence_reg_lo); - } else { - I915_WRITE(fence_reg_hi, 0); - POSTING_READ(fence_reg_hi); } } -static void i915_write_fence_reg(struct drm_device *dev, int reg, - struct drm_i915_gem_object *obj) +static void i915_write_fence_reg(struct drm_i915_fence_reg *fence, + struct i915_vma *vma) { - struct drm_i915_private *dev_priv = to_i915(dev); u32 val; - if (obj) { - u32 size = i915_gem_obj_ggtt_size(obj); + val = 0; + if (vma) { + unsigned int tiling = i915_gem_object_get_tiling(vma->obj); + bool is_y_tiled = tiling == I915_TILING_Y; + unsigned int stride = i915_gem_object_get_stride(vma->obj); int pitch_val; int tile_width; - WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) || - (size & -size) != size || - (i915_gem_obj_ggtt_offset(obj) & (size - 1)), - "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n", - i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size); + WARN((vma->node.start & ~I915_FENCE_START_MASK) || + !is_power_of_2(vma->node.size) || + (vma->node.start & (vma->node.size - 1)), + "object 0x%08llx [fenceable? 
%d] not 1M or pot-size (0x%08llx) aligned\n", + vma->node.start, + i915_vma_is_map_and_fenceable(vma), + vma->node.size); - if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) + if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915)) tile_width = 128; else tile_width = 512; /* Note: pitch better be a power of two tile widths */ - pitch_val = obj->stride / tile_width; + pitch_val = stride / tile_width; pitch_val = ffs(pitch_val) - 1; - val = i915_gem_obj_ggtt_offset(obj); - if (obj->tiling_mode == I915_TILING_Y) - val |= 1 << I830_FENCE_TILING_Y_SHIFT; - val |= I915_FENCE_SIZE_BITS(size); + val = vma->node.start; + if (is_y_tiled) + val |= BIT(I830_FENCE_TILING_Y_SHIFT); + val |= I915_FENCE_SIZE_BITS(vma->node.size); val |= pitch_val << I830_FENCE_PITCH_SHIFT; val |= I830_FENCE_REG_VALID; - } else - val = 0; + } + + if (!pipelined) { + struct drm_i915_private *dev_priv = fence->i915; + i915_reg_t reg = FENCE_REG(fence->id); - I915_WRITE(FENCE_REG(reg), val); - POSTING_READ(FENCE_REG(reg)); + I915_WRITE(reg, val); + POSTING_READ(reg); + } } -static void i830_write_fence_reg(struct drm_device *dev, int reg, - struct drm_i915_gem_object *obj) +static void i830_write_fence_reg(struct drm_i915_fence_reg *fence, + struct i915_vma *vma) { - struct drm_i915_private *dev_priv = to_i915(dev); - uint32_t val; + u32 val; - if (obj) { - u32 size = i915_gem_obj_ggtt_size(obj); - uint32_t pitch_val; + val = 0; + if (vma) { + unsigned int tiling = i915_gem_object_get_tiling(vma->obj); + bool is_y_tiled = tiling == I915_TILING_Y; + unsigned int stride = i915_gem_object_get_stride(vma->obj); + u32 pitch_val; - WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) || - (size & -size) != size || - (i915_gem_obj_ggtt_offset(obj) & (size - 1)), - "object 0x%08llx not 512K or pot-size 0x%08x aligned\n", - i915_gem_obj_ggtt_offset(obj), size); + WARN((vma->node.start & ~I830_FENCE_START_MASK) || + !is_power_of_2(vma->node.size) || + (vma->node.start & (vma->node.size - 1)), + "object 0x%08llx not 512K or pot-size 0x%08llx aligned\n", + vma->node.start, vma->node.size); - pitch_val = obj->stride / 128; + pitch_val = stride / 128; pitch_val = ffs(pitch_val) - 1; - val = i915_gem_obj_ggtt_offset(obj); - if (obj->tiling_mode == I915_TILING_Y) - val |= 1 << I830_FENCE_TILING_Y_SHIFT; - val |= I830_FENCE_SIZE_BITS(size); + val = vma->node.start; + if (is_y_tiled) + val |= BIT(I830_FENCE_TILING_Y_SHIFT); + val |= I830_FENCE_SIZE_BITS(vma->node.size); val |= pitch_val << I830_FENCE_PITCH_SHIFT; val |= I830_FENCE_REG_VALID; - } else - val = 0; + } - I915_WRITE(FENCE_REG(reg), val); - POSTING_READ(FENCE_REG(reg)); -} + if (!pipelined) { + struct drm_i915_private *dev_priv = fence->i915; + i915_reg_t reg = FENCE_REG(fence->id); -inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj) -{ - return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT; + I915_WRITE(reg, val); + POSTING_READ(reg); + } } -static void i915_gem_write_fence(struct drm_device *dev, int reg, - struct drm_i915_gem_object *obj) +static void fence_write(struct drm_i915_fence_reg *fence, + struct i915_vma *vma) { - struct drm_i915_private *dev_priv = to_i915(dev); - - /* Ensure that all CPU reads are completed before installing a fence - * and all writes before removing the fence. + /* Previous access through the fence register is marshalled by + * the mb() inside the fault handlers (i915_gem_release_mmaps) + * and explicitly managed for internal users. 
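
The register value built by the new i965_write_fence_reg() can be computed in isolation. A sketch with the generation-dependent pitch shift passed in; the tiling and valid bit positions are assumptions here, not taken from the headers:

#include <stdbool.h>
#include <stdint.h>

#define FENCE_TILING_Y	(1ull << 1)	/* assumed bit position */
#define FENCE_VALID	(1ull << 0)	/* assumed bit position */

static uint64_t pack_i965_fence(uint64_t start, uint64_t node_size,
				unsigned int stride, bool y_tiled,
				unsigned int pitch_shift)
{
	uint32_t row_size = stride * (y_tiled ? 32 : 8);
	uint64_t size = node_size - node_size % row_size; /* rounddown() */
	uint64_t val;

	val  = ((start + size - 4096) & 0xfffff000ull) << 32; /* last page */
	val |= start & 0xfffff000ull;			      /* first page */
	val |= (uint64_t)((stride / 128) - 1) << pitch_shift; /* 128B units */
	if (y_tiled)
		val |= FENCE_TILING_Y;
	return val | FENCE_VALID;
}
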
*/ - if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) - mb(); - - WARN(obj && (!obj->stride || !obj->tiling_mode), - "bogus fence setup with stride: 0x%x, tiling mode: %i\n", - obj->stride, obj->tiling_mode); - - if (IS_GEN2(dev)) - i830_write_fence_reg(dev, reg, obj); - else if (IS_GEN3(dev)) - i915_write_fence_reg(dev, reg, obj); - else if (INTEL_INFO(dev)->gen >= 4) - i965_write_fence_reg(dev, reg, obj); - - /* And similarly be paranoid that no direct access to this region - * is reordered to before the fence is installed. + + if (IS_GEN2(fence->i915)) + i830_write_fence_reg(fence, vma); + else if (IS_GEN3(fence->i915)) + i915_write_fence_reg(fence, vma); + else + i965_write_fence_reg(fence, vma); + + /* Access through the fenced region afterwards is + * ordered by the posting reads whilst writing the registers. */ - if (i915_gem_object_needs_mb(obj)) - mb(); -} -static inline int fence_number(struct drm_i915_private *dev_priv, - struct drm_i915_fence_reg *fence) -{ - return fence - dev_priv->fence_regs; + fence->dirty = false; } -static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, - struct drm_i915_fence_reg *fence, - bool enable) +static int fence_update(struct drm_i915_fence_reg *fence, + struct i915_vma *vma) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - int reg = fence_number(dev_priv, fence); - - i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL); + int ret; - if (enable) { - obj->fence_reg = reg; - fence->obj = obj; - list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); - } else { - obj->fence_reg = I915_FENCE_REG_NONE; - fence->obj = NULL; - list_del_init(&fence->lru_list); - } - obj->fence_dirty = false; -} + if (vma) { + if (!i915_vma_is_map_and_fenceable(vma)) + return -EINVAL; -static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) -{ - if (obj->tiling_mode) - i915_gem_release_mmap(obj); + if (WARN(!i915_gem_object_get_stride(vma->obj) || + !i915_gem_object_get_tiling(vma->obj), + "bogus fence setup with stride: 0x%x, tiling mode: %i\n", + i915_gem_object_get_stride(vma->obj), + i915_gem_object_get_tiling(vma->obj))) + return -EINVAL; - /* As we do not have an associated fence register, we will force - * a tiling change if we ever need to acquire one. - */ - obj->fence_dirty = false; - obj->fence_reg = I915_FENCE_REG_NONE; -} + ret = i915_gem_active_retire(&vma->last_fence, + &vma->obj->base.dev->struct_mutex); + if (ret) + return ret; + } -static int -i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) -{ - if (obj->last_fenced_req) { - int ret = i915_wait_request(obj->last_fenced_req); + if (fence->vma) { + ret = i915_gem_active_retire(&fence->vma->last_fence, + &fence->vma->obj->base.dev->struct_mutex); if (ret) return ret; + } + + if (fence->vma && fence->vma != vma) { + /* Ensure that all userspace CPU access is completed before + * stealing the fence. 
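
Stealing a fence from another vma has to follow a strict order, which fence_update() enforces; the same sequence as a compact model built on hypothetical stub helpers:

struct toy_vma;
struct toy_fence { struct toy_vma *vma; };
struct toy_vma { struct toy_fence *fence; };

/* Stubs standing in for the driver's retire/revoke/register helpers. */
static void toy_wait_idle(struct toy_vma *vma) { (void)vma; }
static void toy_revoke_mmaps(struct toy_vma *vma) { (void)vma; }
static void toy_write_reg(struct toy_fence *f, struct toy_vma *vma)
{ (void)f; (void)vma; }

static void toy_fence_update(struct toy_fence *f, struct toy_vma *vma)
{
	if (vma)
		toy_wait_idle(vma);		/* new owner must be idle */

	if (f->vma && f->vma != vma) {
		toy_wait_idle(f->vma);		/* so must the old owner */
		toy_revoke_mmaps(f->vma);	/* cut off CPU access first */
		f->vma->fence = NULL;
		f->vma = NULL;
	}

	toy_write_reg(f, vma);			/* only now touch hardware */
	if (vma) {
		vma->fence = f;
		f->vma = vma;
	}
}
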
+ */ + i915_gem_release_mmap(fence->vma->obj); + + fence->vma->fence = NULL; + fence->vma = NULL; + + list_move(&fence->link, &fence->i915->mm.fence_list); + } + + fence_write(fence, vma); + + if (vma) { + if (fence->vma != vma) { + vma->fence = fence; + fence->vma = vma; + } - i915_gem_request_assign(&obj->last_fenced_req, NULL); + list_move_tail(&fence->link, &fence->i915->mm.fence_list); } return 0; } /** - * i915_gem_object_put_fence - force-remove fence for an object - * @obj: object to map through a fence reg + * i915_vma_put_fence - force-remove fence for a VMA + * @vma: vma to map linearly (not through a fence reg) * * This function force-removes any fence from the given object, which is useful * if the kernel wants to do untiled GTT access. @@ -284,70 +286,40 @@ i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) * 0 on success, negative error code on failure. */ int -i915_gem_object_put_fence(struct drm_i915_gem_object *obj) +i915_vma_put_fence(struct i915_vma *vma) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - struct drm_i915_fence_reg *fence; - int ret; + struct drm_i915_fence_reg *fence = vma->fence; - ret = i915_gem_object_wait_fence(obj); - if (ret) - return ret; - - if (obj->fence_reg == I915_FENCE_REG_NONE) + if (!fence) return 0; - fence = &dev_priv->fence_regs[obj->fence_reg]; - - if (WARN_ON(fence->pin_count)) + if (fence->pin_count) return -EBUSY; - i915_gem_object_fence_lost(obj); - i915_gem_object_update_fence(obj, fence, false); - - return 0; + return fence_update(fence, NULL); } -static struct drm_i915_fence_reg * -i915_find_fence_reg(struct drm_device *dev) +static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_fence_reg *reg, *avail; - int i; - - /* First try to find a free reg */ - avail = NULL; - for (i = 0; i < dev_priv->num_fence_regs; i++) { - reg = &dev_priv->fence_regs[i]; - if (!reg->obj) - return reg; - - if (!reg->pin_count) - avail = reg; - } - - if (avail == NULL) - goto deadlock; + struct drm_i915_fence_reg *fence; - /* None available, try to steal one or wait for a user to finish */ - list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { - if (reg->pin_count) + list_for_each_entry(fence, &dev_priv->mm.fence_list, link) { + if (fence->pin_count) continue; - return reg; + return fence; } -deadlock: /* Wait for completion of pending flips which consume fences */ - if (intel_has_pending_fb_unpin(dev)) + if (intel_has_pending_fb_unpin(&dev_priv->drm)) return ERR_PTR(-EAGAIN); return ERR_PTR(-EDEADLK); } /** - * i915_gem_object_get_fence - set up fencing for an object - * @obj: object to map through a fence reg + * i915_vma_get_fence - set up fencing for a vma + * @vma: vma to map through a fence reg * * When mapping objects through the GTT, userspace wants to be able to write * to them without having to worry about swizzling if the object is tiled. @@ -364,103 +336,27 @@ deadlock: * 0 on success, negative error code on failure. */ int -i915_gem_object_get_fence(struct drm_i915_gem_object *obj) +i915_vma_get_fence(struct i915_vma *vma) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - bool enable = obj->tiling_mode != I915_TILING_NONE; - struct drm_i915_fence_reg *reg; - int ret; - - /* Have we updated the tiling parameters upon the object and so - * will need to serialise the write to the associated fence register? 
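
fence_find() above only fails when every register is pinned by an ongoing access; the scan itself is a plain LRU walk (error code as in the hunk):

#include <errno.h>
#include <stddef.h>

struct lru_fence {
	int pin_count;
	struct lru_fence *next;	/* list kept oldest-first */
};

static struct lru_fence *find_free_fence(struct lru_fence *oldest, int *err)
{
	struct lru_fence *f;

	for (f = oldest; f; f = f->next) {
		if (f->pin_count)
			continue;	/* in active use, skip it */
		*err = 0;
		return f;		/* oldest idle fence wins */
	}

	*err = -EDEADLK;		/* everything pinned: caller backs off */
	return NULL;
}
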
- */ - if (obj->fence_dirty) { - ret = i915_gem_object_wait_fence(obj); - if (ret) - return ret; - } + struct drm_i915_fence_reg *fence; + struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL; /* Just update our place in the LRU if our fence is getting reused. */ - if (obj->fence_reg != I915_FENCE_REG_NONE) { - reg = &dev_priv->fence_regs[obj->fence_reg]; - if (!obj->fence_dirty) { - list_move_tail(®->lru_list, - &dev_priv->mm.fence_list); + if (vma->fence) { + fence = vma->fence; + if (!fence->dirty) { + list_move_tail(&fence->link, + &fence->i915->mm.fence_list); return 0; } - } else if (enable) { - if (WARN_ON(!obj->map_and_fenceable)) - return -EINVAL; - - reg = i915_find_fence_reg(dev); - if (IS_ERR(reg)) - return PTR_ERR(reg); - - if (reg->obj) { - struct drm_i915_gem_object *old = reg->obj; - - ret = i915_gem_object_wait_fence(old); - if (ret) - return ret; - - i915_gem_object_fence_lost(old); - } + } else if (set) { + fence = fence_find(to_i915(vma->vm->dev)); + if (IS_ERR(fence)) + return PTR_ERR(fence); } else return 0; - i915_gem_object_update_fence(obj, reg, enable); - - return 0; -} - -/** - * i915_gem_object_pin_fence - pin fencing state - * @obj: object to pin fencing for - * - * This pins the fencing state (whether tiled or untiled) to make sure the - * object is ready to be used as a scanout target. Fencing status must be - * synchronize first by calling i915_gem_object_get_fence(): - * - * The resulting fence pin reference must be released again with - * i915_gem_object_unpin_fence(). - * - * Returns: - * - * True if the object has a fence, false otherwise. - */ -bool -i915_gem_object_pin_fence(struct drm_i915_gem_object *obj) -{ - if (obj->fence_reg != I915_FENCE_REG_NONE) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj); - - WARN_ON(!ggtt_vma || - dev_priv->fence_regs[obj->fence_reg].pin_count > - ggtt_vma->pin_count); - dev_priv->fence_regs[obj->fence_reg].pin_count++; - return true; - } else - return false; -} - -/** - * i915_gem_object_unpin_fence - unpin fencing state - * @obj: object to unpin fencing for - * - * This releases the fence pin reference acquired through - * i915_gem_object_pin_fence. It will handle both objects with and without an - * attached fence correctly, callers do not need to distinguish this. - */ -void -i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj) -{ - if (obj->fence_reg != I915_FENCE_REG_NONE) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0); - dev_priv->fence_regs[obj->fence_reg].pin_count--; - } + return fence_update(fence, set); } /** @@ -477,17 +373,16 @@ void i915_gem_restore_fences(struct drm_device *dev) for (i = 0; i < dev_priv->num_fence_regs; i++) { struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; + struct i915_vma *vma = reg->vma; /* * Commit delayed tiling changes if we have an object still * attached to the fence, otherwise just clear the fence. 
*/ - if (reg->obj) { - i915_gem_object_update_fence(reg->obj, reg, - reg->obj->tiling_mode); - } else { - i915_gem_write_fence(dev, i, NULL); - } + if (vma && !i915_gem_object_is_tiled(vma->obj)) + vma = NULL; + + fence_update(reg, vma); } } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 10f1e32767e6..0bb4232f66bc 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -32,6 +32,8 @@ #include "i915_trace.h" #include "intel_drv.h" +#define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM) + /** * DOC: Global GTT views * @@ -122,8 +124,11 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, has_full_48bit_ppgtt = IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9; - if (intel_vgpu_active(dev_priv)) - has_full_ppgtt = false; /* emulation is too hard */ + if (intel_vgpu_active(dev_priv)) { + /* emulation is too hard */ + has_full_ppgtt = false; + has_full_48bit_ppgtt = false; + } if (!has_aliasing_ppgtt) return 0; @@ -158,7 +163,7 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, return 0; } - if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists) + if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists && has_full_ppgtt) return has_full_48bit_ppgtt ? 3 : 2; else return has_aliasing_ppgtt ? 1 : 0; @@ -170,11 +175,13 @@ static int ppgtt_bind_vma(struct i915_vma *vma, { u32 pte_flags = 0; + vma->pages = vma->obj->pages; + /* Currently applicable only to VLV */ if (vma->obj->gt_ro) pte_flags |= PTE_READ_ONLY; - vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start, + vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start, cache_level, pte_flags); return 0; @@ -184,7 +191,7 @@ static void ppgtt_unbind_vma(struct i915_vma *vma) { vma->vm->clear_range(vma->vm, vma->node.start, - vma->obj->base.size, + vma->size, true); } @@ -324,16 +331,16 @@ static gen6_pte_t iris_pte_encode(dma_addr_t addr, static int __setup_page_dma(struct drm_device *dev, struct i915_page_dma *p, gfp_t flags) { - struct device *device = &dev->pdev->dev; + struct device *kdev = &dev->pdev->dev; p->page = alloc_page(flags); if (!p->page) return -ENOMEM; - p->daddr = dma_map_page(device, + p->daddr = dma_map_page(kdev, p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL); - if (dma_mapping_error(device, p->daddr)) { + if (dma_mapping_error(kdev, p->daddr)) { __free_page(p->page); return -EINVAL; } @@ -343,15 +350,17 @@ static int __setup_page_dma(struct drm_device *dev, static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p) { - return __setup_page_dma(dev, p, GFP_KERNEL); + return __setup_page_dma(dev, p, I915_GFP_DMA); } static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p) { + struct pci_dev *pdev = dev->pdev; + if (WARN_ON(!p->page)) return; - dma_unmap_page(&dev->pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL); + dma_unmap_page(&pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL); __free_page(p->page); memset(p, 0, sizeof(*p)); } @@ -405,33 +414,18 @@ static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p, fill_page_dma(dev, p, v); } -static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev) +static int +setup_scratch_page(struct drm_device *dev, + struct i915_page_dma *scratch, + gfp_t gfp) { - struct i915_page_scratch *sp; - int ret; - - sp = kzalloc(sizeof(*sp), GFP_KERNEL); - if (sp == NULL) - return ERR_PTR(-ENOMEM); - - ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO); - if (ret) { - kfree(sp); - return 
ERR_PTR(ret); - } - - set_pages_uc(px_page(sp), 1); - - return sp; + return __setup_page_dma(dev, scratch, gfp | __GFP_ZERO); } -static void free_scratch_page(struct drm_device *dev, - struct i915_page_scratch *sp) +static void cleanup_scratch_page(struct drm_device *dev, + struct i915_page_dma *scratch) { - set_pages_wb(px_page(sp), 1); - - cleanup_px(dev, sp); - kfree(sp); + cleanup_page_dma(dev, scratch); } static struct i915_page_table *alloc_pt(struct drm_device *dev) @@ -477,7 +471,7 @@ static void gen8_initialize_pt(struct i915_address_space *vm, { gen8_pte_t scratch_pte; - scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), + scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, true); fill_px(vm->dev, pt, scratch_pte); @@ -488,9 +482,9 @@ static void gen6_initialize_pt(struct i915_address_space *vm, { gen6_pte_t scratch_pte; - WARN_ON(px_dma(vm->scratch_page) == 0); + WARN_ON(vm->scratch_page.daddr == 0); - scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), + scratch_pte = vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, true, 0); fill32_px(vm->dev, pt, scratch_pte); @@ -669,6 +663,7 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req, unsigned entry, dma_addr_t addr) { + struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; int ret; @@ -678,13 +673,13 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req, if (ret) return ret; - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(engine, GEN8_RING_PDP_UDW(engine, entry)); - intel_ring_emit(engine, upper_32_bits(addr)); - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(engine, GEN8_RING_PDP_LDW(engine, entry)); - intel_ring_emit(engine, lower_32_bits(addr)); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, entry)); + intel_ring_emit(ring, upper_32_bits(addr)); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, entry)); + intel_ring_emit(ring, lower_32_bits(addr)); + intel_ring_advance(ring); return 0; } @@ -773,7 +768,7 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, bool use_scratch) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), + gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, use_scratch); if (!USES_FULL_48BIT_PPGTT(vm->dev)) { @@ -879,9 +874,9 @@ static int gen8_init_scratch(struct i915_address_space *vm) struct drm_device *dev = vm->dev; int ret; - vm->scratch_page = alloc_scratch_page(dev); - if (IS_ERR(vm->scratch_page)) - return PTR_ERR(vm->scratch_page); + ret = setup_scratch_page(dev, &vm->scratch_page, I915_GFP_DMA); + if (ret) + return ret; vm->scratch_pt = alloc_pt(dev); if (IS_ERR(vm->scratch_pt)) { @@ -915,7 +910,7 @@ free_pd: free_pt: free_pt(dev, vm->scratch_pt); free_scratch_page: - free_scratch_page(dev, vm->scratch_page); + cleanup_scratch_page(dev, &vm->scratch_page); return ret; } @@ -959,7 +954,7 @@ static void gen8_free_scratch(struct i915_address_space *vm) free_pdp(dev, vm->scratch_pdp); free_pd(dev, vm->scratch_pd); free_pt(dev, vm->scratch_pt); - free_scratch_page(dev, vm->scratch_page); + cleanup_scratch_page(dev, &vm->scratch_page); } static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev, @@ -1456,7 +1451,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) struct i915_address_space *vm 
= &ppgtt->base; uint64_t start = ppgtt->base.start; uint64_t length = ppgtt->base.total; - gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), + gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, true); if (!USES_FULL_48BIT_PPGTT(vm->dev)) { @@ -1573,7 +1568,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) uint32_t pte, pde; uint32_t start = ppgtt->base.start, length = ppgtt->base.total; - scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), + scratch_pte = vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, true, 0); gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) { @@ -1660,11 +1655,12 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req) { + struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; int ret; /* NB: TLBs must be flushed and invalidated before a switch */ - ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); + ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); if (ret) return ret; @@ -1672,13 +1668,13 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, if (ret) return ret; - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(2)); - intel_ring_emit_reg(engine, RING_PP_DIR_DCLV(engine)); - intel_ring_emit(engine, PP_DIR_DCLV_2G); - intel_ring_emit_reg(engine, RING_PP_DIR_BASE(engine)); - intel_ring_emit(engine, get_pd_offset(ppgtt)); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); + intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); + intel_ring_emit(ring, PP_DIR_DCLV_2G); + intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); + intel_ring_emit(ring, get_pd_offset(ppgtt)); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -1686,11 +1682,12 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req) { + struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; int ret; /* NB: TLBs must be flushed and invalidated before a switch */ - ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); + ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); if (ret) return ret; @@ -1698,17 +1695,17 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, if (ret) return ret; - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(2)); - intel_ring_emit_reg(engine, RING_PP_DIR_DCLV(engine)); - intel_ring_emit(engine, PP_DIR_DCLV_2G); - intel_ring_emit_reg(engine, RING_PP_DIR_BASE(engine)); - intel_ring_emit(engine, get_pd_offset(ppgtt)); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); + intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); + intel_ring_emit(ring, PP_DIR_DCLV_2G); + intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); + intel_ring_emit(ring, get_pd_offset(ppgtt)); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); /* XXX: RCS is the only one to auto invalidate the TLBs? 
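
Both mm-switch paths emit the same six-dword stream; its shape, using the MI encodings as they appear in the driver's headers (reproduced here only for self-containment):

#include <stdint.h>

#define MI_NOOP			0u
#define MI_LOAD_REGISTER_IMM(n)	((0x22u << 23) | (2u * (n) - 1))
#define PP_DIR_DCLV_2G		0xffffffffu

/* Two register/value pairs plus a NOOP to keep the dword count even. */
static unsigned int emit_ppgtt_switch(uint32_t *cs, uint32_t dclv_reg,
				      uint32_t base_reg, uint32_t pd_offset)
{
	unsigned int n = 0;

	cs[n++] = MI_LOAD_REGISTER_IMM(2);
	cs[n++] = dclv_reg;		/* RING_PP_DIR_DCLV(engine) */
	cs[n++] = PP_DIR_DCLV_2G;	/* enable the full 2 GiB of PDEs */
	cs[n++] = base_reg;		/* RING_PP_DIR_BASE(engine) */
	cs[n++] = pd_offset;		/* from get_pd_offset() */
	cs[n++] = MI_NOOP;
	return n;
}
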
*/ if (engine->id != RCS) { - ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); + ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); if (ret) return ret; } @@ -1796,7 +1793,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, unsigned first_pte = first_entry % GEN6_PTES; unsigned last_pte, i; - scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), + scratch_pte = vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, true, 0); while (num_entries) { @@ -1942,14 +1939,15 @@ unwind_out: static int gen6_init_scratch(struct i915_address_space *vm) { struct drm_device *dev = vm->dev; + int ret; - vm->scratch_page = alloc_scratch_page(dev); - if (IS_ERR(vm->scratch_page)) - return PTR_ERR(vm->scratch_page); + ret = setup_scratch_page(dev, &vm->scratch_page, I915_GFP_DMA); + if (ret) + return ret; vm->scratch_pt = alloc_pt(dev); if (IS_ERR(vm->scratch_pt)) { - free_scratch_page(dev, vm->scratch_page); + cleanup_scratch_page(dev, &vm->scratch_page); return PTR_ERR(vm->scratch_pt); } @@ -1963,7 +1961,7 @@ static void gen6_free_scratch(struct i915_address_space *vm) struct drm_device *dev = vm->dev; free_pt(dev, vm->scratch_pt); - free_scratch_page(dev, vm->scratch_page); + cleanup_scratch_page(dev, &vm->scratch_page); } static void gen6_ppgtt_cleanup(struct i915_address_space *vm) @@ -2009,7 +2007,7 @@ alloc: 0, ggtt->base.total, DRM_MM_TOPDOWN); if (ret == -ENOSPC && !retried) { - ret = i915_gem_evict_something(dev, &ggtt->base, + ret = i915_gem_evict_something(&ggtt->base, GEN6_PD_SIZE, GEN6_PD_ALIGN, I915_CACHE_NONE, 0, ggtt->base.total, @@ -2101,11 +2099,12 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) return 0; } -static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) +static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt, + struct drm_i915_private *dev_priv) { - ppgtt->base.dev = dev; + ppgtt->base.dev = &dev_priv->drm; - if (INTEL_INFO(dev)->gen < 8) + if (INTEL_INFO(dev_priv)->gen < 8) return gen6_ppgtt_init(ppgtt); else return gen8_ppgtt_init(ppgtt); @@ -2115,9 +2114,9 @@ static void i915_address_space_init(struct i915_address_space *vm, struct drm_i915_private *dev_priv) { drm_mm_init(&vm->mm, vm->start, vm->total); - vm->dev = &dev_priv->drm; INIT_LIST_HEAD(&vm->active_list); INIT_LIST_HEAD(&vm->inactive_list); + INIT_LIST_HEAD(&vm->unbound_list); list_add_tail(&vm->global_link, &dev_priv->vm_list); } @@ -2140,15 +2139,17 @@ static void gtt_write_workarounds(struct drm_device *dev) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); } -static int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) +static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt, + struct drm_i915_private *dev_priv, + struct drm_i915_file_private *file_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - int ret = 0; + int ret; - ret = __hw_ppgtt_init(dev, ppgtt); + ret = __hw_ppgtt_init(ppgtt, dev_priv); if (ret == 0) { kref_init(&ppgtt->ref); i915_address_space_init(&ppgtt->base, dev_priv); + ppgtt->base.file = file_priv; } return ret; @@ -2180,7 +2181,8 @@ int i915_ppgtt_init_hw(struct drm_device *dev) } struct i915_hw_ppgtt * -i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv) +i915_ppgtt_create(struct drm_i915_private *dev_priv, + struct drm_i915_file_private *fpriv) { struct i915_hw_ppgtt *ppgtt; int ret; @@ -2189,14 +2191,12 @@ i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv) if (!ppgtt) return ERR_PTR(-ENOMEM); - ret = 
i915_ppgtt_init(dev, ppgtt); + ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv); if (ret) { kfree(ppgtt); return ERR_PTR(ret); } - ppgtt->file_priv = fpriv; - trace_i915_ppgtt_create(&ppgtt->base); return ppgtt; @@ -2209,9 +2209,10 @@ void i915_ppgtt_release(struct kref *kref) trace_i915_ppgtt_release(&ppgtt->base); - /* vmas should already be unbound */ + /* vmas should already be unbound and destroyed */ WARN_ON(!list_empty(&ppgtt->base.active_list)); WARN_ON(!list_empty(&ppgtt->base.inactive_list)); + WARN_ON(!list_empty(&ppgtt->base.unbound_list)); list_del(&ppgtt->base.global_link); drm_mm_takedown(&ppgtt->base.mm); @@ -2220,47 +2221,21 @@ void i915_ppgtt_release(struct kref *kref) kfree(ppgtt); } -extern int intel_iommu_gfx_mapped; /* Certain Gen5 chipsets require idling the GPU before * unmapping anything from the GTT when VT-d is enabled. */ -static bool needs_idle_maps(struct drm_device *dev) +static bool needs_idle_maps(struct drm_i915_private *dev_priv) { #ifdef CONFIG_INTEL_IOMMU /* Query intel_iommu to see if we need the workaround. Presumably that * was loaded first. */ - if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped) + if (IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_iommu_gfx_mapped) return true; #endif return false; } -static bool do_idling(struct drm_i915_private *dev_priv) -{ - struct i915_ggtt *ggtt = &dev_priv->ggtt; - bool ret = dev_priv->mm.interruptible; - - if (unlikely(ggtt->do_idle_maps)) { - dev_priv->mm.interruptible = false; - if (i915_gem_wait_for_idle(dev_priv)) { - DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); - /* Wait a bit, in hopes it avoids the hang */ - udelay(10); - } - } - - return ret; -} - -static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible) -{ - struct i915_ggtt *ggtt = &dev_priv->ggtt; - - if (unlikely(ggtt->do_idle_maps)) - dev_priv->mm.interruptible = interruptible; -} - void i915_check_and_clear_faults(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; @@ -2329,12 +2304,7 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) { -#ifdef writeq writeq(pte, addr); -#else - iowrite32((u32)pte, addr); - iowrite32(pte >> 32, addr + 4); -#endif } static void gen8_ggtt_insert_page(struct i915_address_space *vm, @@ -2527,7 +2497,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, first_entry, num_entries, max_entries)) num_entries = max_entries; - scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), + scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, use_scratch); for (i = 0; i < num_entries; i++) @@ -2559,7 +2529,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, first_entry, num_entries, max_entries)) num_entries = max_entries; - scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), + scratch_pte = vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, use_scratch, 0); for (i = 0; i < num_entries; i++) @@ -2638,8 +2608,7 @@ static int ggtt_bind_vma(struct i915_vma *vma, if (obj->gt_ro) pte_flags |= PTE_READ_ONLY; - vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages, - vma->node.start, + vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start, cache_level, pte_flags); /* * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally * upgrade to both bound if we bind either to avoid double-binding.
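A note on the bind-upgrade rule this comment describes: with an aliasing PPGTT the global and per-process GTT share a single set of PTEs, so one insert_entries() call satisfies both domains and the vma is simply recorded as bound in both. A stand-alone restatement of the rule; the flag values mirror the patch's new I915_VMA_* bits, everything else is illustrative:

#include <stdio.h>

#define I915_VMA_GLOBAL_BIND (1u << 6)
#define I915_VMA_LOCAL_BIND  (1u << 7)

/* Shared PTEs: a bind through either domain implicitly binds both,
 * so record both and never write the entries twice. */
static unsigned int aliasing_bind(unsigned int requested)
{
	return requested ? I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND : 0;
}

int main(void)
{
	printf("requested %#x -> recorded %#x\n", I915_VMA_GLOBAL_BIND,
	       aliasing_bind(I915_VMA_GLOBAL_BIND));
	return 0;
}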
*/ - vma->bound |= GLOBAL_BIND | LOCAL_BIND; + vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; return 0; } @@ -2669,19 +2638,17 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, pte_flags |= PTE_READ_ONLY; - if (flags & GLOBAL_BIND) { + if (flags & I915_VMA_GLOBAL_BIND) { vma->vm->insert_entries(vma->vm, - vma->ggtt_view.pages, - vma->node.start, + vma->pages, vma->node.start, cache_level, pte_flags); } - if (flags & LOCAL_BIND) { + if (flags & I915_VMA_LOCAL_BIND) { struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt; appgtt->base.insert_entries(&appgtt->base, - vma->ggtt_view.pages, - vma->node.start, + vma->pages, vma->node.start, cache_level, pte_flags); } @@ -2690,42 +2657,36 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, static void ggtt_unbind_vma(struct i915_vma *vma) { - struct drm_device *dev = vma->vm->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_gem_object *obj = vma->obj; - const uint64_t size = min_t(uint64_t, - obj->base.size, - vma->node.size); + struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt; + const u64 size = min(vma->size, vma->node.size); - if (vma->bound & GLOBAL_BIND) { + if (vma->flags & I915_VMA_GLOBAL_BIND) vma->vm->clear_range(vma->vm, - vma->node.start, - size, + vma->node.start, size, true); - } - - if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) { - struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt; + if (vma->flags & I915_VMA_LOCAL_BIND && appgtt) appgtt->base.clear_range(&appgtt->base, - vma->node.start, - size, + vma->node.start, size, true); - } } void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - bool interruptible; + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct device *kdev = &dev_priv->drm.pdev->dev; + struct i915_ggtt *ggtt = &dev_priv->ggtt; - interruptible = do_idling(dev_priv); + if (unlikely(ggtt->do_idle_maps)) { + if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) { + DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); + /* Wait a bit, in hopes it avoids the hang */ + udelay(10); + } + } - dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents, + dma_unmap_sg(kdev, obj->pages->sgl, obj->pages->nents, PCI_DMA_BIDIRECTIONAL); - - undo_idling(dev_priv, interruptible); } static void i915_gtt_color_adjust(struct drm_mm_node *node, @@ -2736,19 +2697,14 @@ static void i915_gtt_color_adjust(struct drm_mm_node *node, if (node->color != color) *start += 4096; - if (!list_empty(&node->node_list)) { - node = list_entry(node->node_list.next, - struct drm_mm_node, - node_list); - if (node->allocated && node->color != color) - *end -= 4096; - } + node = list_first_entry_or_null(&node->node_list, + struct drm_mm_node, + node_list); + if (node && node->allocated && node->color != color) + *end -= 4096; } -static int i915_gem_setup_global_gtt(struct drm_device *dev, - u64 start, - u64 mappable_end, - u64 end) +int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) { /* Let GEM Manage all of the aperture. * @@ -2759,48 +2715,15 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev, * aperture. One page should be enough to keep any prefetching inside * of the aperture. 
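The guard page referred to here is the final page of the global GTT: it is kept mapped to the scratch page so that GPU prefetch past the last real object still lands inside the address space. A toy calculation of the range actually handed to the allocator; the GGTT size is invented for the example:

#include <stdio.h>

#define PAGE_SIZE 4096ULL

int main(void)
{
	unsigned long long total = 256ULL << 20; /* hypothetical 256MiB GGTT */

	/* drm_mm manages [0, total - PAGE_SIZE); the excluded final page
	 * stays pointing at scratch to absorb hardware over-fetch. */
	printf("allocatable GGTT: [0, %llu)\n", total - PAGE_SIZE);
	return 0;
}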
*/ - struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct drm_mm_node *entry; - struct drm_i915_gem_object *obj; unsigned long hole_start, hole_end; + struct drm_mm_node *entry; int ret; - BUG_ON(mappable_end > end); - - ggtt->base.start = start; - - /* Subtract the guard page before address space initialization to - * shrink the range used by drm_mm */ - ggtt->base.total = end - start - PAGE_SIZE; - i915_address_space_init(&ggtt->base, dev_priv); - ggtt->base.total += PAGE_SIZE; - ret = intel_vgt_balloon(dev_priv); if (ret) return ret; - if (!HAS_LLC(dev)) - ggtt->base.mm.color_adjust = i915_gtt_color_adjust; - - /* Mark any preallocated objects as occupied */ - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - struct i915_vma *vma = i915_gem_obj_to_vma(obj, &ggtt->base); - - DRM_DEBUG_KMS("reserving preallocated space: %llx + %zx\n", - i915_gem_obj_ggtt_offset(obj), obj->base.size); - - WARN_ON(i915_gem_obj_ggtt_bound(obj)); - ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node); - if (ret) { - DRM_DEBUG_KMS("Reservation failed: %i\n", ret); - return ret; - } - vma->bound |= GLOBAL_BIND; - __i915_vma_set_map_and_fenceable(vma); - list_add_tail(&vma->vm_link, &ggtt->base.inactive_list); - } - /* Clear any non-preallocated blocks */ drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) { DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", @@ -2810,18 +2733,19 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev, } /* And finally clear the reserved guard page */ - ggtt->base.clear_range(&ggtt->base, end - PAGE_SIZE, PAGE_SIZE, true); + ggtt->base.clear_range(&ggtt->base, + ggtt->base.total - PAGE_SIZE, PAGE_SIZE, + true); - if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) { + if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) { struct i915_hw_ppgtt *ppgtt; ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); if (!ppgtt) return -ENOMEM; - ret = __hw_ppgtt_init(dev, ppgtt); + ret = __hw_ppgtt_init(ppgtt, dev_priv); if (ret) { - ppgtt->base.cleanup(&ppgtt->base); kfree(ppgtt); return ret; } @@ -2849,33 +2773,20 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev, } /** - * i915_gem_init_ggtt - Initialize GEM for Global GTT - * @dev: DRM device - */ -void i915_gem_init_ggtt(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - - i915_gem_setup_global_gtt(dev, 0, ggtt->mappable_end, ggtt->base.total); -} - -/** * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization - * @dev: DRM device + * @dev_priv: i915 device */ -void i915_ggtt_cleanup_hw(struct drm_device *dev) +void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; if (dev_priv->mm.aliasing_ppgtt) { struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; - ppgtt->base.cleanup(&ppgtt->base); + kfree(ppgtt); } - i915_gem_cleanup_stolen(dev); + i915_gem_cleanup_stolen(&dev_priv->drm); if (drm_mm_initialized(&ggtt->base.mm)) { intel_vgt_deballoon(dev_priv); @@ -2885,6 +2796,9 @@ void i915_ggtt_cleanup_hw(struct drm_device *dev) } ggtt->base.cleanup(&ggtt->base); + + arch_phys_wc_del(ggtt->mtrr); + io_mapping_fini(&ggtt->mappable); } static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) @@ -2965,17 +2879,14 @@ static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) return (gen9_gmch_ctl - 0xf0 + 1) << 22; } -static int ggtt_probe_common(struct drm_device *dev, - 
size_t gtt_size) +static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct i915_page_scratch *scratch_page; - phys_addr_t ggtt_phys_addr; + struct pci_dev *pdev = ggtt->base.dev->pdev; + phys_addr_t phys_addr; + int ret; /* For Modern GENs the PTEs and register space are split in the BAR */ - ggtt_phys_addr = pci_resource_start(dev->pdev, 0) + - (pci_resource_len(dev->pdev, 0) / 2); + phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2; /* * On BXT writes larger than 64 bit to the GTT pagetable range will be @@ -2984,25 +2895,25 @@ static int ggtt_probe_common(struct drm_device *dev, * resort to an uncached mapping. The WC issue is easily caught by the * readback check when writing GTT PTE entries. */ - if (IS_BROXTON(dev)) - ggtt->gsm = ioremap_nocache(ggtt_phys_addr, gtt_size); + if (IS_BROXTON(ggtt->base.dev)) + ggtt->gsm = ioremap_nocache(phys_addr, size); else - ggtt->gsm = ioremap_wc(ggtt_phys_addr, gtt_size); + ggtt->gsm = ioremap_wc(phys_addr, size); if (!ggtt->gsm) { - DRM_ERROR("Failed to map the gtt page table\n"); + DRM_ERROR("Failed to map the ggtt page table\n"); return -ENOMEM; } - scratch_page = alloc_scratch_page(dev); - if (IS_ERR(scratch_page)) { + ret = setup_scratch_page(ggtt->base.dev, + &ggtt->base.scratch_page, + GFP_DMA32); + if (ret) { DRM_ERROR("Scratch setup failed\n"); /* iounmap will also get called at remove, but meh */ iounmap(ggtt->gsm); - return PTR_ERR(scratch_page); + return ret; } - ggtt->base.scratch_page = scratch_page; - return 0; } @@ -3079,42 +2990,49 @@ static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); } +static void gen6_gmch_remove(struct i915_address_space *vm) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + + iounmap(ggtt->gsm); + cleanup_scratch_page(vm->dev, &vm->scratch_page); +} + static int gen8_gmch_probe(struct i915_ggtt *ggtt) { - struct drm_device *dev = ggtt->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); + struct pci_dev *pdev = dev_priv->drm.pdev; + unsigned int size; u16 snb_gmch_ctl; - int ret; /* TODO: We're not aware of mappable constraints on gen8 yet */ - ggtt->mappable_base = pci_resource_start(dev->pdev, 2); - ggtt->mappable_end = pci_resource_len(dev->pdev, 2); + ggtt->mappable_base = pci_resource_start(pdev, 2); + ggtt->mappable_end = pci_resource_len(pdev, 2); - if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39))) - pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39)); + if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(39))) + pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); - pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); + pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); - if (INTEL_INFO(dev)->gen >= 9) { + if (INTEL_GEN(dev_priv) >= 9) { ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl); - ggtt->size = gen8_get_total_gtt_size(snb_gmch_ctl); - } else if (IS_CHERRYVIEW(dev)) { + size = gen8_get_total_gtt_size(snb_gmch_ctl); + } else if (IS_CHERRYVIEW(dev_priv)) { ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl); - ggtt->size = chv_get_total_gtt_size(snb_gmch_ctl); + size = chv_get_total_gtt_size(snb_gmch_ctl); } else { ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl); - ggtt->size = gen8_get_total_gtt_size(snb_gmch_ctl); + size = gen8_get_total_gtt_size(snb_gmch_ctl); } - ggtt->base.total = (ggtt->size / 
sizeof(gen8_pte_t)) << PAGE_SHIFT; + ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT; - if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) + if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) chv_setup_private_ppat(dev_priv); else bdw_setup_private_ppat(dev_priv); - ret = ggtt_probe_common(dev, ggtt->size); - + ggtt->base.cleanup = gen6_gmch_remove; ggtt->base.bind_vma = ggtt_bind_vma; ggtt->base.unbind_vma = ggtt_unbind_vma; ggtt->base.insert_page = gen8_ggtt_insert_page; @@ -3126,57 +3044,65 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) if (IS_CHERRYVIEW(dev_priv)) ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL; - return ret; + return ggtt_probe_common(ggtt, size); } static int gen6_gmch_probe(struct i915_ggtt *ggtt) { - struct drm_device *dev = ggtt->base.dev; + struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); + struct pci_dev *pdev = dev_priv->drm.pdev; + unsigned int size; u16 snb_gmch_ctl; - int ret; - ggtt->mappable_base = pci_resource_start(dev->pdev, 2); - ggtt->mappable_end = pci_resource_len(dev->pdev, 2); + ggtt->mappable_base = pci_resource_start(pdev, 2); + ggtt->mappable_end = pci_resource_len(pdev, 2); /* 64/512MB is the current min/max we actually know of, but this is just * a coarse sanity check. */ - if ((ggtt->mappable_end < (64<<20) || (ggtt->mappable_end > (512<<20)))) { + if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) { DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end); return -ENXIO; } - if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40))) - pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40)); - pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); + if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(40))) + pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); + pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); - ggtt->size = gen6_get_total_gtt_size(snb_gmch_ctl); - ggtt->base.total = (ggtt->size / sizeof(gen6_pte_t)) << PAGE_SHIFT; - ret = ggtt_probe_common(dev, ggtt->size); + size = gen6_get_total_gtt_size(snb_gmch_ctl); + ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT; ggtt->base.clear_range = gen6_ggtt_clear_range; ggtt->base.insert_page = gen6_ggtt_insert_page; ggtt->base.insert_entries = gen6_ggtt_insert_entries; ggtt->base.bind_vma = ggtt_bind_vma; ggtt->base.unbind_vma = ggtt_unbind_vma; + ggtt->base.cleanup = gen6_gmch_remove; + + if (HAS_EDRAM(dev_priv)) + ggtt->base.pte_encode = iris_pte_encode; + else if (IS_HASWELL(dev_priv)) + ggtt->base.pte_encode = hsw_pte_encode; + else if (IS_VALLEYVIEW(dev_priv)) + ggtt->base.pte_encode = byt_pte_encode; + else if (INTEL_GEN(dev_priv) >= 7) + ggtt->base.pte_encode = ivb_pte_encode; + else + ggtt->base.pte_encode = snb_pte_encode; - return ret; + return ggtt_probe_common(ggtt, size); } -static void gen6_gmch_remove(struct i915_address_space *vm) +static void i915_gmch_remove(struct i915_address_space *vm) { - struct i915_ggtt *ggtt = container_of(vm, struct i915_ggtt, base); - - iounmap(ggtt->gsm); - free_scratch_page(vm->dev, vm->scratch_page); + intel_gmch_remove(); } static int i915_gmch_probe(struct i915_ggtt *ggtt) { - struct drm_device *dev = ggtt->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); int ret; ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL); @@ -3188,12 +3114,13 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) intel_gtt_get(&ggtt->base.total, 
&ggtt->stolen_size, &ggtt->mappable_base, &ggtt->mappable_end); - ggtt->do_idle_maps = needs_idle_maps(&dev_priv->drm); + ggtt->do_idle_maps = needs_idle_maps(dev_priv); ggtt->base.insert_page = i915_ggtt_insert_page; ggtt->base.insert_entries = i915_ggtt_insert_entries; ggtt->base.clear_range = i915_ggtt_clear_range; ggtt->base.bind_vma = ggtt_bind_vma; ggtt->base.unbind_vma = ggtt_unbind_vma; + ggtt->base.cleanup = i915_gmch_remove; if (unlikely(ggtt->do_idle_maps)) DRM_INFO("applying Ironlake quirks for intel_iommu\n"); @@ -3201,65 +3128,40 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) return 0; } -static void i915_gmch_remove(struct i915_address_space *vm) -{ - intel_gmch_remove(); -} - /** - * i915_ggtt_init_hw - Initialize GGTT hardware - * @dev: DRM device + * i915_ggtt_probe_hw - Probe GGTT hardware location + * @dev_priv: i915 device */ -int i915_ggtt_init_hw(struct drm_device *dev) +int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; int ret; - if (INTEL_INFO(dev)->gen <= 5) { - ggtt->probe = i915_gmch_probe; - ggtt->base.cleanup = i915_gmch_remove; - } else if (INTEL_INFO(dev)->gen < 8) { - ggtt->probe = gen6_gmch_probe; - ggtt->base.cleanup = gen6_gmch_remove; - - if (HAS_EDRAM(dev)) - ggtt->base.pte_encode = iris_pte_encode; - else if (IS_HASWELL(dev)) - ggtt->base.pte_encode = hsw_pte_encode; - else if (IS_VALLEYVIEW(dev)) - ggtt->base.pte_encode = byt_pte_encode; - else if (INTEL_INFO(dev)->gen >= 7) - ggtt->base.pte_encode = ivb_pte_encode; - else - ggtt->base.pte_encode = snb_pte_encode; - } else { - ggtt->probe = gen8_gmch_probe; - ggtt->base.cleanup = gen6_gmch_remove; - } - - ggtt->base.dev = dev; - ggtt->base.is_ggtt = true; + ggtt->base.dev = &dev_priv->drm; - ret = ggtt->probe(ggtt); + if (INTEL_GEN(dev_priv) <= 5) + ret = i915_gmch_probe(ggtt); + else if (INTEL_GEN(dev_priv) < 8) + ret = gen6_gmch_probe(ggtt); + else + ret = gen8_gmch_probe(ggtt); if (ret) return ret; if ((ggtt->base.total - 1) >> 32) { DRM_ERROR("We never expected a Global GTT with more than 32bits" - "of address space! Found %lldM!\n", + " of address space! Found %lldM!\n", ggtt->base.total >> 20); ggtt->base.total = 1ULL << 32; ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); } - /* - * Initialise stolen early so that we may reserve preallocated - * objects for the BIOS to KMS transition. - */ - ret = i915_gem_init_stolen(dev); - if (ret) - goto out_gtt_cleanup; + if (ggtt->mappable_end > ggtt->base.total) { + DRM_ERROR("mappable aperture extends past end of GGTT," + " aperture=%llx, total=%llx\n", + ggtt->mappable_end, ggtt->base.total); + ggtt->mappable_end = ggtt->base.total; + } /* GMADR is the PCI mmio aperture into the global GTT. */ DRM_INFO("Memory usable by graphics device = %lluM\n", @@ -3272,16 +3174,55 @@ int i915_ggtt_init_hw(struct drm_device *dev) #endif return 0; +} + +/** + * i915_ggtt_init_hw - Initialize GGTT hardware + * @dev_priv: i915 device + */ +int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) +{ + struct i915_ggtt *ggtt = &dev_priv->ggtt; + int ret; + + INIT_LIST_HEAD(&dev_priv->vm_list); + + /* Subtract the guard page before address space initialization to + * shrink the range used by drm_mm. 
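For clarity, the two sanity clamps applied in i915_ggtt_probe_hw() above can be distilled as follows: the driver only expects a 32bit global GTT, and the CPU-mappable aperture (GMADR) must not extend past the end of it. A stand-alone sketch, not driver code:

#include <stdio.h>

static void clamp_ggtt(unsigned long long *total,
		       unsigned long long *mappable_end)
{
	if ((*total - 1) >> 32)		/* more than 4GiB is unexpected */
		*total = 1ULL << 32;
	if (*mappable_end > *total)	/* GMADR cannot outgrow the GTT */
		*mappable_end = *total;
}

int main(void)
{
	unsigned long long total = 8ULL << 30, mappable = 512ULL << 20;

	clamp_ggtt(&total, &mappable);
	printf("total=%llu mappable=%llu\n", total, mappable);
	return 0;
}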
+ */ + ggtt->base.total -= PAGE_SIZE; + i915_address_space_init(&ggtt->base, dev_priv); + ggtt->base.total += PAGE_SIZE; + if (!HAS_LLC(dev_priv)) + ggtt->base.mm.color_adjust = i915_gtt_color_adjust; + + if (!io_mapping_init_wc(&dev_priv->ggtt.mappable, + dev_priv->ggtt.mappable_base, + dev_priv->ggtt.mappable_end)) { + ret = -EIO; + goto out_gtt_cleanup; + } + + ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, ggtt->mappable_end); + + /* + * Initialise stolen early so that we may reserve preallocated + * objects for the BIOS to KMS transition. + */ + ret = i915_gem_init_stolen(&dev_priv->drm); + if (ret) + goto out_gtt_cleanup; + + return 0; out_gtt_cleanup: ggtt->base.cleanup(&ggtt->base); - return ret; } -int i915_ggtt_enable_hw(struct drm_device *dev) +int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv) { - if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) + if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt()) return -EIO; return 0; @@ -3291,8 +3232,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct drm_i915_gem_object *obj; - struct i915_vma *vma; + struct drm_i915_gem_object *obj, *on; i915_check_and_clear_faults(dev_priv); @@ -3300,20 +3240,32 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total, true); - /* Cache flush objects bound into GGTT and rebind them. */ - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { + ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */ + + /* clflush objects bound into the GGTT and rebind them. */ + list_for_each_entry_safe(obj, on, + &dev_priv->mm.bound_list, global_list) { + bool ggtt_bound = false; + struct i915_vma *vma; + list_for_each_entry(vma, &obj->vma_list, obj_link) { if (vma->vm != &ggtt->base) continue; + if (!i915_vma_unbind(vma)) + continue; + WARN_ON(i915_vma_bind(vma, obj->cache_level, PIN_UPDATE)); + ggtt_bound = true; } - if (obj->pin_display) + if (ggtt_bound) WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); } + ggtt->base.closed = false; + if (INTEL_INFO(dev)->gen >= 8) { if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) chv_setup_private_ppat(dev_priv); @@ -3331,7 +3283,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) struct i915_hw_ppgtt *ppgtt; - if (vm->is_ggtt) + if (i915_is_ggtt(vm)) ppgtt = dev_priv->mm.aliasing_ppgtt; else ppgtt = i915_vm_to_ppgtt(vm); @@ -3344,65 +3296,155 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) i915_ggtt_flush(dev_priv); } +static void +i915_vma_retire(struct i915_gem_active *active, + struct drm_i915_gem_request *rq) +{ + const unsigned int idx = rq->engine->id; + struct i915_vma *vma = + container_of(active, struct i915_vma, last_read[idx]); + + GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx)); + + i915_vma_clear_active(vma, idx); + if (i915_vma_is_active(vma)) + return; + + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma))) + WARN_ON(i915_vma_unbind(vma)); +} + +void i915_vma_destroy(struct i915_vma *vma) +{ + GEM_BUG_ON(vma->node.allocated); + GEM_BUG_ON(i915_vma_is_active(vma)); + GEM_BUG_ON(!i915_vma_is_closed(vma)); + GEM_BUG_ON(vma->fence); + + list_del(&vma->vm_link); + if (!i915_vma_is_ggtt(vma)) + i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); + + kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); +} + +void i915_vma_close(struct i915_vma *vma) +{ + 
GEM_BUG_ON(i915_vma_is_closed(vma)); + vma->flags |= I915_VMA_CLOSED; + + list_del_init(&vma->obj_link); + if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) + WARN_ON(i915_vma_unbind(vma)); +} + static struct i915_vma * -__i915_gem_vma_create(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *ggtt_view) +__i915_vma_create(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) { struct i915_vma *vma; + int i; - if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) - return ERR_PTR(-EINVAL); + GEM_BUG_ON(vm->closed); vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL); if (vma == NULL) return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(&vma->vm_link); - INIT_LIST_HEAD(&vma->obj_link); INIT_LIST_HEAD(&vma->exec_list); + for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) + init_request_active(&vma->last_read[i], i915_vma_retire); + init_request_active(&vma->last_fence, NULL); + list_add(&vma->vm_link, &vm->unbound_list); vma->vm = vm; vma->obj = obj; - vma->is_ggtt = i915_is_ggtt(vm); + vma->size = obj->base.size; + + if (view) { + vma->ggtt_view = *view; + if (view->type == I915_GGTT_VIEW_PARTIAL) { + vma->size = view->params.partial.size; + vma->size <<= PAGE_SHIFT; + } else if (view->type == I915_GGTT_VIEW_ROTATED) { + vma->size = + intel_rotation_info_size(&view->params.rotated); + vma->size <<= PAGE_SHIFT; + } + } - if (i915_is_ggtt(vm)) - vma->ggtt_view = *ggtt_view; - else + if (i915_is_ggtt(vm)) { + vma->flags |= I915_VMA_GGTT; + } else { i915_ppgtt_get(i915_vm_to_ppgtt(vm)); + } list_add_tail(&vma->obj_link, &obj->vma_list); - return vma; } +static inline bool vma_matches(struct i915_vma *vma, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) +{ + if (vma->vm != vm) + return false; + + if (!i915_vma_is_ggtt(vma)) + return true; + + if (!view) + return vma->ggtt_view.type == 0; + + if (vma->ggtt_view.type != view->type) + return false; + + return memcmp(&vma->ggtt_view.params, + &view->params, + sizeof(view->params)) == 0; +} + struct i915_vma * -i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, - struct i915_address_space *vm) +i915_vma_create(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) +{ + GEM_BUG_ON(view && !i915_is_ggtt(vm)); + GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view)); + + return __i915_vma_create(obj, vm, view); +} + +struct i915_vma * +i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) { struct i915_vma *vma; - vma = i915_gem_obj_to_vma(obj, vm); - if (!vma) - vma = __i915_gem_vma_create(obj, vm, - i915_is_ggtt(vm) ? 
&i915_ggtt_view_normal : NULL); + list_for_each_entry_reverse(vma, &obj->vma_list, obj_link) + if (vma_matches(vma, vm, view)) + return vma; - return vma; + return NULL; } struct i915_vma * -i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view) +i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); + struct i915_vma *vma; + GEM_BUG_ON(view && !i915_is_ggtt(vm)); + + vma = i915_gem_obj_to_vma(obj, vm, view); if (!vma) - vma = __i915_gem_vma_create(obj, &ggtt->base, view); + vma = __i915_vma_create(obj, vm, view); + GEM_BUG_ON(i915_vma_is_closed(vma)); return vma; - } static struct scatterlist * @@ -3434,18 +3476,16 @@ rotate_pages(const dma_addr_t *in, unsigned int offset, } static struct sg_table * -intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info, +intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info, struct drm_i915_gem_object *obj) { const size_t n_pages = obj->base.size / PAGE_SIZE; - unsigned int size_pages = rot_info->plane[0].width * rot_info->plane[0].height; - unsigned int size_pages_uv; + unsigned int size = intel_rotation_info_size(rot_info); struct sgt_iter sgt_iter; dma_addr_t dma_addr; unsigned long i; dma_addr_t *page_addr_list; struct sg_table *st; - unsigned int uv_start_page; struct scatterlist *sg; int ret = -ENOMEM; @@ -3456,18 +3496,12 @@ intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info, if (!page_addr_list) return ERR_PTR(ret); - /* Account for UV plane with NV12. */ - if (rot_info->pixel_format == DRM_FORMAT_NV12) - size_pages_uv = rot_info->plane[1].width * rot_info->plane[1].height; - else - size_pages_uv = 0; - /* Allocate target SG list. */ st = kmalloc(sizeof(*st), GFP_KERNEL); if (!st) goto err_st_alloc; - ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL); + ret = sg_alloc_table(st, size, GFP_KERNEL); if (ret) goto err_sg_alloc; @@ -3480,32 +3514,14 @@ intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info, st->nents = 0; sg = st->sgl; - /* Rotate the pages. */ - sg = rotate_pages(page_addr_list, 0, - rot_info->plane[0].width, rot_info->plane[0].height, - rot_info->plane[0].width, - st, sg); - - /* Append the UV plane if NV12. */ - if (rot_info->pixel_format == DRM_FORMAT_NV12) { - uv_start_page = size_pages; - - /* Check for tile-row un-alignment. 
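The replacement loop below drops the NV12 special case by iterating over the plane array, sizing the rotated view as the sum of width x height per plane; presumably that sum is also what the new intel_rotation_info_size() helper returns. A stand-alone sketch with a local stand-in for the rotation descriptor:

#include <stdio.h>

struct rotation_info {
	struct { unsigned int width, height, stride, offset; } plane[2];
};

/* Rotated GGTT views are measured in tile-sized pages: accumulate
 * width * height over every populated plane (two planes for NV12). */
static unsigned int rotation_info_size(const struct rotation_info *info)
{
	unsigned int i, size = 0;

	for (i = 0; i < 2; i++)
		size += info->plane[i].width * info->plane[i].height;
	return size;
}

int main(void)
{
	struct rotation_info info = { .plane = { { 4, 8 }, { 2, 4 } } };

	printf("%u pages\n", rotation_info_size(&info));
	return 0;
}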
*/ - if (offset_in_page(rot_info->uv_offset)) - uv_start_page--; - - rot_info->uv_start_page = uv_start_page; - - sg = rotate_pages(page_addr_list, rot_info->uv_start_page, - rot_info->plane[1].width, rot_info->plane[1].height, - rot_info->plane[1].width, - st, sg); + for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) { + sg = rotate_pages(page_addr_list, rot_info->plane[i].offset, + rot_info->plane[i].width, rot_info->plane[i].height, + rot_info->plane[i].stride, st, sg); } - DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages (%u plane 0)).\n", - obj->base.size, rot_info->plane[0].width, - rot_info->plane[0].height, size_pages + size_pages_uv, - size_pages); + DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n", + obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); drm_free_large(page_addr_list); @@ -3516,10 +3532,9 @@ err_sg_alloc: err_st_alloc: drm_free_large(page_addr_list); - DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%d) (%ux%u tiles, %u pages (%u plane 0))\n", - obj->base.size, ret, rot_info->plane[0].width, - rot_info->plane[0].height, size_pages + size_pages_uv, - size_pages); + DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n", + obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); + return ERR_PTR(ret); } @@ -3569,28 +3584,27 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) { int ret = 0; - if (vma->ggtt_view.pages) + if (vma->pages) return 0; if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) - vma->ggtt_view.pages = vma->obj->pages; + vma->pages = vma->obj->pages; else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED) - vma->ggtt_view.pages = + vma->pages = intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj); else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL) - vma->ggtt_view.pages = - intel_partial_pages(&vma->ggtt_view, vma->obj); + vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj); else WARN_ONCE(1, "GGTT view %u not implemented!\n", vma->ggtt_view.type); - if (!vma->ggtt_view.pages) { + if (!vma->pages) { DRM_ERROR("Failed to get pages for GGTT view type %u!\n", vma->ggtt_view.type); ret = -EINVAL; - } else if (IS_ERR(vma->ggtt_view.pages)) { - ret = PTR_ERR(vma->ggtt_view.pages); - vma->ggtt_view.pages = NULL; + } else if (IS_ERR(vma->pages)) { + ret = PTR_ERR(vma->pages); + vma->pages = NULL; DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", vma->ggtt_view.type, ret); } @@ -3611,34 +3625,32 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) { - int ret; u32 bind_flags; + u32 vma_flags; + int ret; if (WARN_ON(flags == 0)) return -EINVAL; bind_flags = 0; if (flags & PIN_GLOBAL) - bind_flags |= GLOBAL_BIND; + bind_flags |= I915_VMA_GLOBAL_BIND; if (flags & PIN_USER) - bind_flags |= LOCAL_BIND; + bind_flags |= I915_VMA_LOCAL_BIND; + vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); if (flags & PIN_UPDATE) - bind_flags |= vma->bound; + bind_flags |= vma_flags; else - bind_flags &= ~vma->bound; - + bind_flags &= ~vma_flags; if (bind_flags == 0) return 0; - if (vma->bound == 0 && vma->vm->allocate_va_range) { - /* XXX: i915_vma_pin() will fix this +- hack */ - vma->pin_count++; + if (vma_flags == 0 && vma->vm->allocate_va_range) { trace_i915_va_alloc(vma); ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->node.size); - 
vma->pin_count--; if (ret) return ret; } @@ -3647,56 +3659,47 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, if (ret) return ret; - vma->bound |= bind_flags; - + vma->flags |= bind_flags; return 0; } -/** - * i915_ggtt_view_size - Get the size of a GGTT view. - * @obj: Object the view is of. - * @view: The view in question. - * - * @return The size of the GGTT view in bytes. - */ -size_t -i915_ggtt_view_size(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view) -{ - if (view->type == I915_GGTT_VIEW_NORMAL) { - return obj->base.size; - } else if (view->type == I915_GGTT_VIEW_ROTATED) { - return intel_rotation_info_size(&view->params.rotated) << PAGE_SHIFT; - } else if (view->type == I915_GGTT_VIEW_PARTIAL) { - return view->params.partial.size << PAGE_SHIFT; - } else { - WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type); - return obj->base.size; - } -} - void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) { void __iomem *ptr; + /* Access through the GTT requires the device to be awake. */ + assert_rpm_wakelock_held(to_i915(vma->vm->dev)); + lockdep_assert_held(&vma->vm->dev->struct_mutex); - if (WARN_ON(!vma->obj->map_and_fenceable)) - return ERR_PTR(-ENODEV); + if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) + return IO_ERR_PTR(-ENODEV); - GEM_BUG_ON(!vma->is_ggtt); - GEM_BUG_ON((vma->bound & GLOBAL_BIND) == 0); + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); + GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0); ptr = vma->iomap; if (ptr == NULL) { - ptr = io_mapping_map_wc(i915_vm_to_ggtt(vma->vm)->mappable, + ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable, vma->node.start, vma->node.size); if (ptr == NULL) - return ERR_PTR(-ENOMEM); + return IO_ERR_PTR(-ENOMEM); vma->iomap = ptr; } - vma->pin_count++; + __i915_vma_pin(vma); return ptr; } + +void i915_vma_unpin_and_release(struct i915_vma **p_vma) +{ + struct i915_vma *vma; + + vma = fetch_and_zero(p_vma); + if (!vma) + return; + + i915_vma_unpin(vma); + i915_vma_put(vma); +} diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index aa5f31d1c2ed..ec78be2f8c77 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -36,7 +36,15 @@ #include <linux/io-mapping.h> +#include "i915_gem_request.h" + +#define I915_FENCE_REG_NONE -1 +#define I915_MAX_NUM_FENCES 32 +/* 32 fences + sign bit for FENCE_REG_NONE */ +#define I915_MAX_NUM_FENCE_BITS 6 + struct drm_i915_file_private; +struct drm_i915_fence_reg; typedef uint32_t gen6_pte_t; typedef uint64_t gen8_pte_t; @@ -137,12 +145,9 @@ enum i915_ggtt_view_type { }; struct intel_rotation_info { - unsigned int uv_offset; - uint32_t pixel_format; - unsigned int uv_start_page; struct { /* tiles */ - unsigned int width, height; + unsigned int width, height, stride, offset; } plane[2]; }; @@ -156,8 +161,6 @@ struct i915_ggtt_view { } partial; struct intel_rotation_info rotated; } params; - - struct sg_table *pages; }; extern const struct i915_ggtt_view i915_ggtt_view_normal; @@ -177,13 +180,38 @@ struct i915_vma { struct drm_mm_node node; struct drm_i915_gem_object *obj; struct i915_address_space *vm; + struct drm_i915_fence_reg *fence; + struct sg_table *pages; void __iomem *iomap; + u64 size; + u64 display_alignment; + + unsigned int flags; + /** + * How many users have pinned this object in GTT space. 
The following + * users can each hold at most one reference: pwrite/pread, execbuffer + * (objects are not allowed multiple times for the same batchbuffer), + * and the framebuffer code. When switching/pageflipping, the + * framebuffer code has at most two buffers pinned per crtc. + * + * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 + * bits with absolutely no headroom. So use 4 bits. + */ +#define I915_VMA_PIN_MASK 0xf +#define I915_VMA_PIN_OVERFLOW BIT(5) /** Flags and address space this VMA is bound to */ -#define GLOBAL_BIND (1<<0) -#define LOCAL_BIND (1<<1) - unsigned int bound : 4; - bool is_ggtt : 1; +#define I915_VMA_GLOBAL_BIND BIT(6) +#define I915_VMA_LOCAL_BIND BIT(7) +#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW) + +#define I915_VMA_GGTT BIT(8) +#define I915_VMA_CAN_FENCE BIT(9) +#define I915_VMA_CLOSED BIT(10) + + unsigned int active; + struct i915_gem_active last_read[I915_NUM_ENGINES]; + struct i915_gem_active last_fence; /** * Support different GGTT views into the same object. @@ -208,20 +236,66 @@ struct i915_vma { struct hlist_node exec_node; unsigned long exec_handle; struct drm_i915_gem_exec_object2 *exec_entry; - - /** - * How many users have pinned this object in GTT space. The following - * users can each hold at most one reference: pwrite/pread, execbuffer - * (objects are not allowed multiple times for the same batchbuffer), - * and the framebuffer code. When switching/pageflipping, the - * framebuffer code has at most two buffers pinned per crtc. - * - * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 - * bits with absolutely no headroom. So use 4 bits. */ - unsigned int pin_count:4; -#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf }; +struct i915_vma * +i915_vma_create(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view); +void i915_vma_unpin_and_release(struct i915_vma **p_vma); + +static inline bool i915_vma_is_ggtt(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_GGTT; +} + +static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_CAN_FENCE; +} + +static inline bool i915_vma_is_closed(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_CLOSED; +} + +static inline unsigned int i915_vma_get_active(const struct i915_vma *vma) +{ + return vma->active; +} + +static inline bool i915_vma_is_active(const struct i915_vma *vma) +{ + return i915_vma_get_active(vma); +} + +static inline void i915_vma_set_active(struct i915_vma *vma, + unsigned int engine) +{ + vma->active |= BIT(engine); +} + +static inline void i915_vma_clear_active(struct i915_vma *vma, + unsigned int engine) +{ + vma->active &= ~BIT(engine); +} + +static inline bool i915_vma_has_active_engine(const struct i915_vma *vma, + unsigned int engine) +{ + return vma->active & BIT(engine); +} + +static inline u32 i915_ggtt_offset(const struct i915_vma *vma) +{ + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); + GEM_BUG_ON(!vma->node.allocated); + GEM_BUG_ON(upper_32_bits(vma->node.start)); + GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1)); + return lower_32_bits(vma->node.start); +} + struct i915_page_dma { struct page *page; union { @@ -238,10 +312,6 @@ struct i915_page_dma { #define px_page(px) (px_base(px)->page) #define px_dma(px) (px_base(px)->daddr) -struct i915_page_scratch { - struct i915_page_dma base; -}; - struct i915_page_table { struct i915_page_dma base; @@ -272,13 +342,22 @@ 
struct i915_pml4 { struct i915_address_space { struct drm_mm mm; struct drm_device *dev; + /* Every address space belongs to a struct file - except for the global + * GTT that is owned by the driver (and so @file is set to NULL). In + * principle, no information should leak from one context to another + * (or between files/processes etc) unless explicitly shared by the + * owner. Tracking the owner is important in order to free up per-file + * objects along with the file, to aid resource tracking, and to + * assign blame. + */ + struct drm_i915_file_private *file; struct list_head global_link; u64 start; /* Start offset always 0 for dri2 */ u64 total; /* size addr space maps (ex. 2GB for ggtt) */ - bool is_ggtt; + bool closed; - struct i915_page_scratch *scratch_page; + struct i915_page_dma scratch_page; struct i915_page_table *scratch_pt; struct i915_page_directory *scratch_pd; struct i915_page_directory_pointer *scratch_pdp; /* GEN8+ & 48b PPGTT */ @@ -306,6 +385,13 @@ struct i915_address_space { */ struct list_head inactive_list; + /** + * List of vma that have been unbound. + * + * A reference is not held on the buffer while on this list. + */ + struct list_head unbound_list; + /* FIXME: Need a more generic return type */ gen6_pte_t (*pte_encode)(dma_addr_t addr, enum i915_cache_level level, @@ -338,7 +424,7 @@ struct i915_address_space { u32 flags); }; -#define i915_is_ggtt(V) ((V)->is_ggtt) +#define i915_is_ggtt(V) (!(V)->file) /* The Graphics Translation Table is the way in which GEN hardware translates a * Graphics Virtual Address into a Physical Address. In addition to the normal @@ -349,14 +435,13 @@ struct i915_address_space { */ struct i915_ggtt { struct i915_address_space base; + struct io_mapping mappable; /* Mapping to our CPU mappable region */ size_t stolen_size; /* Total size of stolen memory */ size_t stolen_usable_size; /* Total size minus BIOS reserved */ size_t stolen_reserved_base; size_t stolen_reserved_size; - size_t size; /* Total size of Global GTT */ u64 mappable_end; /* End offset that we can CPU map */ - struct io_mapping *mappable; /* Mapping to our CPU mappable region */ phys_addr_t mappable_base; /* PA of our GMADR */ /** "Graphics Stolen Memory" holds the global PTEs */ @@ -365,8 +450,6 @@ struct i915_ggtt { bool do_idle_maps; int mtrr; - - int (*probe)(struct i915_ggtt *ggtt); }; struct i915_hw_ppgtt { @@ -380,8 +463,6 @@ struct i915_hw_ppgtt { struct i915_page_directory pd; /* GEN6-7 */ }; - struct drm_i915_file_private *file_priv; - gen6_pte_t __iomem *pd_addr; int (*enable)(struct i915_hw_ppgtt *ppgtt); @@ -521,14 +602,15 @@ i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n) px_dma(ppgtt->base.scratch_pd); } -int i915_ggtt_init_hw(struct drm_device *dev); -int i915_ggtt_enable_hw(struct drm_device *dev); -void i915_gem_init_ggtt(struct drm_device *dev); -void i915_ggtt_cleanup_hw(struct drm_device *dev); +int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv); +int i915_ggtt_init_hw(struct drm_i915_private *dev_priv); +int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv); +int i915_gem_init_ggtt(struct drm_i915_private *dev_priv); +void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv); int i915_ppgtt_init_hw(struct drm_device *dev); void i915_ppgtt_release(struct kref *kref); -struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_device *dev, +struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv, struct drm_i915_file_private *fpriv); static inline void i915_ppgtt_get(struct i915_hw_ppgtt
*ppgtt) { @@ -548,23 +630,67 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev); int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj); void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj); -static inline bool -i915_ggtt_view_equal(const struct i915_ggtt_view *a, - const struct i915_ggtt_view *b) +/* Flags used by pin/bind&friends. */ +#define PIN_NONBLOCK BIT(0) +#define PIN_MAPPABLE BIT(1) +#define PIN_ZONE_4G BIT(2) +#define PIN_NONFAULT BIT(3) + +#define PIN_MBZ BIT(5) /* I915_VMA_PIN_OVERFLOW */ +#define PIN_GLOBAL BIT(6) /* I915_VMA_GLOBAL_BIND */ +#define PIN_USER BIT(7) /* I915_VMA_LOCAL_BIND */ +#define PIN_UPDATE BIT(8) + +#define PIN_HIGH BIT(9) +#define PIN_OFFSET_BIAS BIT(10) +#define PIN_OFFSET_FIXED BIT(11) +#define PIN_OFFSET_MASK (~4095) + +int __i915_vma_do_pin(struct i915_vma *vma, + u64 size, u64 alignment, u64 flags); +static inline int __must_check +i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { - if (WARN_ON(!a || !b)) - return false; - - if (a->type != b->type) - return false; - if (a->type != I915_GGTT_VIEW_NORMAL) - return !memcmp(&a->params, &b->params, sizeof(a->params)); - return true; + BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW); + BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); + BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND); + + /* Pin early to prevent the shrinker/eviction logic from destroying + * our vma as we insert and bind. + */ + if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0)) + return 0; + + return __i915_vma_do_pin(vma, size, alignment, flags); } -size_t -i915_ggtt_view_size(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view); +static inline int i915_vma_pin_count(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_PIN_MASK; +} + +static inline bool i915_vma_is_pinned(const struct i915_vma *vma) +{ + return i915_vma_pin_count(vma); +} + +static inline void __i915_vma_pin(struct i915_vma *vma) +{ + vma->flags++; + GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW); +} + +static inline void __i915_vma_unpin(struct i915_vma *vma) +{ + GEM_BUG_ON(!i915_vma_is_pinned(vma)); + vma->flags--; +} + +static inline void i915_vma_unpin(struct i915_vma *vma) +{ + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + __i915_vma_unpin(vma); +} /** * i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture @@ -580,6 +706,7 @@ i915_ggtt_view_size(struct drm_i915_gem_object *obj, * Returns a valid iomapped pointer or ERR_PTR. 
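The i915_vma_pin() fast path above is worth unpacking. The BUILD_BUG_ON()s guarantee that PIN_GLOBAL and PIN_USER occupy the same bit positions as I915_VMA_GLOBAL_BIND and I915_VMA_LOCAL_BIND, so a single increment-plus-XOR takes a pin reference, checks that every requested bind bit is already set, and catches the 4bit pin count carrying into the overflow bit, all in one branch. A stand-alone restatement of the trick; the macro names shadow the patch's values:

#include <stdio.h>

#define VMA_PIN_MASK	 0xfu
#define VMA_PIN_OVERFLOW (1u << 5)
#define VMA_GLOBAL_BIND	 (1u << 6)
#define VMA_LOCAL_BIND	 (1u << 7)
#define VMA_BIND_MASK	 (VMA_GLOBAL_BIND | VMA_LOCAL_BIND | VMA_PIN_OVERFLOW)

static int vma_pin(unsigned int *flags, unsigned int request)
{
	/* ++*flags takes the pin; the XOR then proves the requested
	 * bind bits were already set and that the pin count did not
	 * carry into bit 5, so the slow path can be skipped. */
	if (((++*flags ^ request) & VMA_BIND_MASK) == 0)
		return 0;
	return -1; /* the driver would call __i915_vma_do_pin() here */
}

int main(void)
{
	unsigned int flags = VMA_GLOBAL_BIND; /* bound, not yet pinned */
	int fast = vma_pin(&flags, VMA_GLOBAL_BIND);

	printf("fast path: %s, pin count: %u\n",
	       fast == 0 ? "yes" : "no", flags & VMA_PIN_MASK);
	return 0;
}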
*/ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); +#define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x)) /** * i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_iomap @@ -593,9 +720,14 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); static inline void i915_vma_unpin_iomap(struct i915_vma *vma) { lockdep_assert_held(&vma->vm->dev->struct_mutex); - GEM_BUG_ON(vma->pin_count == 0); GEM_BUG_ON(vma->iomap == NULL); - vma->pin_count--; + i915_vma_unpin(vma); +} + +static inline struct page *i915_vma_first_page(struct i915_vma *vma) +{ + GEM_BUG_ON(!vma->pages); + return sg_page(vma->pages->sgl); } #endif diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index f75bbd67a13a..95b7e9afd5f8 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -28,10 +28,17 @@ #include "i915_drv.h" #include "intel_renderstate.h" +struct render_state { + const struct intel_renderstate_rodata *rodata; + struct i915_vma *vma; + u32 aux_batch_size; + u32 aux_batch_offset; +}; + static const struct intel_renderstate_rodata * -render_state_get_rodata(const int gen) +render_state_get_rodata(const struct drm_i915_gem_request *req) { - switch (gen) { + switch (INTEL_GEN(req->i915)) { case 6: return &gen6_null_state; case 7: @@ -45,35 +52,6 @@ render_state_get_rodata(const int gen) return NULL; } -static int render_state_init(struct render_state *so, - struct drm_i915_private *dev_priv) -{ - int ret; - - so->gen = INTEL_GEN(dev_priv); - so->rodata = render_state_get_rodata(so->gen); - if (so->rodata == NULL) - return 0; - - if (so->rodata->batch_items * 4 > 4096) - return -EINVAL; - - so->obj = i915_gem_object_create(&dev_priv->drm, 4096); - if (IS_ERR(so->obj)) - return PTR_ERR(so->obj); - - ret = i915_gem_obj_ggtt_pin(so->obj, 4096, 0); - if (ret) - goto free_gem; - - so->ggtt_offset = i915_gem_obj_ggtt_offset(so->obj); - return 0; - -free_gem: - drm_gem_object_unreference(&so->obj->base); - return ret; -} - /* * Macro to add commands to auxiliary batch. 
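The auxiliary-batch emission macro introduced by this comment (OUT_BATCH(), defined between the hunks and therefore not shown) has one safety net according to the description: a bounds check against the single 4KiB batch page before each write. A stand-alone approximation of that behaviour, with invented names:

#include <stdio.h>
#include <stdint.h>

#define BATCH_WORDS (4096 / sizeof(uint32_t))

/* Emit one dword, refusing (as -ENOSPC would in the driver) once the
 * batch page is full; no other validation is attempted. */
static int out_batch(uint32_t *batch, unsigned int *i, uint32_t val)
{
	if (*i >= BATCH_WORDS)
		return -1;
	batch[(*i)++] = val;
	return 0;
}

int main(void)
{
	uint32_t batch[BATCH_WORDS];
	unsigned int i = 0;
	int ret = out_batch(batch, &i, 0);

	printf("emit: %d, used: %u words\n", ret, i);
	return 0;
}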
* This macro only checks for page overflow before inserting the commands, @@ -94,27 +72,28 @@ free_gem: static int render_state_setup(struct render_state *so) { - struct drm_device *dev = so->obj->base.dev; + struct drm_device *dev = so->vma->vm->dev; const struct intel_renderstate_rodata *rodata = so->rodata; + const bool has_64bit_reloc = INTEL_GEN(dev) >= 8; unsigned int i = 0, reloc_index = 0; struct page *page; u32 *d; int ret; - ret = i915_gem_object_set_to_cpu_domain(so->obj, true); + ret = i915_gem_object_set_to_cpu_domain(so->vma->obj, true); if (ret) return ret; - page = i915_gem_object_get_dirty_page(so->obj, 0); + page = i915_gem_object_get_dirty_page(so->vma->obj, 0); d = kmap(page); while (i < rodata->batch_items) { u32 s = rodata->batch[i]; if (i * 4 == rodata->reloc[reloc_index]) { - u64 r = s + so->ggtt_offset; + u64 r = s + so->vma->node.start; s = lower_32_bits(r); - if (so->gen >= 8) { + if (has_64bit_reloc) { if (i + 1 >= rodata->batch_items || rodata->batch[i + 1] != 0) { ret = -EINVAL; @@ -174,7 +153,7 @@ static int render_state_setup(struct render_state *so) kunmap(page); - ret = i915_gem_object_set_to_gtt_domain(so->obj, false); + ret = i915_gem_object_set_to_gtt_domain(so->vma->obj, false); if (ret) return ret; @@ -192,67 +171,60 @@ err_out: #undef OUT_BATCH -void i915_gem_render_state_fini(struct render_state *so) -{ - i915_gem_object_ggtt_unpin(so->obj); - drm_gem_object_unreference(&so->obj->base); -} - -int i915_gem_render_state_prepare(struct intel_engine_cs *engine, - struct render_state *so) +int i915_gem_render_state_init(struct drm_i915_gem_request *req) { + struct render_state so; + struct drm_i915_gem_object *obj; int ret; - if (WARN_ON(engine->id != RCS)) + if (WARN_ON(req->engine->id != RCS)) return -ENOENT; - ret = render_state_init(so, engine->i915); - if (ret) - return ret; - - if (so->rodata == NULL) + so.rodata = render_state_get_rodata(req); + if (!so.rodata) return 0; - ret = render_state_setup(so); - if (ret) { - i915_gem_render_state_fini(so); - return ret; - } + if (so.rodata->batch_items * 4 > 4096) + return -EINVAL; - return 0; -} + obj = i915_gem_object_create(&req->i915->drm, 4096); + if (IS_ERR(obj)) + return PTR_ERR(obj); -int i915_gem_render_state_init(struct drm_i915_gem_request *req) -{ - struct render_state so; - int ret; + so.vma = i915_vma_create(obj, &req->i915->ggtt.base, NULL); + if (IS_ERR(so.vma)) { + ret = PTR_ERR(so.vma); + goto err_obj; + } - ret = i915_gem_render_state_prepare(req->engine, &so); + ret = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL); if (ret) - return ret; + goto err_obj; - if (so.rodata == NULL) - return 0; + ret = render_state_setup(&so); + if (ret) + goto err_unpin; - ret = req->engine->dispatch_execbuffer(req, so.ggtt_offset, - so.rodata->batch_items * 4, - I915_DISPATCH_SECURE); + ret = req->engine->emit_bb_start(req, so.vma->node.start, + so.rodata->batch_items * 4, + I915_DISPATCH_SECURE); if (ret) - goto out; + goto err_unpin; if (so.aux_batch_size > 8) { - ret = req->engine->dispatch_execbuffer(req, - (so.ggtt_offset + - so.aux_batch_offset), - so.aux_batch_size, - I915_DISPATCH_SECURE); + ret = req->engine->emit_bb_start(req, + (so.vma->node.start + + so.aux_batch_offset), + so.aux_batch_size, + I915_DISPATCH_SECURE); if (ret) - goto out; + goto err_unpin; } - i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req); - -out: - i915_gem_render_state_fini(&so); + i915_vma_move_to_active(so.vma, req, 0); +err_unpin: + i915_vma_unpin(so.vma); +err_obj: + i915_gem_object_put(obj); return ret; } diff --git 
a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h index 6aaa3a10a630..18cce3f06e9c 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.h +++ b/drivers/gpu/drm/i915/i915_gem_render_state.h @@ -24,26 +24,8 @@ #ifndef _I915_GEM_RENDER_STATE_H_ #define _I915_GEM_RENDER_STATE_H_ -#include <linux/types.h> - -struct intel_renderstate_rodata { - const u32 *reloc; - const u32 *batch; - const u32 batch_items; -}; - -struct render_state { - const struct intel_renderstate_rodata *rodata; - struct drm_i915_gem_object *obj; - u64 ggtt_offset; - int gen; - u32 aux_batch_size; - u32 aux_batch_offset; -}; +struct drm_i915_gem_request; int i915_gem_render_state_init(struct drm_i915_gem_request *req); -void i915_gem_render_state_fini(struct render_state *so); -int i915_gem_render_state_prepare(struct intel_engine_cs *engine, - struct render_state *so); #endif /* _I915_GEM_RENDER_STATE_H_ */ diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c new file mode 100644 index 000000000000..8832f8ec1583 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -0,0 +1,947 @@ +/* + * Copyright © 2008-2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/prefetch.h> + +#include "i915_drv.h" + +static const char *i915_fence_get_driver_name(struct fence *fence) +{ + return "i915"; +} + +static const char *i915_fence_get_timeline_name(struct fence *fence) +{ + /* Timelines are bound by eviction to a VM. However, since + * we only have a global seqno at the moment, we only have + * a single timeline. Note that each timeline will have + * multiple execution contexts (fence contexts) as we allow + * engines within a single timeline to execute in parallel. 
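For context, these callbacks, beginning with the timeline name above, plug each request into the kernel's (pre-dma_fence rename) struct fence machinery. The request's embedded fence would be initialised along these lines; req->lock and the per-engine fence context are assumptions, as neither is visible in this hunk:

	fence_init(&req->fence,
		   &i915_fence_ops,
		   &req->lock,			/* spinlock guarding the fence */
		   req->engine->fence_context,	/* one fence context per engine */
		   seqno);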
+ */ + return "global"; +} + +static bool i915_fence_signaled(struct fence *fence) +{ + return i915_gem_request_completed(to_request(fence)); +} + +static bool i915_fence_enable_signaling(struct fence *fence) +{ + if (i915_fence_signaled(fence)) + return false; + + intel_engine_enable_signaling(to_request(fence)); + return true; +} + +static signed long i915_fence_wait(struct fence *fence, + bool interruptible, + signed long timeout_jiffies) +{ + s64 timeout_ns, *timeout; + int ret; + + if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) { + timeout_ns = jiffies_to_nsecs(timeout_jiffies); + timeout = &timeout_ns; + } else { + timeout = NULL; + } + + ret = i915_wait_request(to_request(fence), + interruptible, timeout, + NO_WAITBOOST); + if (ret == -ETIME) + return 0; + + if (ret < 0) + return ret; + + if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) + timeout_jiffies = nsecs_to_jiffies(timeout_ns); + + return timeout_jiffies; +} + +static void i915_fence_value_str(struct fence *fence, char *str, int size) +{ + snprintf(str, size, "%u", fence->seqno); +} + +static void i915_fence_timeline_value_str(struct fence *fence, char *str, + int size) +{ + snprintf(str, size, "%u", + intel_engine_get_seqno(to_request(fence)->engine)); +} + +static void i915_fence_release(struct fence *fence) +{ + struct drm_i915_gem_request *req = to_request(fence); + + kmem_cache_free(req->i915->requests, req); +} + +const struct fence_ops i915_fence_ops = { + .get_driver_name = i915_fence_get_driver_name, + .get_timeline_name = i915_fence_get_timeline_name, + .enable_signaling = i915_fence_enable_signaling, + .signaled = i915_fence_signaled, + .wait = i915_fence_wait, + .release = i915_fence_release, + .fence_value_str = i915_fence_value_str, + .timeline_value_str = i915_fence_timeline_value_str, +}; + +int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, + struct drm_file *file) +{ + struct drm_i915_private *dev_private; + struct drm_i915_file_private *file_priv; + + WARN_ON(!req || !file || req->file_priv); + + if (!req || !file) + return -EINVAL; + + if (req->file_priv) + return -EINVAL; + + dev_private = req->i915; + file_priv = file->driver_priv; + + spin_lock(&file_priv->mm.lock); + req->file_priv = file_priv; + list_add_tail(&req->client_list, &file_priv->mm.request_list); + spin_unlock(&file_priv->mm.lock); + + return 0; +} + +static inline void +i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) +{ + struct drm_i915_file_private *file_priv = request->file_priv; + + if (!file_priv) + return; + + spin_lock(&file_priv->mm.lock); + list_del(&request->client_list); + request->file_priv = NULL; + spin_unlock(&file_priv->mm.lock); +} + +void i915_gem_retire_noop(struct i915_gem_active *active, + struct drm_i915_gem_request *request) +{ + /* Space left intentionally blank */ +} + +static void i915_gem_request_retire(struct drm_i915_gem_request *request) +{ + struct i915_gem_active *active, *next; + + trace_i915_gem_request_retire(request); + list_del(&request->link); + + /* We know the GPU must have read the request to have + * sent us the seqno + interrupt, so use the position + * of tail of the request to update the last known position + * of the GPU head. + * + * Note this requires that we are always called in request + * completion order. + */ + list_del(&request->ring_link); + request->ring->last_retired_head = request->postfix; + + /* Walk through the active list, calling retire on each. 
This allows + * objects to track their GPU activity and mark themselves as idle + * when their *last* active request is completed (updating state + * tracking lists for eviction, active references for GEM, etc). + * + * As the ->retire() may free the node, we decouple it first and + * pass along the auxiliary information (to avoid dereferencing + * the node after the callback). + */ + list_for_each_entry_safe(active, next, &request->active_list, link) { + /* In microbenchmarks or focusing upon time inside the kernel, + * we may spend an inordinate amount of time simply handling + * the retirement of requests and processing their callbacks. + * Of which, this loop itself is particularly hot due to the + * cache misses when jumping around the list of i915_gem_active. + * So we try to keep this loop as streamlined as possible and + * also prefetch the next i915_gem_active to try and hide + * the likely cache miss. + */ + prefetchw(next); + + INIT_LIST_HEAD(&active->link); + RCU_INIT_POINTER(active->request, NULL); + + active->retire(active, request); + } + + i915_gem_request_remove_from_client(request); + + if (request->previous_context) { + if (i915.enable_execlists) + intel_lr_context_unpin(request->previous_context, + request->engine); + } + + i915_gem_context_put(request->ctx); + i915_gem_request_put(request); +} + +void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) +{ + struct intel_engine_cs *engine = req->engine; + struct drm_i915_gem_request *tmp; + + lockdep_assert_held(&req->i915->drm.struct_mutex); + GEM_BUG_ON(list_empty(&req->link)); + + do { + tmp = list_first_entry(&engine->request_list, + typeof(*tmp), link); + + i915_gem_request_retire(tmp); + } while (tmp != req); +} + +static int i915_gem_check_wedge(struct drm_i915_private *dev_priv) +{ + struct i915_gpu_error *error = &dev_priv->gpu_error; + + if (i915_terminally_wedged(error)) + return -EIO; + + if (i915_reset_in_progress(error)) { + /* Non-interruptible callers can't handle -EAGAIN, hence return + * -EIO unconditionally for these. 
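The decoupling in the retire loop above (reinitialising the link and clearing the request pointer before invoking ->retire()) is what makes it safe for the callback to free the node. A minimal userspace sketch of the same idiom, with illustrative names not taken from the patch:

#include <stdio.h>
#include <stdlib.h>

struct node {
	struct node *next;
	void (*retire)(struct node *node);
	int id;
};

static void retire_and_free(struct node *node)
{
	printf("retired %d\n", node->id);
	free(node); /* the callback is free to release the node */
}

static void retire_all(struct node **head)
{
	struct node *node = *head;

	while (node) {
		struct node *next = node->next; /* capture the successor first */

		node->next = NULL; /* decouple, cf. INIT_LIST_HEAD() */
		node->retire(node); /* may free(node); never touch it again */
		node = next;
	}
	*head = NULL;
}

int main(void)
{
	struct node *head = NULL;
	int i;

	for (i = 0; i < 3; i++) {
		struct node *node = malloc(sizeof(*node));

		node->id = i;
		node->retire = retire_and_free;
		node->next = head;
		head = node;
	}
	retire_all(&head);
	return 0;
}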
+ */ + if (!dev_priv->mm.interruptible) + return -EIO; + + return -EAGAIN; + } + + return 0; +} + +static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno) +{ + struct intel_engine_cs *engine; + int ret; + + /* Carefully retire all requests without writing to the rings */ + for_each_engine(engine, dev_priv) { + ret = intel_engine_idle(engine, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED); + if (ret) + return ret; + } + i915_gem_retire_requests(dev_priv); + + /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ + if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) { + while (intel_kick_waiters(dev_priv) || + intel_kick_signalers(dev_priv)) + yield(); + } + + /* Finally reset hw state */ + for_each_engine(engine, dev_priv) + intel_engine_init_seqno(engine, seqno); + + return 0; +} + +int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + int ret; + + if (seqno == 0) + return -EINVAL; + + /* The seqno in the HWS page needs to be set to one less than + * the value we will inject into the ring + */ + ret = i915_gem_init_seqno(dev_priv, seqno - 1); + if (ret) + return ret; + + dev_priv->next_seqno = seqno; + return 0; +} + +static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno) +{ + /* reserve 0 for non-seqno */ + if (unlikely(dev_priv->next_seqno == 0)) { + int ret; + + ret = i915_gem_init_seqno(dev_priv, 0); + if (ret) + return ret; + + dev_priv->next_seqno = 1; + } + + *seqno = dev_priv->next_seqno++; + return 0; +} + +static int __i915_sw_fence_call +submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +{ + struct drm_i915_gem_request *request = + container_of(fence, typeof(*request), submit); + + /* Will be called from irq-context when using foreign DMA fences */ + + switch (state) { + case FENCE_COMPLETE: + request->engine->last_submitted_seqno = request->fence.seqno; + request->engine->submit_request(request); + break; + + case FENCE_FREE: + break; + } + + return NOTIFY_DONE; +} + +/** + * i915_gem_request_alloc - allocate a request structure + * + * @engine: engine that we wish to issue the request on. + * @ctx: context that the request will be associated with. + * This can be NULL if the request is not directly related to + * any specific user context, in which case this function will + * choose an appropriate context to use. + * + * Returns a pointer to the allocated request if successful, + * or an error code if not. + */ +struct drm_i915_gem_request * +i915_gem_request_alloc(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) +{ + struct drm_i915_private *dev_priv = engine->i915; + struct drm_i915_gem_request *req; + u32 seqno; + int ret; + + /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report + * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex + * and restart. + */ + ret = i915_gem_check_wedge(dev_priv); + if (ret) + return ERR_PTR(ret); + + /* Move the oldest request to the slab-cache (if not in use!) */ + req = list_first_entry_or_null(&engine->request_list, + typeof(*req), link); + if (req && i915_gem_request_completed(req)) + i915_gem_request_retire(req); + + /* Beware: Dragons be flying overhead. + * + * We use RCU to look up requests in flight. The lookups may + * race with the request being allocated from the slab freelist. + * That is, the request we are writing to here may be in the process + * of being read by __i915_gem_active_get_rcu(). As such, + * we have to be very careful when overwriting the contents. During + * the RCU lookup, we chase the request->engine pointer, + * read the request->fence.seqno and increment the reference count. + * + * The reference count is incremented atomically. If it is zero, + * the lookup knows the request is unallocated and complete. Otherwise, + * it is either still in use, or has been reallocated and reset + * with fence_init(). This increment is safe for release as we check + * that the request we have a reference to matches the active + * request. + * + * Before we increment the refcount, we chase the request->engine + * pointer. We must not call kmem_cache_zalloc() or else we set + * that pointer to NULL and cause a crash during the lookup. If + * we see the request is completed (based on the value of the + * old engine and seqno), the lookup is complete and reports NULL. + * If we decide the request is not completed (new engine or seqno), + * then we grab a reference and double check that it is still the + * active request - which, if it was just recycled, it won't be, + * and we restart the lookup. + * + * Do not use kmem_cache_zalloc() here! + */
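The wrap detection earlier in this hunk hinges on i915_seqno_passed() (defined below in i915_gem_request.h), which compares sequence numbers modulo 2^32 so that ordering survives the u32 counter wrapping. A standalone check of the arithmetic:

#include <assert.h>
#include <stdint.h>

/* Modular comparison in the style of i915_seqno_passed(): true if seq1
 * is at or after seq2, valid as long as the two are within 2^31 of
 * each other, even across a u32 wraparound. */
static int seqno_passed(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) >= 0;
}

int main(void)
{
	assert(seqno_passed(2, 1));
	assert(!seqno_passed(1, 2));
	assert(seqno_passed(5, 5));
	/* Across the wrap: 1 is "after" 0xffffffff. */
	assert(seqno_passed(1, 0xffffffffu));
	return 0;
}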
+ req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL); + if (!req) + return ERR_PTR(-ENOMEM); + + ret = i915_gem_get_seqno(dev_priv, &seqno); + if (ret) + goto err; + + spin_lock_init(&req->lock); + fence_init(&req->fence, + &i915_fence_ops, + &req->lock, + engine->fence_context, + seqno); + + i915_sw_fence_init(&req->submit, submit_notify); + + INIT_LIST_HEAD(&req->active_list); + req->i915 = dev_priv; + req->engine = engine; + req->ctx = i915_gem_context_get(ctx); + + /* No zalloc, must clear what we need by hand */ + req->previous_context = NULL; + req->file_priv = NULL; + req->batch = NULL; + + /* + * Reserve space in the ring buffer for all the commands required to + * eventually emit this request. This is to guarantee that the + * i915_add_request() call can't fail. Note that the reserve may need + * to be redone if the request is not actually submitted straight + * away, e.g. because a GPU scheduler has deferred it. + */ + req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; + + if (i915.enable_execlists) + ret = intel_logical_ring_alloc_request_extras(req); + else + ret = intel_ring_alloc_request_extras(req); + if (ret) + goto err_ctx; + + /* Record the position of the start of the request so that + * should we detect the updated seqno part-way through the + * GPU processing the request, we never over-estimate the + * position of the head.
+ */ + req->head = req->ring->tail; + + return req; + +err_ctx: + i915_gem_context_put(ctx); +err: + kmem_cache_free(dev_priv->requests, req); + return ERR_PTR(ret); +} + +static int +i915_gem_request_await_request(struct drm_i915_gem_request *to, + struct drm_i915_gem_request *from) +{ + int idx, ret; + + GEM_BUG_ON(to == from); + + if (to->engine == from->engine) + return 0; + + idx = intel_engine_sync_index(from->engine, to->engine); + if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx]) + return 0; + + trace_i915_gem_ring_sync_to(to, from); + if (!i915.semaphores) { + if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) { + ret = i915_sw_fence_await_dma_fence(&to->submit, + &from->fence, 0, + GFP_KERNEL); + if (ret < 0) + return ret; + } + } else { + ret = to->engine->semaphore.sync_to(to, from); + if (ret) + return ret; + } + + from->engine->semaphore.sync_seqno[idx] = from->fence.seqno; + return 0; +} + +/** + * i915_gem_request_await_object - set this request to (async) wait upon a bo + * + * @to: request we are wishing to use + * @obj: object which may be in use on another ring. + * + * This code is meant to abstract object synchronization with the GPU. + * Conceptually we serialise writes between engines inside the GPU. + * We only allow one engine to write into a buffer at any time, but + * multiple readers. To ensure each has a coherent view of memory, we must: + * + * - If there is an outstanding write request to the object, the new + * request must wait for it to complete (either CPU or in hw, requests + * on the same ring will be naturally ordered). + * + * - If we are a write request (pending_write_domain is set), the new + * request must wait for outstanding read requests to complete. + * + * Returns 0 if successful, else propagates up the lower layer error. + */ +int +i915_gem_request_await_object(struct drm_i915_gem_request *to, + struct drm_i915_gem_object *obj, + bool write) +{ + struct i915_gem_active *active; + unsigned long active_mask; + int idx; + + if (write) { + active_mask = i915_gem_object_get_active(obj); + active = obj->last_read; + } else { + active_mask = 1; + active = &obj->last_write; + } + + for_each_active(active_mask, idx) { + struct drm_i915_gem_request *request; + int ret; + + request = i915_gem_active_peek(&active[idx], + &obj->base.dev->struct_mutex); + if (!request) + continue; + + ret = i915_gem_request_await_request(to, request); + if (ret) + return ret; + } + + return 0; +} + +static void i915_gem_mark_busy(const struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + dev_priv->gt.active_engines |= intel_engine_flag(engine); + if (dev_priv->gt.awake) + return; + + intel_runtime_pm_get_noresume(dev_priv); + dev_priv->gt.awake = true; + + intel_enable_gt_powersave(dev_priv); + i915_update_gfx_val(dev_priv); + if (INTEL_GEN(dev_priv) >= 6) + gen6_rps_busy(dev_priv); + + queue_delayed_work(dev_priv->wq, + &dev_priv->gt.retire_work, + round_jiffies_up_relative(HZ)); +} + +/* + * NB: This function is not allowed to fail. Doing so would mean the + * request is not being tracked for completion but the work itself is + * going to happen on the hardware. This would be a Bad Thing(tm). + */
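i915_gem_request_await_request() above also shows the semaphore bookkeeping: a per-engine-pair high-water mark (semaphore.sync_seqno[]) lets redundant inter-engine waits be skipped. The dedup logic in isolation, with an illustrative table shape (the driver derives the slot via intel_engine_sync_index() rather than indexing directly):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define NUM_ENGINES 4

/* sync_seqno[to][from]: the latest seqno of engine "from" that engine
 * "to" has already synchronised with. */
static uint32_t sync_seqno[NUM_ENGINES][NUM_ENGINES];

static bool need_wait(int to, int from, uint32_t seqno)
{
	if (seqno <= sync_seqno[to][from])
		return false; /* an earlier wait already covers this */

	sync_seqno[to][from] = seqno;
	return true;
}

int main(void)
{
	assert(need_wait(0, 1, 10));
	assert(!need_wait(0, 1, 8)); /* subsumed by the wait on 10 */
	assert(need_wait(0, 1, 12));
	return 0;
}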
+void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) +{ + struct intel_engine_cs *engine = request->engine; + struct intel_ring *ring = request->ring; + struct drm_i915_gem_request *prev; + u32 request_start; + u32 reserved_tail; + int ret; + + trace_i915_gem_request_add(request); + + /* + * To ensure that this call will not fail, space for its emissions + * should already have been reserved in the ring buffer. Let the ring + * know that it is time to use that space up. + */ + request_start = ring->tail; + reserved_tail = request->reserved_space; + request->reserved_space = 0; + + /* + * Emit any outstanding flushes - execbuf can fail to emit the flush + * after having emitted the batchbuffer command. Hence we need to fix + * things up similar to emitting the lazy request. The difference here + * is that the flush _must_ happen before the next request, no matter + * what. + */ + if (flush_caches) { + ret = engine->emit_flush(request, EMIT_FLUSH); + + /* Not allowed to fail! */ + WARN(ret, "engine->emit_flush() failed: %d!\n", ret); + } + + /* Record the position of the start of the breadcrumb so that + * should we detect the updated seqno part-way through the + * GPU processing the request, we never over-estimate the + * position of the ring's HEAD. + */ + request->postfix = ring->tail; + + /* Not allowed to fail! */ + ret = engine->emit_request(request); + WARN(ret, "(%s)->emit_request failed: %d!\n", engine->name, ret); + + /* Sanity check that the reserved size was large enough. */ + ret = ring->tail - request_start; + if (ret < 0) + ret += ring->size; + WARN_ONCE(ret > reserved_tail, + "Not enough space reserved (%d bytes) " + "for adding the request (%d bytes)\n", + reserved_tail, ret); + + /* Seal the request and mark it as pending execution. Note that + * we may inspect this state, without holding any locks, during + * hangcheck. Hence we apply the barrier to ensure that we do not + * see a more recent value in the hws than we are tracking. + */ + + prev = i915_gem_active_raw(&engine->last_request, + &request->i915->drm.struct_mutex); + if (prev) + i915_sw_fence_await_sw_fence(&request->submit, &prev->submit, + &request->submitq); + + request->emitted_jiffies = jiffies; + request->previous_seqno = engine->last_pending_seqno; + engine->last_pending_seqno = request->fence.seqno; + i915_gem_active_set(&engine->last_request, request); + list_add_tail(&request->link, &engine->request_list); + list_add_tail(&request->ring_link, &ring->request_list); + + i915_gem_mark_busy(engine); + + local_bh_disable(); + i915_sw_fence_commit(&request->submit); + local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ +} + +static void reset_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) +{ + unsigned long flags; + + spin_lock_irqsave(&q->lock, flags); + if (list_empty(&wait->task_list)) + __add_wait_queue(q, wait); + spin_unlock_irqrestore(&q->lock, flags); +} + +static unsigned long local_clock_us(unsigned int *cpu) +{ + unsigned long t; + + /* Cheaply and approximately convert from nanoseconds to microseconds. + * The result and subsequent calculations are also defined in the same + * approximate microseconds units. The principal source of timing + * error here is from the simple truncation. + * + * Note that local_clock() is only defined with respect to the current + * CPU; the comparisons are no longer valid if we switch CPUs. Instead of + * blocking preemption for the entire busywait, we can detect the CPU + * switch and use that as an indicator of system load and a reason to + * stop busywaiting, see busywait_stop(). + */ + *cpu = get_cpu(); + t = local_clock() >> 10; + put_cpu(); + + return t; +}
+static bool busywait_stop(unsigned long timeout, unsigned int cpu) +{ + unsigned int this_cpu; + + if (time_after(local_clock_us(&this_cpu), timeout)) + return true; + + return this_cpu != cpu; +} + +bool __i915_spin_request(const struct drm_i915_gem_request *req, + int state, unsigned long timeout_us) +{ + unsigned int cpu; + + /* When waiting for high frequency requests, e.g. during synchronous + * rendering split between the CPU and GPU, the finite amount of time + * required to set up the irq and wait upon it limits the response + * rate. By busywaiting on the request completion for a short while we + * can service the high frequency waits as quickly as possible. However, + * if it is a slow request, we want to sleep as quickly as possible. + * The tradeoff between waiting and sleeping is roughly the time it + * takes to sleep on a request, on the order of a microsecond. + */ + + timeout_us += local_clock_us(&cpu); + do { + if (i915_gem_request_completed(req)) + return true; + + if (signal_pending_state(state, current)) + break; + + if (busywait_stop(timeout_us, cpu)) + break; + + cpu_relax_lowlatency(); + } while (!need_resched()); + + return false; +} + +/** + * i915_wait_request - wait until execution of request has finished + * @req: the request to wait upon + * @flags: how to wait + * @timeout: in - how long to wait (NULL forever); out - how much time remaining + * @rps: client to charge for RPS boosting + * + * Note: It is of utmost importance that the passed-in seqno and reset_counter + * values have been read by the caller in an SMP-safe manner. Where read-side + * locks are involved, it is sufficient to read the reset_counter before + * unlocking the lock that protects the seqno. For lockless tricks, the + * reset_counter _must_ be read before, and an appropriate smp_rmb must be + * inserted. + * + * Returns 0 if the request was found within the allotted time. Else returns the + * errno with the remaining time filled in the timeout argument. + */ +int i915_wait_request(struct drm_i915_gem_request *req, + unsigned int flags, + s64 *timeout, + struct intel_rps_client *rps) +{ + const int state = flags & I915_WAIT_INTERRUPTIBLE ? + TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; + DEFINE_WAIT(reset); + struct intel_wait wait; + unsigned long timeout_remain; + int ret = 0; + + might_sleep(); +#if IS_ENABLED(CONFIG_LOCKDEP) + GEM_BUG_ON(!!lockdep_is_held(&req->i915->drm.struct_mutex) != + !!(flags & I915_WAIT_LOCKED)); +#endif + + if (i915_gem_request_completed(req)) + return 0; + + timeout_remain = MAX_SCHEDULE_TIMEOUT; + if (timeout) { + if (WARN_ON(*timeout < 0)) + return -EINVAL; + + if (*timeout == 0) + return -ETIME; + + /* Record current time in case interrupted, or wedged */ + timeout_remain = nsecs_to_jiffies_timeout(*timeout); + *timeout += ktime_get_raw_ns(); + } + + trace_i915_gem_request_wait_begin(req); + + /* This client is about to stall waiting for the GPU. In many cases + * this is undesirable and limits the throughput of the system, as + * many clients cannot continue processing user input/output whilst + * blocked. RPS autotuning may take tens of milliseconds to respond + * to the GPU load and thus incurs additional latency for the client. + * We can circumvent that by promoting the GPU frequency to maximum + * before we wait. This makes the GPU throttle up much more quickly + * (good for benchmarks and user experience, e.g. window animations), + * but at a cost of spending more power processing the workload + * (bad for battery). Not all clients even want their results + * immediately and for them we should just let the GPU select its own + * frequency to maximise efficiency. To prevent a single client from + * forcing the clocks too high for the whole system, we only allow + * each client to waitboost once in a busy period. + */
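The timeout plumbing in i915_wait_request() converts the caller's relative timeout into an absolute deadline on entry and back into time remaining on exit, clamping at zero. The shape of that bookkeeping, with a fake clock standing in for ktime_get_raw_ns():

#include <assert.h>
#include <stdint.h>

static int64_t now_ns; /* stand-in for ktime_get_raw_ns() */

static void wait_enter(int64_t *timeout)
{
	*timeout += now_ns; /* relative timeout -> absolute deadline */
}

static void wait_exit(int64_t *timeout)
{
	*timeout -= now_ns; /* absolute deadline -> time remaining */
	if (*timeout < 0)
		*timeout = 0;
}

int main(void)
{
	int64_t timeout = 1000;

	now_ns = 5000;
	wait_enter(&timeout); /* deadline = 6000 */
	now_ns = 5600; /* 600ns elapsed inside the wait */
	wait_exit(&timeout);
	assert(timeout == 400); /* 400ns handed back to the caller */
	return 0;
}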
+ if (IS_RPS_CLIENT(rps) && INTEL_GEN(req->i915) >= 6) + gen6_rps_boost(req->i915, rps, req->emitted_jiffies); + + /* Optimistic short spin before touching IRQs */ + if (i915_spin_request(req, state, 5)) + goto complete; + + set_current_state(state); + if (flags & I915_WAIT_LOCKED) + add_wait_queue(&req->i915->gpu_error.wait_queue, &reset); + + intel_wait_init(&wait, req->fence.seqno); + if (intel_engine_add_wait(req->engine, &wait)) + /* In order to check that we haven't missed the interrupt + * as we enabled it, we need to kick ourselves to do a + * coherent check on the seqno before we sleep. + */ + goto wakeup; + + for (;;) { + if (signal_pending_state(state, current)) { + ret = -ERESTARTSYS; + break; + } + + timeout_remain = io_schedule_timeout(timeout_remain); + if (timeout_remain == 0) { + ret = -ETIME; + break; + } + + if (intel_wait_complete(&wait)) + break; + + set_current_state(state); + +wakeup: + /* Carefully check if the request is complete, giving time + * for the seqno to be visible following the interrupt. + * We also have to check in case we are kicked by the GPU + * reset in order to drop the struct_mutex. + */ + if (__i915_request_irq_complete(req)) + break; + + /* If the GPU is hung, and we hold the lock, reset the GPU + * and then check for completion. On a full reset, the engine's + * HW seqno will be advanced past us and we are complete. + * If we do a partial reset, we have to wait for the GPU to + * resume and update the breadcrumb. + * + * If we don't hold the mutex, we can just wait for the worker + * to come along and update the breadcrumb (either directly + * itself, or indirectly by recovering the GPU). + */ + if (flags & I915_WAIT_LOCKED && + i915_reset_in_progress(&req->i915->gpu_error)) { + __set_current_state(TASK_RUNNING); + i915_reset(req->i915); + reset_wait_queue(&req->i915->gpu_error.wait_queue, + &reset); + continue; + } + + /* Only spin if we know the GPU is processing this request */ + if (i915_spin_request(req, state, 2)) + break; + } + + intel_engine_remove_wait(req->engine, &wait); + if (flags & I915_WAIT_LOCKED) + remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset); + __set_current_state(TASK_RUNNING); + +complete: + trace_i915_gem_request_wait_end(req); + + if (timeout) { + *timeout -= ktime_get_raw_ns(); + if (*timeout < 0) + *timeout = 0; + + /* + * Apparently ktime isn't accurate enough and occasionally has a + * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch + * things up to make the test happy. We allow up to 1 jiffy. + * + * This is a regression from the timespec->ktime conversion. + */ + if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000) + *timeout = 0; + } + + if (IS_RPS_USER(rps) && + req->fence.seqno == req->engine->last_submitted_seqno) { + /* The GPU is now idle and this client has stalled.
+ * Since no other client has submitted a request in the + * meantime, assume that this client is the only one + * supplying work to the GPU but is unable to keep that + * work supplied because it is waiting. Since the GPU is + * then never kept fully busy, RPS autoclocking will + * keep the clocks relatively low, causing further delays. + * Compensate by giving the synchronous client credit for + * a waitboost next time. + */ + spin_lock(&req->i915->rps.client_lock); + list_del_init(&rps->link); + spin_unlock(&req->i915->rps.client_lock); + } + + return ret; +} + +static bool engine_retire_requests(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_request *request, *next; + + list_for_each_entry_safe(request, next, &engine->request_list, link) { + if (!i915_gem_request_completed(request)) + return false; + + i915_gem_request_retire(request); + } + + return true; +} + +void i915_gem_retire_requests(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + unsigned int tmp; + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + if (dev_priv->gt.active_engines == 0) + return; + + GEM_BUG_ON(!dev_priv->gt.awake); + + for_each_engine_masked(engine, dev_priv, dev_priv->gt.active_engines, tmp) + if (engine_retire_requests(engine)) + dev_priv->gt.active_engines &= ~intel_engine_flag(engine); + + if (dev_priv->gt.active_engines == 0) + queue_delayed_work(dev_priv->wq, + &dev_priv->gt.idle_work, + msecs_to_jiffies(100)); +} diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h new file mode 100644 index 000000000000..974bd7bcc801 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -0,0 +1,689 @@ +/* + * Copyright © 2008-2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef I915_GEM_REQUEST_H +#define I915_GEM_REQUEST_H + +#include <linux/fence.h> + +#include "i915_gem.h" +#include "i915_sw_fence.h" + +struct intel_wait { + struct rb_node node; + struct task_struct *tsk; + u32 seqno; +}; + +struct intel_signal_node { + struct rb_node node; + struct intel_wait wait; +}; + +/** + * Request queue structure. + * + * The request queue allows us to note sequence numbers that have been emitted + * and may be associated with active buffers to be retired. + * + * By keeping this list, we can avoid having to do questionable sequence + * number comparisons on buffer last_read|write_seqno. 
It also allows an + * emission time to be associated with the request for tracking how far ahead + * of the GPU the submission is. + * + * When modifying this structure be very aware that we perform a lockless + * RCU lookup of it that may race against reallocation of the struct + * from the slab freelist. We intentionally do not zero the structure on + * allocation so that the lookup can use the dangling pointers (and is + * cognisant that those pointers may be wrong). Instead, everything that + * needs to be initialised must be done so explicitly. + * + * The requests are reference counted. + */ +struct drm_i915_gem_request { + struct fence fence; + spinlock_t lock; + + /** On which ring this request was generated */ + struct drm_i915_private *i915; + + /** + * Context and ring buffer related to this request + * Contexts are refcounted, so when this request is associated with a + * context, we must increment the context's refcount, to guarantee that + * it persists while any request is linked to it. Requests themselves + * are also refcounted, so the request will only be freed when the last + * reference to it is dismissed, and the code in + * i915_gem_request_free() will then decrement the refcount on the + * context. + */ + struct i915_gem_context *ctx; + struct intel_engine_cs *engine; + struct intel_ring *ring; + struct intel_signal_node signaling; + + struct i915_sw_fence submit; + wait_queue_t submitq; + + /** GEM sequence number associated with the previous request, + * when the HWS breadcrumb is equal to this, the GPU is processing + * this request. + */ + u32 previous_seqno; + + /** Position in the ring of the start of the request */ + u32 head; + + /** + * Position in the ring of the start of the postfix. + * This is required to calculate the maximum available ring space + * without overwriting the postfix. + */ + u32 postfix; + + /** Position in the ring of the end of the whole request */ + u32 tail; + + /** Position in the ring of the end of any workarounds after the tail */ + u32 wa_tail; + + /** Preallocated space in the ring for emitting the request */ + u32 reserved_space; + + /** + * Context related to the previous request. + * As the contexts are accessed by the hardware until the switch is + * completed to a new context, the hardware may still be writing + * to the context object after the breadcrumb is visible. We must + * not unpin/unbind/prune that object whilst still active and so + * we keep the previous context pinned until the following (this) + * request is retired. + */ + struct i915_gem_context *previous_context; + + /** Batch buffer related to this request if any (used for + * error state dump only). + */ + struct i915_vma *batch; + struct list_head active_list; + + /** Time at which this request was emitted, in jiffies. */ + unsigned long emitted_jiffies; + + /** engine->request_list entry for this request */ + struct list_head link; + + /** ring->request_list entry for this request */ + struct list_head ring_link; + + struct drm_i915_file_private *file_priv; + /** file_priv list entry for this request */ + struct list_head client_list; + + /** Link in the execlist submission queue, guarded by execlist_lock.
*/ + struct list_head execlist_link; +}; + +extern const struct fence_ops i915_fence_ops; + +static inline bool fence_is_i915(struct fence *fence) +{ + return fence->ops == &i915_fence_ops; +} + +struct drm_i915_gem_request * __must_check +i915_gem_request_alloc(struct intel_engine_cs *engine, + struct i915_gem_context *ctx); +int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, + struct drm_file *file); +void i915_gem_request_retire_upto(struct drm_i915_gem_request *req); + +static inline u32 +i915_gem_request_get_seqno(struct drm_i915_gem_request *req) +{ + return req ? req->fence.seqno : 0; +} + +static inline struct intel_engine_cs * +i915_gem_request_get_engine(struct drm_i915_gem_request *req) +{ + return req ? req->engine : NULL; +} + +static inline struct drm_i915_gem_request * +to_request(struct fence *fence) +{ + /* We assume that NULL fence/request are interoperable */ + BUILD_BUG_ON(offsetof(struct drm_i915_gem_request, fence) != 0); + GEM_BUG_ON(fence && !fence_is_i915(fence)); + return container_of(fence, struct drm_i915_gem_request, fence); +} + +static inline struct drm_i915_gem_request * +i915_gem_request_get(struct drm_i915_gem_request *req) +{ + return to_request(fence_get(&req->fence)); +} + +static inline struct drm_i915_gem_request * +i915_gem_request_get_rcu(struct drm_i915_gem_request *req) +{ + return to_request(fence_get_rcu(&req->fence)); +} + +static inline void +i915_gem_request_put(struct drm_i915_gem_request *req) +{ + fence_put(&req->fence); +} + +static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, + struct drm_i915_gem_request *src) +{ + if (src) + i915_gem_request_get(src); + + if (*pdst) + i915_gem_request_put(*pdst); + + *pdst = src; +} + +int +i915_gem_request_await_object(struct drm_i915_gem_request *to, + struct drm_i915_gem_object *obj, + bool write); + +void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches); +#define i915_add_request(req) \ + __i915_add_request(req, true) +#define i915_add_request_no_flush(req) \ + __i915_add_request(req, false) + +struct intel_rps_client; +#define NO_WAITBOOST ERR_PTR(-1) +#define IS_RPS_CLIENT(p) (!IS_ERR(p)) +#define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p)) + +int i915_wait_request(struct drm_i915_gem_request *req, + unsigned int flags, + s64 *timeout, + struct intel_rps_client *rps) + __attribute__((nonnull(1))); +#define I915_WAIT_INTERRUPTIBLE BIT(0) +#define I915_WAIT_LOCKED BIT(1) /* struct_mutex held, handle GPU reset */ + +static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine); + +/** + * Returns true if seq1 is later than seq2. + */ +static inline bool i915_seqno_passed(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) >= 0; +} + +static inline bool +i915_gem_request_started(const struct drm_i915_gem_request *req) +{ + return i915_seqno_passed(intel_engine_get_seqno(req->engine), + req->previous_seqno); +} + +static inline bool +i915_gem_request_completed(const struct drm_i915_gem_request *req) +{ + return i915_seqno_passed(intel_engine_get_seqno(req->engine), + req->fence.seqno); +} + +bool __i915_spin_request(const struct drm_i915_gem_request *request, + int state, unsigned long timeout_us); +static inline bool i915_spin_request(const struct drm_i915_gem_request *request, + int state, unsigned long timeout_us) +{ + return (i915_gem_request_started(request) && + __i915_spin_request(request, state, timeout_us)); +} + +/* We treat requests as fences. 
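to_request() relies on the fence being the very first member of the request, so the two pointers alias and a NULL fence maps to a NULL request with no arithmetic. Standalone, with stand-in types:

#include <assert.h>
#include <stddef.h>

struct fence {
	unsigned int seqno;
};

/* The fence is deliberately the first member, mirroring the
 * BUILD_BUG_ON() in to_request() above. */
struct request {
	struct fence fence;
	int payload;
};

#define to_req(f) \
	((struct request *)((char *)(f) - offsetof(struct request, fence)))

int main(void)
{
	struct request rq = { .fence = { .seqno = 1 }, .payload = 42 };

	assert((void *)to_req(&rq.fence) == (void *)&rq);
	/* With offset zero, the NULL fence <-> NULL request pun is safe. */
	assert(offsetof(struct request, fence) == 0);
	return 0;
}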
These fences are not to be confused with our + * "fence registers", but are pipeline synchronisation objects a la GL_ARB_sync. + * We use the fences to synchronize access from the CPU with activity on the + * GPU, for example, we should not rewrite an object's PTE whilst the GPU + * is reading them. We also track fences at a higher level to provide + * implicit synchronisation around GEM objects, e.g. set-domain will wait + * for outstanding GPU rendering before marking the object ready for CPU + * access, or a pageflip will wait until the GPU is complete before showing + * the frame on the scanout. + * + * In order to use a fence, the object must track the fence it needs to + * serialise with. For example, GEM objects want to track both read and + * write access so that we can perform concurrent read operations between + * the CPU and GPU engines, as well as waiting for all rendering to + * complete, or waiting for the last GPU user of a "fence register". The + * object then embeds a #i915_gem_active to track the most recent (in + * retirement order) request relevant for the desired mode of access. + * The #i915_gem_active is updated with i915_gem_active_set() to track the + * most recent fence request, typically this is done as part of + * i915_vma_move_to_active(). + * + * When the #i915_gem_active completes (is retired), it will + * signal its completion to the owner through a callback as well as mark + * itself as idle (i915_gem_active.request == NULL). The owner + * can then perform any action, such as delayed freeing of an active + * resource including itself. + */ +struct i915_gem_active; + +typedef void (*i915_gem_retire_fn)(struct i915_gem_active *, + struct drm_i915_gem_request *); + +struct i915_gem_active { + struct drm_i915_gem_request __rcu *request; + struct list_head link; + i915_gem_retire_fn retire; +}; + +void i915_gem_retire_noop(struct i915_gem_active *, + struct drm_i915_gem_request *request); + +/** + * init_request_active - prepares the activity tracker for use + * @active - the active tracker + * @retire - a callback invoked when the tracker is retired (becomes idle), + * can be NULL + * + * init_request_active() prepares the embedded @active struct for use as + * an activity tracker, that is for tracking the last known active request + * associated with it. When the last request becomes idle, when it is retired + * after completion, the optional callback @retire is invoked. + */ +static inline void +init_request_active(struct i915_gem_active *active, + i915_gem_retire_fn retire) +{ + INIT_LIST_HEAD(&active->link); + active->retire = retire ?: i915_gem_retire_noop; +} + +/** + * i915_gem_active_set - updates the tracker to watch the current request + * @active - the active tracker + * @request - the request to watch + * + * i915_gem_active_set() watches the given @request for completion. Whilst + * that @request is busy, the @active reports busy. When that @request is + * retired, the @active tracker is updated to report idle. + */ +static inline void +i915_gem_active_set(struct i915_gem_active *active, + struct drm_i915_gem_request *request) +{ + list_move(&active->link, &request->active_list); + rcu_assign_pointer(active->request, request); +} + +static inline struct drm_i915_gem_request * +__i915_gem_active_peek(const struct i915_gem_active *active) +{ + /* Inside the error capture (running with the driver in an unknown + * state), we want to bend the rules slightly (a lot).
+ * + * Work is in progress to make it safer, in the meantime this keeps + * the known issue from spamming the logs. + */ + return rcu_dereference_protected(active->request, 1); +} + +/** + * i915_gem_active_raw - return the active request + * @active - the active tracker + * + * i915_gem_active_raw() returns the current request being tracked, or NULL. + * It does not obtain a reference on the request for the caller, so the caller + * must hold struct_mutex. + */ +static inline struct drm_i915_gem_request * +i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex) +{ + return rcu_dereference_protected(active->request, + lockdep_is_held(mutex)); +} + +/** + * i915_gem_active_peek - report the active request being monitored + * @active - the active tracker + * + * i915_gem_active_peek() returns the current request being tracked if + * still active, or NULL. It does not obtain a reference on the request + * for the caller, so the caller must hold struct_mutex. + */ +static inline struct drm_i915_gem_request * +i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex) +{ + struct drm_i915_gem_request *request; + + request = i915_gem_active_raw(active, mutex); + if (!request || i915_gem_request_completed(request)) + return NULL; + + return request; +} + +/** + * i915_gem_active_get - return a reference to the active request + * @active - the active tracker + * + * i915_gem_active_get() returns a reference to the active request, or NULL + * if the active tracker is idle. The caller must hold struct_mutex. + */ +static inline struct drm_i915_gem_request * +i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex) +{ + return i915_gem_request_get(i915_gem_active_peek(active, mutex)); +} + +/** + * __i915_gem_active_get_rcu - return a reference to the active request + * @active - the active tracker + * + * __i915_gem_active_get() returns a reference to the active request, or NULL + * if the active tracker is idle. The caller must hold the RCU read lock, but + * the returned pointer is safe to use outside of RCU. + */ +static inline struct drm_i915_gem_request * +__i915_gem_active_get_rcu(const struct i915_gem_active *active) +{ + /* Performing a lockless retrieval of the active request is super + * tricky. SLAB_DESTROY_BY_RCU merely guarantees that the backing + * slab of request objects will not be freed whilst we hold the + * RCU read lock. It does not guarantee that the request itself + * will not be freed and then *reused*. Viz, + * + * Thread A Thread B + * + * req = active.request + * retire(req) -> free(req); + * (req is now first on the slab freelist) + * active.request = NULL + * + * req = new submission on a new object + * ref(req) + * + * To prevent the request from being reused whilst the caller + * uses it, we take a reference like normal. Whilst acquiring + * the reference we check that it is not in a destroyed state + * (refcnt == 0). That prevents the request being reallocated + * whilst the caller holds on to it. To check that the request + * was not reallocated as we acquired the reference we have to + * check that our request remains the active request across + * the lookup, in the same manner as a seqlock. The visibility + * of the pointer versus the reference counting is controlled + * by using RCU barriers (rcu_dereference and rcu_assign_pointer). + * + * In the middle of all that, we inspect whether the request is + * complete. 
Retiring is lazy so the request may be completed long + * before the active tracker is updated. Querying whether the + * request is complete is far cheaper (as it involves no locked + * instructions setting cachelines to exclusive) than acquiring + * the reference, so we do it first. The RCU read lock ensures the + * pointer dereference is valid, but does not ensure that the + * seqno nor HWS is the right one! However, if the request was + * reallocated, that means the active tracker's request was complete. + * If the new request is also complete, then both are and we can + * just report the active tracker is idle. If the new request is + * incomplete, then we acquire a reference on it and check that + * it remained the active request. + * + * It is then imperative that we do not zero the request on + * reallocation, so that we can chase the dangling pointers! + * See i915_gem_request_alloc(). + */ + do { + struct drm_i915_gem_request *request; + + request = rcu_dereference(active->request); + if (!request || i915_gem_request_completed(request)) + return NULL; + + /* An especially silly compiler could decide to recompute the + * result of i915_gem_request_completed, more specifically + * re-emit the load for request->fence.seqno. A race would catch + * a later seqno value, which could flip the result from true to + * false. Which means part of the instructions below might not + * be executed, while later on instructions are executed. Due to + * barriers within the refcounting the inconsistency can't reach + * past the call to i915_gem_request_get_rcu, but not executing + * that while still executing i915_gem_request_put() creates + * havoc enough. Prevent this with a compiler barrier. + */ + barrier(); + + request = i915_gem_request_get_rcu(request); + + /* What stops the following rcu_access_pointer() from occurring + * before the above i915_gem_request_get_rcu()? If we were + * to read the value before pausing to get the reference to + * the request, we may not notice a change in the active + * tracker. + * + * The rcu_access_pointer() is a mere compiler barrier, which + * means both the CPU and compiler are free to perform the + * memory read without constraint. The compiler only has to + * ensure that any operations after the rcu_access_pointer() + * occur afterwards in program order. This means the read may + * be performed earlier by an out-of-order CPU, or adventurous + * compiler. + * + * The atomic operation at the heart of + * i915_gem_request_get_rcu(), see fence_get_rcu(), is + * atomic_inc_not_zero() which is only a full memory barrier + * when successful. That is, if i915_gem_request_get_rcu() + * returns the request (and so with the reference counted + * incremented) then the following read for rcu_access_pointer() + * must occur after the atomic operation and so confirm + * that this request is the one currently being tracked. + * + * The corresponding write barrier is part of + * rcu_assign_pointer(). + */ + if (!request || request == rcu_access_pointer(active->request)) + return rcu_pointer_handoff(request); + + i915_gem_request_put(request); + } while (1); +} + +/** + * i915_gem_active_get_unlocked - return a reference to the active request + * @active - the active tracker + * + * i915_gem_active_get_unlocked() returns a reference to the active request, + * or NULL if the active tracker is idle. The reference is obtained under RCU, + * so no locking is required by the caller. + * + * The reference should be freed with i915_gem_request_put(). 
+static inline struct drm_i915_gem_request * +i915_gem_active_get_unlocked(const struct i915_gem_active *active) +{ + struct drm_i915_gem_request *request; + + rcu_read_lock(); + request = __i915_gem_active_get_rcu(active); + rcu_read_unlock(); + + return request; +} + +/** + * i915_gem_active_isset - report whether the active tracker is assigned + * @active - the active tracker + * + * i915_gem_active_isset() returns true if the active tracker is currently + * assigned to a request. Due to the lazy retiring, that request may be idle + * and this may report stale information. + */ +static inline bool +i915_gem_active_isset(const struct i915_gem_active *active) +{ + return rcu_access_pointer(active->request); +} + +/** + * i915_gem_active_is_idle - report whether the active tracker is idle + * @active - the active tracker + * + * i915_gem_active_is_idle() returns true if the active tracker is currently + * unassigned or if the request is complete (but not yet retired). Requires + * the caller to hold struct_mutex (but that can be relaxed if desired). + */ +static inline bool +i915_gem_active_is_idle(const struct i915_gem_active *active, + struct mutex *mutex) +{ + return !i915_gem_active_peek(active, mutex); +} + +/** + * i915_gem_active_wait - waits until the request is completed + * @active - the active request on which to wait + * + * i915_gem_active_wait() waits until the request is completed before + * returning. Note that it does not guarantee that the request is + * retired first, see i915_gem_active_retire(). + * + * i915_gem_active_wait() returns immediately if the active + * request is already complete. + */ +static inline int __must_check +i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex) +{ + struct drm_i915_gem_request *request; + + request = i915_gem_active_peek(active, mutex); + if (!request) + return 0; + + return i915_wait_request(request, + I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, + NULL, NULL); +} + +/** + * i915_gem_active_wait_unlocked - waits until the request is completed + * @active - the active request on which to wait + * @flags - how to wait + * @timeout - how long to wait at most + * @rps - userspace client to charge for a waitboost + * + * i915_gem_active_wait_unlocked() waits until the request is completed before + * returning, without requiring any locks to be held. Note that it does not + * retire any requests before returning. + * + * This function relies on RCU in order to acquire the reference to the active + * request without holding any locks. See __i915_gem_active_get_rcu() for the + * gory details on how that is managed. Once the reference is acquired, we + * can then wait upon the request, and afterwards release our reference, + * free of any locking. + * + * This function wraps i915_wait_request(), see it for the full details on + * the arguments. + * + * Returns 0 if successful, or a negative error code.
+ */ +static inline int +i915_gem_active_wait_unlocked(const struct i915_gem_active *active, + unsigned int flags, + s64 *timeout, + struct intel_rps_client *rps) +{ + struct drm_i915_gem_request *request; + int ret = 0; + + request = i915_gem_active_get_unlocked(active); + if (request) { + ret = i915_wait_request(request, flags, timeout, rps); + i915_gem_request_put(request); + } + + return ret; +} + +/** + * i915_gem_active_retire - waits until the request is retired + * @active - the active request on which to wait + * + * i915_gem_active_retire() waits until the request is completed, + * and then ensures that at least the retirement handler for this + * @active tracker is called before returning. If the @active + * tracker is idle, the function returns immediately. + */ +static inline int __must_check +i915_gem_active_retire(struct i915_gem_active *active, + struct mutex *mutex) +{ + struct drm_i915_gem_request *request; + int ret; + + request = i915_gem_active_raw(active, mutex); + if (!request) + return 0; + + ret = i915_wait_request(request, + I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, + NULL, NULL); + if (ret) + return ret; + + list_del_init(&active->link); + RCU_INIT_POINTER(active->request, NULL); + + active->retire(active, request); + + return 0; +} + +/* Convenience functions for peeking at state inside active's request whilst + * guarded by the struct_mutex. + */ + +static inline uint32_t +i915_gem_active_get_seqno(const struct i915_gem_active *active, + struct mutex *mutex) +{ + return i915_gem_request_get_seqno(i915_gem_active_peek(active, mutex)); +} + +static inline struct intel_engine_cs * +i915_gem_active_get_engine(const struct i915_gem_active *active, + struct mutex *mutex) +{ + return i915_gem_request_get_engine(i915_gem_active_peek(active, mutex)); +} + +#define for_each_active(mask, idx) \ + for (; mask ? idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx)) + +#endif /* I915_GEM_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 6f10b421487b..1c237d02f30b 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -48,19 +48,15 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) #endif } -static int num_vma_bound(struct drm_i915_gem_object *obj) +static bool any_vma_pinned(struct drm_i915_gem_object *obj) { struct i915_vma *vma; - int count = 0; - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (drm_mm_node_allocated(&vma->node)) - count++; - if (vma->pin_count) - count++; - } + list_for_each_entry(vma, &obj->vma_list, obj_link) + if (i915_vma_is_pinned(vma)) + return true; - return count; + return false; } static bool swap_available(void) @@ -82,7 +78,10 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) * to the GPU, simply unbinding from the GPU is not going to succeed * in releasing our pin count on the pages themselves. 
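for_each_active() above is a destructive iterate-over-set-bits loop: take the lowest set bit's index via ffs(), run the body, clear the bit, repeat. The same shape in plain C:

#include <assert.h>
#include <strings.h> /* ffs() */

/* Destructive set-bit iteration in the shape of for_each_active(). */
#define for_each_set(mask, idx) \
	for (; (mask) ? ((idx) = ffs(mask) - 1), 1 : 0; (mask) &= ~(1u << (idx)))

int main(void)
{
	unsigned int mask = 0x16; /* bits 1, 2 and 4 set */
	int idx, sum = 0;

	for_each_set(mask, idx)
		sum = sum * 10 + idx;

	assert(sum == 124); /* visited 1, then 2, then 4 */
	assert(mask == 0); /* the mask is consumed by the walk */
	return 0;
}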
*/ - if (obj->pages_pin_count != num_vma_bound(obj)) + if (obj->pages_pin_count > obj->bind_count) + return false; + + if (any_vma_pinned(obj)) return false; /* We can only return physical pages to the system if we can either @@ -163,17 +162,16 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, */ for (phase = phases; phase->list; phase++) { struct list_head still_in_list; + struct drm_i915_gem_object *obj; if ((flags & phase->bit) == 0) continue; INIT_LIST_HEAD(&still_in_list); - while (count < target && !list_empty(phase->list)) { - struct drm_i915_gem_object *obj; - struct i915_vma *vma, *v; - - obj = list_first_entry(phase->list, - typeof(*obj), global_list); + while (count < target && + (obj = list_first_entry_or_null(phase->list, + typeof(*obj), + global_list))) { list_move_tail(&obj->global_list, &still_in_list); if (flags & I915_SHRINK_PURGEABLE && @@ -184,24 +182,21 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, !is_vmalloc_addr(obj->mapping)) continue; - if ((flags & I915_SHRINK_ACTIVE) == 0 && obj->active) + if ((flags & I915_SHRINK_ACTIVE) == 0 && + i915_gem_object_is_active(obj)) continue; if (!can_release_pages(obj)) continue; - drm_gem_object_reference(&obj->base); + i915_gem_object_get(obj); /* For the unbound phase, this should be a no-op! */ - list_for_each_entry_safe(vma, v, - &obj->vma_list, obj_link) - if (i915_vma_unbind(vma)) - break; - + i915_gem_object_unbind(obj); if (i915_gem_object_put_pages(obj) == 0) count += obj->base.size >> PAGE_SHIFT; - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); } list_splice(&still_in_list, phase->list); } @@ -210,6 +205,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, intel_runtime_pm_put(dev_priv); i915_gem_retire_requests(dev_priv); + /* expedite the RCU grace period to free some request slabs */ + synchronize_rcu_expedited(); return count; } @@ -230,10 +227,15 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, */ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) { - return i915_gem_shrink(dev_priv, -1UL, - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_ACTIVE); + unsigned long freed; + + freed = i915_gem_shrink(dev_priv, -1UL, + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND | + I915_SHRINK_ACTIVE); + rcu_barrier(); /* wait until our RCU delayed slab frees are completed */ + + return freed; } static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) @@ -242,9 +244,6 @@ static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) if (!mutex_is_locked_by(&dev->struct_mutex, current)) return false; - if (to_i915(dev)->mm.shrinker_no_lock_stealing) - return false; - *unlock = false; } else *unlock = true; @@ -273,7 +272,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) count += obj->base.size >> PAGE_SHIFT; list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - if (!obj->active && can_release_pages(obj)) + if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) count += obj->base.size >> PAGE_SHIFT; } @@ -321,17 +320,22 @@ i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, struct shrinker_lock_uninterruptible *slu, int timeout_ms) { - unsigned long timeout = msecs_to_jiffies(timeout_ms) + 1; + unsigned long timeout = jiffies + msecs_to_jiffies_timeout(timeout_ms); + + do { + if (i915_gem_wait_for_idle(dev_priv, 0) == 0 && + i915_gem_shrinker_lock(&dev_priv->drm, &slu->unlock)) + break; - while (!i915_gem_shrinker_lock(&dev_priv->drm, &slu->unlock)) { 
schedule_timeout_killable(1); if (fatal_signal_pending(current)) return false; - if (--timeout == 0) { + + if (time_after(jiffies, timeout)) { pr_err("Unable to lock GPU to purge memory.\n"); return false; } - } + } while (1); slu->was_interruptible = dev_priv->mm.interruptible; dev_priv->mm.interruptible = false; @@ -410,7 +414,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr return NOTIFY_DONE; /* Force everything onto the inactive lists */ - ret = i915_gem_wait_for_idle(dev_priv); + ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED); if (ret) goto out; diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 66be299a1486..59989e8ee5dc 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -92,6 +92,7 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, static unsigned long i915_stolen_to_physical(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + struct pci_dev *pdev = dev_priv->drm.pdev; struct i915_ggtt *ggtt = &dev_priv->ggtt; struct resource *r; u32 base; @@ -111,33 +112,44 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev) if (INTEL_INFO(dev)->gen >= 3) { u32 bsm; - pci_read_config_dword(dev->pdev, INTEL_BSM, &bsm); + pci_read_config_dword(pdev, INTEL_BSM, &bsm); base = bsm & INTEL_BSM_MASK; } else if (IS_I865G(dev)) { + u32 tseg_size = 0; u16 toud = 0; + u8 tmp; - /* - * FIXME is the graphics stolen memory region - * always at TOUD? Ie. is it always the last - * one to be allocated by the BIOS? - */ - pci_bus_read_config_word(dev->pdev->bus, PCI_DEVFN(0, 0), + pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0), + I845_ESMRAMC, &tmp); + + if (tmp & TSEG_ENABLE) { + switch (tmp & I845_TSEG_SIZE_MASK) { + case I845_TSEG_SIZE_512K: + tseg_size = KB(512); + break; + case I845_TSEG_SIZE_1M: + tseg_size = MB(1); + break; + } + } + + pci_bus_read_config_word(pdev->bus, PCI_DEVFN(0, 0), I865_TOUD, &toud); - base = toud << 16; + base = (toud << 16) + tseg_size; } else if (IS_I85X(dev)) { u32 tseg_size = 0; u32 tom; u8 tmp; - pci_bus_read_config_byte(dev->pdev->bus, PCI_DEVFN(0, 0), + pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0), I85X_ESMRAMC, &tmp); if (tmp & TSEG_ENABLE) tseg_size = MB(1); - pci_bus_read_config_byte(dev->pdev->bus, PCI_DEVFN(0, 1), + pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 1), I85X_DRB3, &tmp); tom = tmp * MB(32); @@ -147,7 +159,7 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev) u32 tom; u8 tmp; - pci_bus_read_config_byte(dev->pdev->bus, PCI_DEVFN(0, 0), + pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0), I845_ESMRAMC, &tmp); if (tmp & TSEG_ENABLE) { @@ -161,7 +173,7 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev) } } - pci_bus_read_config_byte(dev->pdev->bus, PCI_DEVFN(0, 0), + pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0), I830_DRB3, &tmp); tom = tmp * MB(32); @@ -171,7 +183,7 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev) u32 tom; u8 tmp; - pci_bus_read_config_byte(dev->pdev->bus, PCI_DEVFN(0, 0), + pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0), I830_ESMRAMC, &tmp); if (tmp & TSEG_ENABLE) { @@ -181,7 +193,7 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev) tseg_size = KB(512); } - pci_bus_read_config_byte(dev->pdev->bus, PCI_DEVFN(0, 0), + pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0), I830_DRB3, &tmp); tom = tmp * MB(32); @@ -685,7 +697,7 @@ 
i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, if (gtt_offset == I915_GTT_OFFSET_NONE) return obj; - vma = i915_gem_obj_lookup_or_create_vma(obj, &ggtt->base); + vma = i915_gem_obj_lookup_or_create_vma(obj, &ggtt->base, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err; @@ -698,24 +710,25 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, */ vma->node.start = gtt_offset; vma->node.size = size; - if (drm_mm_initialized(&ggtt->base.mm)) { - ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node); - if (ret) { - DRM_DEBUG_KMS("failed to allocate stolen GTT space\n"); - goto err; - } - vma->bound |= GLOBAL_BIND; - __i915_vma_set_map_and_fenceable(vma); - list_add_tail(&vma->vm_link, &ggtt->base.inactive_list); + ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node); + if (ret) { + DRM_DEBUG_KMS("failed to allocate stolen GTT space\n"); + goto err; } + vma->pages = obj->pages; + vma->flags |= I915_VMA_GLOBAL_BIND; + __i915_vma_set_map_and_fenceable(vma); + list_move_tail(&vma->vm_link, &ggtt->base.inactive_list); + obj->bind_count++; + list_add_tail(&obj->global_list, &dev_priv->mm.bound_list); i915_gem_object_pin_pages(obj); return obj; err: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); return NULL; } diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 8030199731db..a14b1e3d4c78 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -68,6 +68,9 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode) if (tiling_mode == I915_TILING_NONE) return true; + if (tiling_mode > I915_TILING_LAST) + return false; + if (IS_GEN2(dev) || (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))) tile_width = 128; @@ -113,36 +116,58 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode) return true; } -/* Is the current GTT allocation valid for the change in tiling? */ -static bool -i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode) +static bool i915_vma_fence_prepare(struct i915_vma *vma, int tiling_mode) { + struct drm_i915_private *dev_priv = to_i915(vma->vm->dev); u32 size; - if (tiling_mode == I915_TILING_NONE) - return true; - - if (INTEL_INFO(obj->base.dev)->gen >= 4) + if (!i915_vma_is_map_and_fenceable(vma)) return true; - if (IS_GEN3(obj->base.dev)) { - if (i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) + if (INTEL_GEN(dev_priv) == 3) { + if (vma->node.start & ~I915_FENCE_START_MASK) return false; } else { - if (i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) + if (vma->node.start & ~I830_FENCE_START_MASK) return false; } - size = i915_gem_get_gtt_size(obj->base.dev, obj->base.size, tiling_mode); - if (i915_gem_obj_ggtt_size(obj) != size) + size = i915_gem_get_ggtt_size(dev_priv, vma->size, tiling_mode); + if (vma->node.size < size) return false; - if (i915_gem_obj_ggtt_offset(obj) & (size - 1)) + if (vma->node.start & (size - 1)) return false; return true; } +/* Make the current GTT allocation valid for the change in tiling. 
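 *
 * The placement rules tested in i915_vma_fence_prepare() above boil
 * down to a power-of-two alignment check; a minimal illustrative
 * helper (SZ_1M is just an example fence-region size):
 *
 *	static bool fence_aligned(u64 start, u64 size)
 *	{
 *		return (start & (size - 1)) == 0;
 *	}
 *
 * e.g. fence_aligned(0x00300000, SZ_1M) holds, while a vma starting at
 * 0x00380000 fails the check and has to be unbound and rebound first.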
*/ +static int +i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct i915_vma *vma; + int ret; + + if (tiling_mode == I915_TILING_NONE) + return 0; + + if (INTEL_GEN(dev_priv) >= 4) + return 0; + + list_for_each_entry(vma, &obj->vma_list, obj_link) { + if (i915_vma_fence_prepare(vma, tiling_mode)) + continue; + + ret = i915_vma_unbind(vma); + if (ret) + return ret; + } + + return 0; +} + /** * i915_gem_set_tiling - IOCTL handler to set tiling mode * @dev: DRM device @@ -164,15 +189,18 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, struct drm_i915_gem_set_tiling *args = data; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj; - int ret = 0; + int err = 0; + + /* Make sure we don't cross-contaminate obj->tiling_and_stride */ + BUILD_BUG_ON(I915_TILING_LAST & STRIDE_MASK); - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) return -ENOENT; if (!i915_tiling_ok(dev, args->stride, obj->base.size, args->tiling_mode)) { - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); return -EINVAL; } @@ -180,7 +208,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, mutex_lock(&dev->struct_mutex); if (obj->pin_display || obj->framebuffer_references) { - ret = -EBUSY; + err = -EBUSY; goto err; } @@ -213,8 +241,8 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, } } - if (args->tiling_mode != obj->tiling_mode || - args->stride != obj->stride) { + if (args->tiling_mode != i915_gem_object_get_tiling(obj) || + args->stride != i915_gem_object_get_stride(obj)) { /* We need to rebind the object if its current allocation * no longer meets the alignment restrictions for its new * tiling mode. Otherwise we can just leave it alone, but @@ -227,34 +255,36 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, * has to also include the unfenced register the GPU uses * whilst executing a fenced command for an untiled object. */ - if (obj->map_and_fenceable && - !i915_gem_object_fence_ok(obj, args->tiling_mode)) - ret = i915_vma_unbind(i915_gem_obj_to_ggtt(obj)); - if (ret == 0) { + err = i915_gem_object_fence_prepare(obj, args->tiling_mode); + if (!err) { + struct i915_vma *vma; + if (obj->pages && obj->madv == I915_MADV_WILLNEED && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { if (args->tiling_mode == I915_TILING_NONE) i915_gem_object_unpin_pages(obj); - if (obj->tiling_mode == I915_TILING_NONE) + if (!i915_gem_object_is_tiled(obj)) i915_gem_object_pin_pages(obj); } - obj->fence_dirty = - obj->last_fenced_req || - obj->fence_reg != I915_FENCE_REG_NONE; + list_for_each_entry(vma, &obj->vma_list, obj_link) { + if (!vma->fence) + continue; - obj->tiling_mode = args->tiling_mode; - obj->stride = args->stride; + vma->fence->dirty = true; + } + obj->tiling_and_stride = + args->stride | args->tiling_mode; /* Force the fence to be reacquired for GTT access */ i915_gem_release_mmap(obj); } } /* we have to maintain this existing ABI... 
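 *
 * (A sketch of the packing behind the getters used below: the stride
 * is at least tile-width aligned, so its low bits are free to carry
 * the tiling mode, which is exactly what the BUILD_BUG_ON() above
 * verifies:
 *
 *	obj->tiling_and_stride = args->stride | args->tiling_mode;
 *	stride = obj->tiling_and_stride & STRIDE_MASK;
 *	tiling = obj->tiling_and_stride & TILING_MASK;
 *
 * so a single word can be read locklessly, as in get_tiling below.)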
*/ - args->stride = obj->stride; - args->tiling_mode = obj->tiling_mode; + args->stride = i915_gem_object_get_stride(obj); + args->tiling_mode = i915_gem_object_get_tiling(obj); /* Try to preallocate memory required to save swizzling on put-pages */ if (i915_gem_object_needs_bit17_swizzle(obj)) { @@ -268,12 +298,12 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, } err: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); intel_runtime_pm_put(dev_priv); - return ret; + return err; } /** @@ -297,14 +327,12 @@ i915_gem_get_tiling(struct drm_device *dev, void *data, struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj; - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) return -ENOENT; - mutex_lock(&dev->struct_mutex); - - args->tiling_mode = obj->tiling_mode; - switch (obj->tiling_mode) { + args->tiling_mode = READ_ONCE(obj->tiling_and_stride) & TILING_MASK; + switch (args->tiling_mode) { case I915_TILING_X: args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; break; @@ -328,8 +356,6 @@ i915_gem_get_tiling(struct drm_device *dev, void *data, if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; - drm_gem_object_unreference(&obj->base); - mutex_unlock(&dev->struct_mutex); - + i915_gem_object_put_unlocked(obj); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 2314c88323e3..e537930c64b5 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -63,33 +63,12 @@ struct i915_mmu_object { static void wait_rendering(struct drm_i915_gem_object *obj) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_gem_request *requests[I915_NUM_ENGINES]; - int i, n; - - if (!obj->active) - return; - - n = 0; - for (i = 0; i < I915_NUM_ENGINES; i++) { - struct drm_i915_gem_request *req; - - req = obj->last_read_req[i]; - if (req == NULL) - continue; - - requests[n++] = i915_gem_request_reference(req); - } - - mutex_unlock(&dev->struct_mutex); - - for (i = 0; i < n; i++) - __i915_wait_request(requests[i], false, NULL, NULL); - - mutex_lock(&dev->struct_mutex); + unsigned long active = __I915_BO_ACTIVE(obj); + int idx; - for (i = 0; i < n; i++) - i915_gem_request_unreference(requests[i]); + for_each_active(active, idx) + i915_gem_active_wait_unlocked(&obj->last_read[idx], + 0, NULL, NULL); } static void cancel_userptr(struct work_struct *work) @@ -98,28 +77,19 @@ static void cancel_userptr(struct work_struct *work) struct drm_i915_gem_object *obj = mo->obj; struct drm_device *dev = obj->base.dev; + wait_rendering(obj); + mutex_lock(&dev->struct_mutex); /* Cancel any active worker and force us to re-evaluate gup */ obj->userptr.work = NULL; if (obj->pages != NULL) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_vma *vma, *tmp; - bool was_interruptible; - - wait_rendering(obj); - - was_interruptible = dev_priv->mm.interruptible; - dev_priv->mm.interruptible = false; - - list_for_each_entry_safe(vma, tmp, &obj->vma_list, obj_link) - WARN_ON(i915_vma_unbind(vma)); + /* We are inside a kthread context and can't be interrupted */ + WARN_ON(i915_gem_object_unbind(obj)); WARN_ON(i915_gem_object_put_pages(obj)); - - dev_priv->mm.interruptible = was_interruptible; } - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); 
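/* The rewritten wait_rendering() above waits only on engines whose bit
 * is set in the object's active mask; for_each_active() is essentially
 * a set-bit walk, roughly equivalent to the generic helper:
 *
 *	unsigned long active = __I915_BO_ACTIVE(obj);
 *	unsigned int idx;
 *
 *	for_each_set_bit(idx, &active, I915_NUM_ENGINES)
 *		i915_gem_active_wait_unlocked(&obj->last_read[idx],
 *					      0, NULL, NULL);
 */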
mutex_unlock(&dev->struct_mutex); } @@ -572,12 +542,10 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) } } obj->userptr.work = ERR_PTR(ret); - if (ret) - __i915_gem_userptr_set_active(obj, false); } obj->userptr.workers--; - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); release_pages(pvec, pinned, 0); @@ -622,8 +590,7 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj, obj->userptr.work = &work->work; obj->userptr.workers++; - work->obj = obj; - drm_gem_object_reference(&obj->base); + work->obj = i915_gem_object_get(obj); work->task = current; get_task_struct(work->task); @@ -659,15 +626,14 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) * to the vma (discard or cloning) which should prevent the more * egregious cases from causing harm. */ - if (IS_ERR(obj->userptr.work)) { - /* active flag will have been dropped already by the worker */ - ret = PTR_ERR(obj->userptr.work); - obj->userptr.work = NULL; - return ret; - } - if (obj->userptr.work) + + if (obj->userptr.work) { /* active flag should still be held for the pending work */ - return -EAGAIN; + if (IS_ERR(obj->userptr.work)) + return PTR_ERR(obj->userptr.work); + else + return -EAGAIN; + } /* Let the mmu-notifier know that we have begun and need cancellation */ ret = __i915_gem_userptr_set_active(obj, true); @@ -846,7 +812,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file ret = drm_gem_handle_create(file, &obj->base, &handle); /* drop reference from allocate - handle holds it now */ - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 9d73d2216adc..334f15df7c8d 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -30,9 +30,9 @@ #include <generated/utsrelease.h> #include "i915_drv.h" -static const char *ring_str(int ring) +static const char *engine_str(int engine) { - switch (ring) { + switch (engine) { case RCS: return "render"; case VCS: return "bsd"; case BCS: return "blt"; @@ -42,16 +42,6 @@ static const char *ring_str(int ring) } } -static const char *pin_flag(int pinned) -{ - if (pinned > 0) - return " P"; - else if (pinned < 0) - return " p"; - else - return ""; -} - static const char *tiling_flag(int tiling) { switch (tiling) { @@ -189,7 +179,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, { int i; - err_printf(m, " %s [%d]:\n", name, count); + err_printf(m, "%s [%d]:\n", name, count); while (count--) { err_printf(m, " %08x_%08x %8u %02x %02x [ ", @@ -202,13 +192,12 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, err_printf(m, "%02x ", err->rseqno[i]); err_printf(m, "] %02x", err->wseqno); - err_puts(m, pin_flag(err->pinned)); err_puts(m, tiling_flag(err->tiling)); err_puts(m, dirty_flag(err->dirty)); err_puts(m, purgeable_flag(err->purgeable)); err_puts(m, err->userptr ? " userptr" : ""); - err_puts(m, err->ring != -1 ? " " : ""); - err_puts(m, ring_str(err->ring)); + err_puts(m, err->engine != -1 ? 
" " : ""); + err_puts(m, engine_str(err->engine)); err_puts(m, i915_cache_level_str(m->i915, err->cache_level)); if (err->name) @@ -221,7 +210,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, } } -static const char *hangcheck_action_to_str(enum intel_ring_hangcheck_action a) +static const char *hangcheck_action_to_str(enum intel_engine_hangcheck_action a) { switch (a) { case HANGCHECK_IDLE: @@ -239,70 +228,74 @@ static const char *hangcheck_action_to_str(enum intel_ring_hangcheck_action a) return "unknown"; } -static void i915_ring_error_state(struct drm_i915_error_state_buf *m, - struct drm_device *dev, - struct drm_i915_error_state *error, - int ring_idx) +static void error_print_engine(struct drm_i915_error_state_buf *m, + struct drm_i915_error_engine *ee) { - struct drm_i915_error_ring *ring = &error->ring[ring_idx]; - - if (!ring->valid) - return; - - err_printf(m, "%s command stream:\n", ring_str(ring_idx)); - err_printf(m, " START: 0x%08x\n", ring->start); - err_printf(m, " HEAD: 0x%08x\n", ring->head); - err_printf(m, " TAIL: 0x%08x\n", ring->tail); - err_printf(m, " CTL: 0x%08x\n", ring->ctl); - err_printf(m, " HWS: 0x%08x\n", ring->hws); - err_printf(m, " ACTHD: 0x%08x %08x\n", (u32)(ring->acthd>>32), (u32)ring->acthd); - err_printf(m, " IPEIR: 0x%08x\n", ring->ipeir); - err_printf(m, " IPEHR: 0x%08x\n", ring->ipehr); - err_printf(m, " INSTDONE: 0x%08x\n", ring->instdone); - if (INTEL_INFO(dev)->gen >= 4) { - err_printf(m, " BBADDR: 0x%08x %08x\n", (u32)(ring->bbaddr>>32), (u32)ring->bbaddr); - err_printf(m, " BB_STATE: 0x%08x\n", ring->bbstate); - err_printf(m, " INSTPS: 0x%08x\n", ring->instps); + err_printf(m, "%s command stream:\n", engine_str(ee->engine_id)); + err_printf(m, " START: 0x%08x\n", ee->start); + err_printf(m, " HEAD: 0x%08x\n", ee->head); + err_printf(m, " TAIL: 0x%08x\n", ee->tail); + err_printf(m, " CTL: 0x%08x\n", ee->ctl); + err_printf(m, " MODE: 0x%08x\n", ee->mode); + err_printf(m, " HWS: 0x%08x\n", ee->hws); + err_printf(m, " ACTHD: 0x%08x %08x\n", + (u32)(ee->acthd>>32), (u32)ee->acthd); + err_printf(m, " IPEIR: 0x%08x\n", ee->ipeir); + err_printf(m, " IPEHR: 0x%08x\n", ee->ipehr); + err_printf(m, " INSTDONE: 0x%08x\n", ee->instdone); + if (ee->batchbuffer) { + u64 start = ee->batchbuffer->gtt_offset; + u64 end = start + ee->batchbuffer->gtt_size; + + err_printf(m, " batch: [0x%08x_%08x, 0x%08x_%08x]\n", + upper_32_bits(start), lower_32_bits(start), + upper_32_bits(end), lower_32_bits(end)); } - err_printf(m, " INSTPM: 0x%08x\n", ring->instpm); - err_printf(m, " FADDR: 0x%08x %08x\n", upper_32_bits(ring->faddr), - lower_32_bits(ring->faddr)); - if (INTEL_INFO(dev)->gen >= 6) { - err_printf(m, " RC PSMI: 0x%08x\n", ring->rc_psmi); - err_printf(m, " FAULT_REG: 0x%08x\n", ring->fault_reg); + if (INTEL_GEN(m->i915) >= 4) { + err_printf(m, " BBADDR: 0x%08x_%08x\n", + (u32)(ee->bbaddr>>32), (u32)ee->bbaddr); + err_printf(m, " BB_STATE: 0x%08x\n", ee->bbstate); + err_printf(m, " INSTPS: 0x%08x\n", ee->instps); + } + err_printf(m, " INSTPM: 0x%08x\n", ee->instpm); + err_printf(m, " FADDR: 0x%08x %08x\n", upper_32_bits(ee->faddr), + lower_32_bits(ee->faddr)); + if (INTEL_GEN(m->i915) >= 6) { + err_printf(m, " RC PSMI: 0x%08x\n", ee->rc_psmi); + err_printf(m, " FAULT_REG: 0x%08x\n", ee->fault_reg); err_printf(m, " SYNC_0: 0x%08x [last synced 0x%08x]\n", - ring->semaphore_mboxes[0], - ring->semaphore_seqno[0]); + ee->semaphore_mboxes[0], + ee->semaphore_seqno[0]); err_printf(m, " SYNC_1: 0x%08x [last synced 0x%08x]\n", - 
ring->semaphore_mboxes[1], - ring->semaphore_seqno[1]); - if (HAS_VEBOX(dev)) { + ee->semaphore_mboxes[1], + ee->semaphore_seqno[1]); + if (HAS_VEBOX(m->i915)) { err_printf(m, " SYNC_2: 0x%08x [last synced 0x%08x]\n", - ring->semaphore_mboxes[2], - ring->semaphore_seqno[2]); + ee->semaphore_mboxes[2], + ee->semaphore_seqno[2]); } } - if (USES_PPGTT(dev)) { - err_printf(m, " GFX_MODE: 0x%08x\n", ring->vm_info.gfx_mode); + if (USES_PPGTT(m->i915)) { + err_printf(m, " GFX_MODE: 0x%08x\n", ee->vm_info.gfx_mode); - if (INTEL_INFO(dev)->gen >= 8) { + if (INTEL_GEN(m->i915) >= 8) { int i; for (i = 0; i < 4; i++) err_printf(m, " PDP%d: 0x%016llx\n", - i, ring->vm_info.pdp[i]); + i, ee->vm_info.pdp[i]); } else { err_printf(m, " PP_DIR_BASE: 0x%08x\n", - ring->vm_info.pp_dir_base); + ee->vm_info.pp_dir_base); } } - err_printf(m, " seqno: 0x%08x\n", ring->seqno); - err_printf(m, " last_seqno: 0x%08x\n", ring->last_seqno); - err_printf(m, " waiting: %s\n", yesno(ring->waiting)); - err_printf(m, " ring->head: 0x%08x\n", ring->cpu_ring_head); - err_printf(m, " ring->tail: 0x%08x\n", ring->cpu_ring_tail); + err_printf(m, " seqno: 0x%08x\n", ee->seqno); + err_printf(m, " last_seqno: 0x%08x\n", ee->last_seqno); + err_printf(m, " waiting: %s\n", yesno(ee->waiting)); + err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head); + err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail); err_printf(m, " hangcheck: %s [%d]\n", - hangcheck_action_to_str(ring->hangcheck_action), - ring->hangcheck_score); + hangcheck_action_to_str(ee->hangcheck_action), + ee->hangcheck_score); } void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) @@ -328,11 +321,22 @@ static void print_error_obj(struct drm_i915_error_state_buf *m, } } +static void err_print_capabilities(struct drm_i915_error_state_buf *m, + const struct intel_device_info *info) +{ +#define PRINT_FLAG(x) err_printf(m, #x ": %s\n", yesno(info->x)) +#define SEP_SEMICOLON ; + DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG, SEP_SEMICOLON); +#undef PRINT_FLAG +#undef SEP_SEMICOLON +} + int i915_error_state_to_str(struct drm_i915_error_state_buf *m, const struct i915_error_state_file_priv *error_priv) { struct drm_device *dev = error_priv->dev; struct drm_i915_private *dev_priv = to_i915(dev); + struct pci_dev *pdev = dev_priv->drm.pdev; struct drm_i915_error_state *error = error_priv->error; struct drm_i915_error_object *obj; int i, j, offset, elt; @@ -347,27 +351,28 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, err_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec, error->time.tv_usec); err_printf(m, "Kernel: " UTS_RELEASE "\n"); + err_print_capabilities(m, &error->device_info); max_hangcheck_score = 0; - for (i = 0; i < ARRAY_SIZE(error->ring); i++) { - if (error->ring[i].hangcheck_score > max_hangcheck_score) - max_hangcheck_score = error->ring[i].hangcheck_score; + for (i = 0; i < ARRAY_SIZE(error->engine); i++) { + if (error->engine[i].hangcheck_score > max_hangcheck_score) + max_hangcheck_score = error->engine[i].hangcheck_score; } - for (i = 0; i < ARRAY_SIZE(error->ring); i++) { - if (error->ring[i].hangcheck_score == max_hangcheck_score && - error->ring[i].pid != -1) { + for (i = 0; i < ARRAY_SIZE(error->engine); i++) { + if (error->engine[i].hangcheck_score == max_hangcheck_score && + error->engine[i].pid != -1) { err_printf(m, "Active process (on ring %s): %s [%d]\n", - ring_str(i), - error->ring[i].comm, - error->ring[i].pid); + engine_str(i), + error->engine[i].comm, + error->engine[i].pid); } } err_printf(m, "Reset 
count: %u\n", error->reset_count); err_printf(m, "Suspend count: %u\n", error->suspend_count); - err_printf(m, "PCI ID: 0x%04x\n", dev->pdev->device); - err_printf(m, "PCI Revision: 0x%02x\n", dev->pdev->revision); + err_printf(m, "PCI ID: 0x%04x\n", pdev->device); + err_printf(m, "PCI Revision: 0x%02x\n", pdev->revision); err_printf(m, "PCI Subsystem: %04x:%04x\n", - dev->pdev->subsystem_vendor, - dev->pdev->subsystem_device); + pdev->subsystem_vendor, + pdev->subsystem_device); err_printf(m, "IOMMU enabled?: %d\n", error->iommu); if (HAS_CSR(dev)) { @@ -414,36 +419,55 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, if (IS_GEN7(dev)) err_printf(m, "ERR_INT: 0x%08x\n", error->err_int); - for (i = 0; i < ARRAY_SIZE(error->ring); i++) - i915_ring_error_state(m, dev, error, i); + for (i = 0; i < ARRAY_SIZE(error->engine); i++) { + if (error->engine[i].engine_id != -1) + error_print_engine(m, &error->engine[i]); + } + + for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) { + char buf[128]; + int len, first = 1; + + if (!error->active_vm[i]) + break; - for (i = 0; i < error->vm_count; i++) { - err_printf(m, "vm[%d]\n", i); + len = scnprintf(buf, sizeof(buf), "Active ("); + for (j = 0; j < ARRAY_SIZE(error->engine); j++) { + if (error->engine[j].vm != error->active_vm[i]) + continue; - print_error_buffers(m, "Active", + len += scnprintf(buf + len, sizeof(buf) - len, "%s%s", + first ? "" : ", ", + dev_priv->engine[j].name); + first = 0; + } + scnprintf(buf + len, sizeof(buf) - len, ")"); + print_error_buffers(m, buf, error->active_bo[i], error->active_bo_count[i]); - - print_error_buffers(m, "Pinned", - error->pinned_bo[i], - error->pinned_bo_count[i]); } - for (i = 0; i < ARRAY_SIZE(error->ring); i++) { - obj = error->ring[i].batchbuffer; + print_error_buffers(m, "Pinned (global)", + error->pinned_bo, + error->pinned_bo_count); + + for (i = 0; i < ARRAY_SIZE(error->engine); i++) { + struct drm_i915_error_engine *ee = &error->engine[i]; + + obj = ee->batchbuffer; if (obj) { err_puts(m, dev_priv->engine[i].name); - if (error->ring[i].pid != -1) + if (ee->pid != -1) err_printf(m, " (submitted by %s [%d])", - error->ring[i].comm, - error->ring[i].pid); + ee->comm, + ee->pid); err_printf(m, " --- gtt_offset = 0x%08x %08x\n", upper_32_bits(obj->gtt_offset), lower_32_bits(obj->gtt_offset)); print_error_obj(m, obj); } - obj = error->ring[i].wa_batchbuffer; + obj = ee->wa_batchbuffer; if (obj) { err_printf(m, "%s (w/a) --- gtt_offset = 0x%08x\n", dev_priv->engine[i].name, @@ -451,38 +475,43 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, print_error_obj(m, obj); } - if (error->ring[i].num_requests) { + if (ee->num_requests) { err_printf(m, "%s --- %d requests\n", dev_priv->engine[i].name, - error->ring[i].num_requests); - for (j = 0; j < error->ring[i].num_requests; j++) { - err_printf(m, " seqno 0x%08x, emitted %ld, tail 0x%08x\n", - error->ring[i].requests[j].seqno, - error->ring[i].requests[j].jiffies, - error->ring[i].requests[j].tail); + ee->num_requests); + for (j = 0; j < ee->num_requests; j++) { + err_printf(m, " pid %d, seqno 0x%08x, emitted %ld, head 0x%08x, tail 0x%08x\n", + ee->requests[j].pid, + ee->requests[j].seqno, + ee->requests[j].jiffies, + ee->requests[j].head, + ee->requests[j].tail); } } - if (error->ring[i].num_waiters) { + if (IS_ERR(ee->waiters)) { + err_printf(m, "%s --- ?
waiters [unable to acquire spinlock]\n", + dev_priv->engine[i].name); + } else if (ee->num_waiters) { err_printf(m, "%s --- %d waiters\n", dev_priv->engine[i].name, - error->ring[i].num_waiters); - for (j = 0; j < error->ring[i].num_waiters; j++) { + ee->num_waiters); + for (j = 0; j < ee->num_waiters; j++) { err_printf(m, " seqno 0x%08x for %s [%d]\n", - error->ring[i].waiters[j].seqno, - error->ring[i].waiters[j].comm, - error->ring[i].waiters[j].pid); + ee->waiters[j].seqno, + ee->waiters[j].comm, + ee->waiters[j].pid); } } - if ((obj = error->ring[i].ringbuffer)) { + if ((obj = ee->ringbuffer)) { err_printf(m, "%s --- ringbuffer = 0x%08x\n", dev_priv->engine[i].name, lower_32_bits(obj->gtt_offset)); print_error_obj(m, obj); } - if ((obj = error->ring[i].hws_page)) { + if ((obj = ee->hws_page)) { u64 hws_offset = obj->gtt_offset; u32 *hws_page = &obj->pages[0][0]; @@ -504,7 +533,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, } } - obj = error->ring[i].wa_ctx; + obj = ee->wa_ctx; if (obj) { u64 wa_ctx_offset = obj->gtt_offset; u32 *wa_ctx_page = &obj->pages[0][0]; @@ -526,7 +555,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, } } - if ((obj = error->ring[i].ctx)) { + if ((obj = ee->ctx)) { err_printf(m, "%s --- HW Context = 0x%08x\n", dev_priv->engine[i].name, lower_32_bits(obj->gtt_offset)); @@ -534,7 +563,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, } } - if ((obj = error->semaphore_obj)) { + if ((obj = error->semaphore)) { err_printf(m, "Semaphore page = 0x%08x\n", lower_32_bits(obj->gtt_offset)); for (elt = 0; elt < PAGE_SIZE/16; elt += 4) { @@ -611,26 +640,27 @@ static void i915_error_state_free(struct kref *error_ref) typeof(*error), ref); int i; - for (i = 0; i < ARRAY_SIZE(error->ring); i++) { - i915_error_object_free(error->ring[i].batchbuffer); - i915_error_object_free(error->ring[i].wa_batchbuffer); - i915_error_object_free(error->ring[i].ringbuffer); - i915_error_object_free(error->ring[i].hws_page); - i915_error_object_free(error->ring[i].ctx); - i915_error_object_free(error->ring[i].wa_ctx); - kfree(error->ring[i].requests); - kfree(error->ring[i].waiters); + for (i = 0; i < ARRAY_SIZE(error->engine); i++) { + struct drm_i915_error_engine *ee = &error->engine[i]; + + i915_error_object_free(ee->batchbuffer); + i915_error_object_free(ee->wa_batchbuffer); + i915_error_object_free(ee->ringbuffer); + i915_error_object_free(ee->hws_page); + i915_error_object_free(ee->ctx); + i915_error_object_free(ee->wa_ctx); + + kfree(ee->requests); + if (!IS_ERR_OR_NULL(ee->waiters)) + kfree(ee->waiters); } - i915_error_object_free(error->semaphore_obj); + i915_error_object_free(error->semaphore); - for (i = 0; i < error->vm_count; i++) + for (i = 0; i < ARRAY_SIZE(error->active_bo); i++) kfree(error->active_bo[i]); - - kfree(error->active_bo); - kfree(error->active_bo_count); kfree(error->pinned_bo); - kfree(error->pinned_bo_count); + kfree(error->overlay); kfree(error->display); kfree(error); @@ -638,46 +668,45 @@ static void i915_error_state_free(struct kref *error_ref) static struct drm_i915_error_object * i915_error_object_create(struct drm_i915_private *dev_priv, - struct drm_i915_gem_object *src, - struct i915_address_space *vm) + struct i915_vma *vma) { struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct drm_i915_gem_object *src; struct drm_i915_error_object *dst; - struct i915_vma *vma = NULL; int num_pages; bool use_ggtt; int i = 0; u64 reloc_offset; - if (src == NULL || src->pages == NULL) + if (!vma) + return NULL; + 
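/* The copy below goes page by page, preferring a readback through the
 * GTT aperture so the snapshot reflects what the GPU actually saw; a
 * condensed sketch of the per-page step that follows:
 *
 *	void __iomem *s;
 *
 *	s = io_mapping_map_atomic_wc(&ggtt->mappable, reloc_offset);
 *	memcpy_fromio(d, s, PAGE_SIZE);
 *	io_mapping_unmap_atomic(s);
 */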
+ src = vma->obj; + if (!src->pages) return NULL; num_pages = src->base.size >> PAGE_SHIFT; dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC); - if (dst == NULL) + if (!dst) return NULL; - if (i915_gem_obj_bound(src, vm)) - dst->gtt_offset = i915_gem_obj_offset(src, vm); - else - dst->gtt_offset = -1; + dst->gtt_offset = vma->node.start; + dst->gtt_size = vma->node.size; reloc_offset = dst->gtt_offset; - if (i915_is_ggtt(vm)) - vma = i915_gem_obj_to_ggtt(src); use_ggtt = (src->cache_level == I915_CACHE_NONE && - vma && (vma->bound & GLOBAL_BIND) && + (vma->flags & I915_VMA_GLOBAL_BIND) && reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end); /* Cannot access stolen address directly, try to use the aperture */ if (src->stolen) { use_ggtt = true; - if (!(vma && vma->bound & GLOBAL_BIND)) + if (!(vma->flags & I915_VMA_GLOBAL_BIND)) goto unwind; - reloc_offset = i915_gem_obj_ggtt_offset(src); + reloc_offset = vma->node.start; if (reloc_offset + num_pages * PAGE_SIZE > ggtt->mappable_end) goto unwind; } @@ -705,7 +734,7 @@ i915_error_object_create(struct drm_i915_private *dev_priv, * captures what the GPU read. */ - s = io_mapping_map_atomic_wc(ggtt->mappable, + s = io_mapping_map_atomic_wc(&ggtt->mappable, reloc_offset); memcpy_fromio(d, s, PAGE_SIZE); io_mapping_unmap_atomic(s); @@ -737,8 +766,24 @@ unwind: kfree(dst); return NULL; } -#define i915_error_ggtt_object_create(dev_priv, src) \ - i915_error_object_create((dev_priv), (src), &(dev_priv)->ggtt.base) + +/* The error capture is special as tries to run underneath the normal + * locking rules - so we use the raw version of the i915_gem_active lookup. + */ +static inline uint32_t +__active_get_seqno(struct i915_gem_active *active) +{ + return i915_gem_request_get_seqno(__i915_gem_active_peek(active)); +} + +static inline int +__active_get_engine_id(struct i915_gem_active *active) +{ + struct intel_engine_cs *engine; + + engine = i915_gem_request_get_engine(__i915_gem_active_peek(active)); + return engine ? engine->id : -1; +} static void capture_bo(struct drm_i915_error_buffer *err, struct i915_vma *vma) @@ -748,32 +793,34 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->size = obj->base.size; err->name = obj->base.name; + for (i = 0; i < I915_NUM_ENGINES; i++) - err->rseqno[i] = i915_gem_request_get_seqno(obj->last_read_req[i]); - err->wseqno = i915_gem_request_get_seqno(obj->last_write_req); + err->rseqno[i] = __active_get_seqno(&obj->last_read[i]); + err->wseqno = __active_get_seqno(&obj->last_write); + err->engine = __active_get_engine_id(&obj->last_write); + err->gtt_offset = vma->node.start; err->read_domains = obj->base.read_domains; err->write_domain = obj->base.write_domain; - err->fence_reg = obj->fence_reg; - err->pinned = 0; - if (i915_gem_obj_is_pinned(obj)) - err->pinned = 1; - err->tiling = obj->tiling_mode; + err->fence_reg = vma->fence ? vma->fence->id : -1; + err->tiling = i915_gem_object_get_tiling(obj); err->dirty = obj->dirty; err->purgeable = obj->madv != I915_MADV_WILLNEED; err->userptr = obj->userptr.mm != NULL; - err->ring = obj->last_write_req ? 
- i915_gem_request_get_engine(obj->last_write_req)->id : -1; err->cache_level = obj->cache_level; } -static u32 capture_active_bo(struct drm_i915_error_buffer *err, - int count, struct list_head *head) +static u32 capture_error_bo(struct drm_i915_error_buffer *err, + int count, struct list_head *head, + bool pinned_only) { struct i915_vma *vma; int i = 0; list_for_each_entry(vma, head, vm_link) { + if (pinned_only && !i915_vma_is_pinned(vma)) + continue; + capture_bo(err++, vma); if (++i == count) break; @@ -782,28 +829,6 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err, return i; } -static u32 capture_pinned_bo(struct drm_i915_error_buffer *err, - int count, struct list_head *head, - struct i915_address_space *vm) -{ - struct drm_i915_gem_object *obj; - struct drm_i915_error_buffer * const first = err; - struct drm_i915_error_buffer * const last = err + count; - - list_for_each_entry(obj, head, global_list) { - struct i915_vma *vma; - - if (err == last) - break; - - list_for_each_entry(vma, &obj->vma_list, obj_link) - if (vma->vm == vm && vma->pin_count > 0) - capture_bo(err++, vma); - } - - return err - first; -} - /* Generate a semi-unique error code. The code is not meant to have meaning; the * code's only purpose is to try to prevent false duplicated bug reports by * grossly estimating a GPU error state. @@ -815,7 +840,7 @@ static u32 capture_pinned_bo(struct drm_i915_error_buffer *err, */ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, struct drm_i915_error_state *error, - int *ring_id) + int *engine_id) { uint32_t error_code = 0; int i; @@ -826,11 +851,11 @@ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, * strictly a client bug. Use instdone to differentiate those somewhat.
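 *
 * The resulting code reduces to a single XOR per hung engine, roughly:
 *
 *	ecode = ee->ipehr ^ ee->instdone;
 *
 * cheap, and stable across runs for the same class of hang, so
 * identical hangs collide onto the same code.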
*/ for (i = 0; i < I915_NUM_ENGINES; i++) { - if (error->ring[i].hangcheck_action == HANGCHECK_HUNG) { - if (ring_id) - *ring_id = i; + if (error->engine[i].hangcheck_action == HANGCHECK_HUNG) { + if (engine_id) + *engine_id = i; - return error->ring[i].ipehr ^ error->ring[i].instdone; + return error->engine[i].ipehr ^ error->engine[i].instdone; } } @@ -855,22 +880,17 @@ static void i915_gem_record_fences(struct drm_i915_private *dev_priv, } -static void gen8_record_semaphore_state(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error, +static void gen8_record_semaphore_state(struct drm_i915_error_state *error, struct intel_engine_cs *engine, - struct drm_i915_error_ring *ering) + struct drm_i915_error_engine *ee) { + struct drm_i915_private *dev_priv = engine->i915; struct intel_engine_cs *to; enum intel_engine_id id; - if (!i915_semaphore_is_enabled(dev_priv)) + if (!error->semaphore) return; - if (!error->semaphore_obj) - error->semaphore_obj = - i915_error_ggtt_object_create(dev_priv, - dev_priv->semaphore_obj); - for_each_engine_id(to, dev_priv, id) { int idx; u16 signal_offset; @@ -879,44 +899,52 @@ static void gen8_record_semaphore_state(struct drm_i915_private *dev_priv, if (engine == to) continue; - signal_offset = (GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) - / 4; - tmp = error->semaphore_obj->pages[0]; - idx = intel_ring_sync_index(engine, to); + signal_offset = + (GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) / 4; + tmp = error->semaphore->pages[0]; + idx = intel_engine_sync_index(engine, to); - ering->semaphore_mboxes[idx] = tmp[signal_offset]; - ering->semaphore_seqno[idx] = engine->semaphore.sync_seqno[idx]; + ee->semaphore_mboxes[idx] = tmp[signal_offset]; + ee->semaphore_seqno[idx] = engine->semaphore.sync_seqno[idx]; } } -static void gen6_record_semaphore_state(struct drm_i915_private *dev_priv, - struct intel_engine_cs *engine, - struct drm_i915_error_ring *ering) +static void gen6_record_semaphore_state(struct intel_engine_cs *engine, + struct drm_i915_error_engine *ee) { - ering->semaphore_mboxes[0] = I915_READ(RING_SYNC_0(engine->mmio_base)); - ering->semaphore_mboxes[1] = I915_READ(RING_SYNC_1(engine->mmio_base)); - ering->semaphore_seqno[0] = engine->semaphore.sync_seqno[0]; - ering->semaphore_seqno[1] = engine->semaphore.sync_seqno[1]; + struct drm_i915_private *dev_priv = engine->i915; + + ee->semaphore_mboxes[0] = I915_READ(RING_SYNC_0(engine->mmio_base)); + ee->semaphore_mboxes[1] = I915_READ(RING_SYNC_1(engine->mmio_base)); + ee->semaphore_seqno[0] = engine->semaphore.sync_seqno[0]; + ee->semaphore_seqno[1] = engine->semaphore.sync_seqno[1]; if (HAS_VEBOX(dev_priv)) { - ering->semaphore_mboxes[2] = + ee->semaphore_mboxes[2] = I915_READ(RING_SYNC_2(engine->mmio_base)); - ering->semaphore_seqno[2] = engine->semaphore.sync_seqno[2]; + ee->semaphore_seqno[2] = engine->semaphore.sync_seqno[2]; } } -static void engine_record_waiters(struct intel_engine_cs *engine, - struct drm_i915_error_ring *ering) +static void error_record_engine_waiters(struct intel_engine_cs *engine, + struct drm_i915_error_engine *ee) { struct intel_breadcrumbs *b = &engine->breadcrumbs; struct drm_i915_error_waiter *waiter; struct rb_node *rb; int count; - ering->num_waiters = 0; - ering->waiters = NULL; + ee->num_waiters = 0; + ee->waiters = NULL; + + if (RB_EMPTY_ROOT(&b->waiters)) + return; + + if (!spin_trylock(&b->lock)) { + ee->waiters = ERR_PTR(-EDEADLK); + return; + } - spin_lock(&b->lock); count = 0; for (rb = rb_first(&b->waiters); rb != NULL; rb = 
rb_next(rb)) count++; @@ -930,9 +958,13 @@ static void engine_record_waiters(struct intel_engine_cs *engine, if (!waiter) return; - ering->waiters = waiter; + if (!spin_trylock(&b->lock)) { + kfree(waiter); + ee->waiters = ERR_PTR(-EDEADLK); + return; + } - spin_lock(&b->lock); + ee->waiters = waiter; for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { struct intel_wait *w = container_of(rb, typeof(*w), node); @@ -941,57 +973,59 @@ static void engine_record_waiters(struct intel_engine_cs *engine, waiter->seqno = w->seqno; waiter++; - if (++ering->num_waiters == count) + if (++ee->num_waiters == count) break; } spin_unlock(&b->lock); } -static void i915_record_ring_state(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error, - struct intel_engine_cs *engine, - struct drm_i915_error_ring *ering) +static void error_record_engine_registers(struct drm_i915_error_state *error, + struct intel_engine_cs *engine, + struct drm_i915_error_engine *ee) { + struct drm_i915_private *dev_priv = engine->i915; + if (INTEL_GEN(dev_priv) >= 6) { - ering->rc_psmi = I915_READ(RING_PSMI_CTL(engine->mmio_base)); - ering->fault_reg = I915_READ(RING_FAULT_REG(engine)); + ee->rc_psmi = I915_READ(RING_PSMI_CTL(engine->mmio_base)); + ee->fault_reg = I915_READ(RING_FAULT_REG(engine)); if (INTEL_GEN(dev_priv) >= 8) - gen8_record_semaphore_state(dev_priv, error, engine, - ering); + gen8_record_semaphore_state(error, engine, ee); else - gen6_record_semaphore_state(dev_priv, engine, ering); + gen6_record_semaphore_state(engine, ee); } if (INTEL_GEN(dev_priv) >= 4) { - ering->faddr = I915_READ(RING_DMA_FADD(engine->mmio_base)); - ering->ipeir = I915_READ(RING_IPEIR(engine->mmio_base)); - ering->ipehr = I915_READ(RING_IPEHR(engine->mmio_base)); - ering->instdone = I915_READ(RING_INSTDONE(engine->mmio_base)); - ering->instps = I915_READ(RING_INSTPS(engine->mmio_base)); - ering->bbaddr = I915_READ(RING_BBADDR(engine->mmio_base)); + ee->faddr = I915_READ(RING_DMA_FADD(engine->mmio_base)); + ee->ipeir = I915_READ(RING_IPEIR(engine->mmio_base)); + ee->ipehr = I915_READ(RING_IPEHR(engine->mmio_base)); + ee->instdone = I915_READ(RING_INSTDONE(engine->mmio_base)); + ee->instps = I915_READ(RING_INSTPS(engine->mmio_base)); + ee->bbaddr = I915_READ(RING_BBADDR(engine->mmio_base)); if (INTEL_GEN(dev_priv) >= 8) { - ering->faddr |= (u64) I915_READ(RING_DMA_FADD_UDW(engine->mmio_base)) << 32; - ering->bbaddr |= (u64) I915_READ(RING_BBADDR_UDW(engine->mmio_base)) << 32; + ee->faddr |= (u64) I915_READ(RING_DMA_FADD_UDW(engine->mmio_base)) << 32; + ee->bbaddr |= (u64) I915_READ(RING_BBADDR_UDW(engine->mmio_base)) << 32; } - ering->bbstate = I915_READ(RING_BBSTATE(engine->mmio_base)); + ee->bbstate = I915_READ(RING_BBSTATE(engine->mmio_base)); } else { - ering->faddr = I915_READ(DMA_FADD_I8XX); - ering->ipeir = I915_READ(IPEIR); - ering->ipehr = I915_READ(IPEHR); - ering->instdone = I915_READ(GEN2_INSTDONE); + ee->faddr = I915_READ(DMA_FADD_I8XX); + ee->ipeir = I915_READ(IPEIR); + ee->ipehr = I915_READ(IPEHR); + ee->instdone = I915_READ(GEN2_INSTDONE); } - ering->waiting = intel_engine_has_waiter(engine); - ering->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); - ering->acthd = intel_ring_get_active_head(engine); - ering->seqno = intel_engine_get_seqno(engine); - ering->last_seqno = engine->last_submitted_seqno; - ering->start = I915_READ_START(engine); - ering->head = I915_READ_HEAD(engine); - ering->tail = I915_READ_TAIL(engine); - ering->ctl = I915_READ_CTL(engine); - - if (I915_NEED_GFX_HWS(dev_priv)) { + 
ee->waiting = intel_engine_has_waiter(engine); + ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); + ee->acthd = intel_engine_get_active_head(engine); + ee->seqno = intel_engine_get_seqno(engine); + ee->last_seqno = engine->last_submitted_seqno; + ee->start = I915_READ_START(engine); + ee->head = I915_READ_HEAD(engine); + ee->tail = I915_READ_TAIL(engine); + ee->ctl = I915_READ_CTL(engine); + if (INTEL_GEN(dev_priv) > 2) + ee->mode = I915_READ_MODE(engine); + + if (!HWS_NEEDS_PHYSICAL(dev_priv)) { i915_reg_t mmio; if (IS_GEN7(dev_priv)) { @@ -1017,107 +1051,150 @@ static void i915_record_ring_state(struct drm_i915_private *dev_priv, mmio = RING_HWS_PGA(engine->mmio_base); } - ering->hws = I915_READ(mmio); + ee->hws = I915_READ(mmio); } - ering->hangcheck_score = engine->hangcheck.score; - ering->hangcheck_action = engine->hangcheck.action; + ee->hangcheck_score = engine->hangcheck.score; + ee->hangcheck_action = engine->hangcheck.action; if (USES_PPGTT(dev_priv)) { int i; - ering->vm_info.gfx_mode = I915_READ(RING_MODE_GEN7(engine)); + ee->vm_info.gfx_mode = I915_READ(RING_MODE_GEN7(engine)); if (IS_GEN6(dev_priv)) - ering->vm_info.pp_dir_base = + ee->vm_info.pp_dir_base = I915_READ(RING_PP_DIR_BASE_READ(engine)); else if (IS_GEN7(dev_priv)) - ering->vm_info.pp_dir_base = + ee->vm_info.pp_dir_base = I915_READ(RING_PP_DIR_BASE(engine)); else if (INTEL_GEN(dev_priv) >= 8) for (i = 0; i < 4; i++) { - ering->vm_info.pdp[i] = + ee->vm_info.pdp[i] = I915_READ(GEN8_RING_PDP_UDW(engine, i)); - ering->vm_info.pdp[i] <<= 32; - ering->vm_info.pdp[i] |= + ee->vm_info.pdp[i] <<= 32; + ee->vm_info.pdp[i] |= I915_READ(GEN8_RING_PDP_LDW(engine, i)); } } } - -static void i915_gem_record_active_context(struct intel_engine_cs *engine, - struct drm_i915_error_state *error, - struct drm_i915_error_ring *ering) +static void engine_record_requests(struct intel_engine_cs *engine, + struct drm_i915_gem_request *first, + struct drm_i915_error_engine *ee) { - struct drm_i915_private *dev_priv = engine->i915; - struct drm_i915_gem_object *obj; + struct drm_i915_gem_request *request; + int count; - /* Currently render ring is the only HW context user */ - if (engine->id != RCS || !error->ccid) + count = 0; + request = first; + list_for_each_entry_from(request, &engine->request_list, link) + count++; + if (!count) return; - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - if (!i915_gem_obj_ggtt_bound(obj)) - continue; + ee->requests = kcalloc(count, sizeof(*ee->requests), GFP_ATOMIC); + if (!ee->requests) + return; - if ((error->ccid & PAGE_MASK) == i915_gem_obj_ggtt_offset(obj)) { - ering->ctx = i915_error_ggtt_object_create(dev_priv, obj); + ee->num_requests = count; + + count = 0; + request = first; + list_for_each_entry_from(request, &engine->request_list, link) { + struct drm_i915_error_request *erq; + + if (count >= ee->num_requests) { + /* + * If the ring request list was changed in + * between the point where the error request + * list was created and dimensioned and this + * point then just exit early to avoid crashes. + * + * We don't need to communicate that the + * request list changed state during error + * state capture and that the error state is + * slightly incorrect as a consequence since we + * are typically only interested in the request + * list state at the point of error state + * capture, not in any changes happening during + * the capture. 
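 *
 * (Both passes walk the same suffix of engine->request_list via
 * list_for_each_entry_from(), which resumes from @first rather than
 * the list head:
 *
 *	request = first;
 *	list_for_each_entry_from(request, &engine->request_list, link)
 *		count++;
 *
 * so the two counts can only disagree if requests were queued or
 * retired in between, which is exactly the race this guard tolerates.)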
+ */ break; } + + erq = &ee->requests[count++]; + erq->seqno = request->fence.seqno; + erq->jiffies = request->emitted_jiffies; + erq->head = request->head; + erq->tail = request->tail; + + rcu_read_lock(); + erq->pid = request->ctx->pid ? pid_nr(request->ctx->pid) : 0; + rcu_read_unlock(); } + ee->num_requests = count; } static void i915_gem_record_rings(struct drm_i915_private *dev_priv, struct drm_i915_error_state *error) { struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct drm_i915_gem_request *request; - int i, count; + int i; + + error->semaphore = + i915_error_object_create(dev_priv, dev_priv->semaphore); for (i = 0; i < I915_NUM_ENGINES; i++) { struct intel_engine_cs *engine = &dev_priv->engine[i]; + struct drm_i915_error_engine *ee = &error->engine[i]; + struct drm_i915_gem_request *request; - error->ring[i].pid = -1; + ee->pid = -1; + ee->engine_id = -1; if (!intel_engine_initialized(engine)) continue; - error->ring[i].valid = true; + ee->engine_id = i; - i915_record_ring_state(dev_priv, error, engine, &error->ring[i]); - engine_record_waiters(engine, &error->ring[i]); + error_record_engine_registers(error, engine, ee); + error_record_engine_waiters(engine, ee); request = i915_gem_find_active_request(engine); if (request) { - struct i915_address_space *vm; - struct intel_ringbuffer *rb; + struct intel_ring *ring; + struct pid *pid; - vm = request->ctx->ppgtt ? + ee->vm = request->ctx->ppgtt ? &request->ctx->ppgtt->base : &ggtt->base; /* We need to copy these to an anonymous buffer * as the simplest method to avoid being overwritten * by userspace. */ - error->ring[i].batchbuffer = + ee->batchbuffer = i915_error_object_create(dev_priv, - request->batch_obj, - vm); + request->batch); if (HAS_BROKEN_CS_TLB(dev_priv)) - error->ring[i].wa_batchbuffer = - i915_error_ggtt_object_create(dev_priv, - engine->scratch.obj); + ee->wa_batchbuffer = + i915_error_object_create(dev_priv, + engine->scratch); + + ee->ctx = + i915_error_object_create(dev_priv, + request->ctx->engine[i].state); - if (request->pid) { + pid = request->ctx->pid; + if (pid) { struct task_struct *task; rcu_read_lock(); - task = pid_task(request->pid, PIDTYPE_PID); + task = pid_task(pid, PIDTYPE_PID); if (task) { - strcpy(error->ring[i].comm, task->comm); - error->ring[i].pid = task->pid; + strcpy(ee->comm, task->comm); + ee->pid = task->pid; } rcu_read_unlock(); } @@ -1125,153 +1202,106 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, error->simulated |= request->ctx->flags & CONTEXT_NO_ERROR_CAPTURE; - rb = request->ringbuf; - error->ring[i].cpu_ring_head = rb->head; - error->ring[i].cpu_ring_tail = rb->tail; - error->ring[i].ringbuffer = - i915_error_ggtt_object_create(dev_priv, - rb->obj); - } - - error->ring[i].hws_page = - i915_error_ggtt_object_create(dev_priv, - engine->status_page.obj); + ring = request->ring; + ee->cpu_ring_head = ring->head; + ee->cpu_ring_tail = ring->tail; + ee->ringbuffer = + i915_error_object_create(dev_priv, ring->vma); - if (engine->wa_ctx.obj) { - error->ring[i].wa_ctx = - i915_error_ggtt_object_create(dev_priv, - engine->wa_ctx.obj); + engine_record_requests(engine, request, ee); } - i915_gem_record_active_context(engine, error, &error->ring[i]); - - count = 0; - list_for_each_entry(request, &engine->request_list, list) - count++; - - error->ring[i].num_requests = count; - error->ring[i].requests = - kcalloc(count, sizeof(*error->ring[i].requests), - GFP_ATOMIC); - if (error->ring[i].requests == NULL) { - error->ring[i].num_requests = 0; - continue; - } - - count = 
0; - list_for_each_entry(request, &engine->request_list, list) { - struct drm_i915_error_request *erq; - - if (count >= error->ring[i].num_requests) { - /* - * If the ring request list was changed in - * between the point where the error request - * list was created and dimensioned and this - * point then just exit early to avoid crashes. - * - * We don't need to communicate that the - * request list changed state during error - * state capture and that the error state is - * slightly incorrect as a consequence since we - * are typically only interested in the request - * list state at the point of error state - * capture, not in any changes happening during - * the capture. - */ - break; - } + ee->hws_page = + i915_error_object_create(dev_priv, + engine->status_page.vma); - erq = &error->ring[i].requests[count++]; - erq->seqno = request->seqno; - erq->jiffies = request->emitted_jiffies; - erq->tail = request->postfix; - } + ee->wa_ctx = + i915_error_object_create(dev_priv, engine->wa_ctx.vma); } } -/* FIXME: Since pin count/bound list is global, we duplicate what we capture per - * VM. - */ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, struct drm_i915_error_state *error, struct i915_address_space *vm, - const int ndx) + int idx) { - struct drm_i915_error_buffer *active_bo = NULL, *pinned_bo = NULL; - struct drm_i915_gem_object *obj; + struct drm_i915_error_buffer *active_bo; struct i915_vma *vma; - int i; + int count; - i = 0; + count = 0; list_for_each_entry(vma, &vm->active_list, vm_link) - i++; - error->active_bo_count[ndx] = i; - - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - list_for_each_entry(vma, &obj->vma_list, obj_link) - if (vma->vm == vm && vma->pin_count > 0) - i++; - } - error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx]; - - if (i) { - active_bo = kcalloc(i, sizeof(*active_bo), GFP_ATOMIC); - if (active_bo) - pinned_bo = active_bo + error->active_bo_count[ndx]; - } + count++; + active_bo = NULL; + if (count) + active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC); if (active_bo) - error->active_bo_count[ndx] = - capture_active_bo(active_bo, - error->active_bo_count[ndx], - &vm->active_list); - - if (pinned_bo) - error->pinned_bo_count[ndx] = - capture_pinned_bo(pinned_bo, - error->pinned_bo_count[ndx], - &dev_priv->mm.bound_list, vm); - error->active_bo[ndx] = active_bo; - error->pinned_bo[ndx] = pinned_bo; + count = capture_error_bo(active_bo, count, &vm->active_list, false); + else + count = 0; + + error->active_vm[idx] = vm; + error->active_bo[idx] = active_bo; + error->active_bo_count[idx] = count; } -static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) +static void i915_capture_active_buffers(struct drm_i915_private *dev_priv, + struct drm_i915_error_state *error) { - struct i915_address_space *vm; - int cnt = 0, i = 0; - - list_for_each_entry(vm, &dev_priv->vm_list, global_link) - cnt++; - - error->active_bo = kcalloc(cnt, sizeof(*error->active_bo), GFP_ATOMIC); - error->pinned_bo = kcalloc(cnt, sizeof(*error->pinned_bo), GFP_ATOMIC); - error->active_bo_count = kcalloc(cnt, sizeof(*error->active_bo_count), - GFP_ATOMIC); - error->pinned_bo_count = kcalloc(cnt, sizeof(*error->pinned_bo_count), - GFP_ATOMIC); - - if (error->active_bo == NULL || - error->pinned_bo == NULL || - error->active_bo_count == NULL || - error->pinned_bo_count == NULL) { - kfree(error->active_bo); - kfree(error->active_bo_count); - kfree(error->pinned_bo); - 
kfree(error->pinned_bo_count); - - error->active_bo = NULL; - error->active_bo_count = NULL; - error->pinned_bo = NULL; - error->pinned_bo_count = NULL; - } else { - list_for_each_entry(vm, &dev_priv->vm_list, global_link) - i915_gem_capture_vm(dev_priv, error, vm, i++); + int cnt = 0, i, j; - error->vm_count = cnt; + BUILD_BUG_ON(ARRAY_SIZE(error->engine) > ARRAY_SIZE(error->active_bo)); + BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_vm)); + BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_bo_count)); + + /* Scan each engine looking for unique active contexts/vm */ + for (i = 0; i < ARRAY_SIZE(error->engine); i++) { + struct drm_i915_error_engine *ee = &error->engine[i]; + bool found; + + if (!ee->vm) + continue; + + found = false; + for (j = 0; j < i && !found; j++) + found = error->engine[j].vm == ee->vm; + if (!found) + i915_gem_capture_vm(dev_priv, error, ee->vm, cnt++); } } +static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv, + struct drm_i915_error_state *error) +{ + struct i915_address_space *vm = &dev_priv->ggtt.base; + struct drm_i915_error_buffer *bo; + struct i915_vma *vma; + int count_inactive, count_active; + + count_active = 0; + list_for_each_entry(vma, &vm->active_list, vm_link) + count_active++; + + count_inactive = 0; + list_for_each_entry(vma, &vm->inactive_list, vm_link) + count_inactive++; + + bo = NULL; + if (count_inactive + count_active) + bo = kcalloc(count_inactive + count_active, + sizeof(*bo), GFP_ATOMIC); + if (!bo) + return; + + count_active = capture_error_bo(bo, count_active, + &vm->active_list, true); + count_inactive = capture_error_bo(bo + count_active, count_inactive, + &vm->inactive_list, true); + error->pinned_bo_count = count_inactive + count_active; + error->pinned_bo = bo; +} + /* Capture all registers which don't fit into another category.
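 *
 * Like i915_capture_pinned_buffers() above, everything here runs from
 * the error handler and must not sleep, so allocations throughout the
 * capture follow the same guarded GFP_ATOMIC pattern, e.g.:
 *
 *	bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC);
 *	if (!bo)
 *		return;		capture less rather than block or fail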
*/ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, struct drm_i915_error_state *error) @@ -1352,20 +1382,20 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv, const char *error_msg) { u32 ecode; - int ring_id = -1, len; + int engine_id = -1, len; - ecode = i915_error_generate_code(dev_priv, error, &ring_id); + ecode = i915_error_generate_code(dev_priv, error, &engine_id); len = scnprintf(error->error_msg, sizeof(error->error_msg), "GPU HANG: ecode %d:%d:0x%08x", - INTEL_GEN(dev_priv), ring_id, ecode); + INTEL_GEN(dev_priv), engine_id, ecode); - if (ring_id != -1 && error->ring[ring_id].pid != -1) + if (engine_id != -1 && error->engine[engine_id].pid != -1) len += scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, ", in %s [%d]", - error->ring[ring_id].comm, - error->ring[ring_id].pid); + error->engine[engine_id].comm, + error->engine[engine_id].pid); scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, ", reason: %s, action: %s", @@ -1382,6 +1412,10 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv, #endif error->reset_count = i915_reset_count(&dev_priv->gpu_error); error->suspend_count = dev_priv->suspend_count; + + memcpy(&error->device_info, + INTEL_INFO(dev_priv), + sizeof(error->device_info)); } /** @@ -1415,9 +1449,10 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, i915_capture_gen_state(dev_priv, error); i915_capture_reg_state(dev_priv, error); - i915_gem_capture_buffers(dev_priv, error); i915_gem_record_fences(dev_priv, error); i915_gem_record_rings(dev_priv, error); + i915_capture_active_buffers(dev_priv, error); + i915_capture_pinned_buffers(dev_priv, error); do_gettimeofday(&error->time); diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h b/drivers/gpu/drm/i915/i915_guc_reg.h index cf5a65be4fe0..a47e1e4aec03 100644 --- a/drivers/gpu/drm/i915/i915_guc_reg.h +++ b/drivers/gpu/drm/i915/i915_guc_reg.h @@ -103,9 +103,6 @@ #define HOST2GUC_INTERRUPT _MMIO(0xc4c8) #define HOST2GUC_TRIGGER (1<<0) -#define DRBMISC1 0x1984 -#define DOORBELL_ENABLE (1<<0) - #define GEN8_DRBREGL(x) _MMIO(0x1000 + (x) * 8) #define GEN8_DRB_VALID (1<<0) #define GEN8_DRBREGU(x) _MMIO(0x1000 + (x) * 8 + 4) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 2112e029db6a..3106dcc06fe9 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -59,7 +59,7 @@ * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which * represents in-order queue. The kernel driver packs ring tail pointer and an * ELSP context descriptor dword into Work Item. - * See guc_add_workqueue_item() + * See guc_wq_item_append() * */ @@ -114,10 +114,8 @@ static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len) if (ret != -ETIMEDOUT) ret = -EIO; - DRM_ERROR("GUC: host2guc action 0x%X failed. 
ret=%d " - "status=0x%08X response=0x%08X\n", - data[0], ret, status, - I915_READ(SOFT_SCRATCH(15))); + DRM_WARN("Action 0x%X failed; ret=%d status=0x%08X response=0x%08X\n", + data[0], ret, status, I915_READ(SOFT_SCRATCH(15))); dev_priv->guc.action_fail += 1; dev_priv->guc.action_err = ret; @@ -183,7 +181,7 @@ static int guc_update_doorbell_id(struct intel_guc *guc, struct i915_guc_client *client, u16 new_id) { - struct sg_table *sg = guc->ctx_pool_obj->pages; + struct sg_table *sg = guc->ctx_pool_vma->pages; void *doorbell_bitmap = guc->doorbell_bitmap; struct guc_doorbell_info *doorbell; struct guc_context_desc desc; @@ -290,7 +288,7 @@ static uint32_t select_doorbell_cacheline(struct intel_guc *guc) /* * Initialise the process descriptor shared with the GuC firmware. */ -static void guc_init_proc_desc(struct intel_guc *guc, +static void guc_proc_desc_init(struct intel_guc *guc, struct i915_guc_client *client) { struct guc_process_desc *desc; @@ -322,15 +320,15 @@ static void guc_init_proc_desc(struct intel_guc *guc, * write queue, etc). */ -static void guc_init_ctx_desc(struct intel_guc *guc, +static void guc_ctx_desc_init(struct intel_guc *guc, struct i915_guc_client *client) { - struct drm_i915_gem_object *client_obj = client->client_obj; struct drm_i915_private *dev_priv = guc_to_i915(guc); struct intel_engine_cs *engine; struct i915_gem_context *ctx = client->owner; struct guc_context_desc desc; struct sg_table *sg; + unsigned int tmp; u32 gfx_addr; memset(&desc, 0, sizeof(desc)); @@ -340,10 +338,10 @@ static void guc_init_ctx_desc(struct intel_guc *guc, desc.priority = client->priority; desc.db_id = client->doorbell_id; - for_each_engine(engine, dev_priv) { + for_each_engine_masked(engine, dev_priv, client->engines, tmp) { struct intel_context *ce = &ctx->engine[engine->id]; - struct guc_execlist_context *lrc = &desc.lrc[engine->guc_id]; - struct drm_i915_gem_object *obj; + uint32_t guc_engine_id = engine->guc_id; + struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id]; /* TODO: We have a design issue to be solved here. 
Only when we * receive the first batch, we know which engine is used by the @@ -358,30 +356,29 @@ static void guc_init_ctx_desc(struct intel_guc *guc, lrc->context_desc = lower_32_bits(ce->lrc_desc); /* The state page is after PPHWSP */ - gfx_addr = i915_gem_obj_ggtt_offset(ce->state); - lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE; + lrc->ring_lcra = + i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE; lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) | - (engine->guc_id << GUC_ELC_ENGINE_OFFSET); - - obj = ce->ringbuf->obj; - gfx_addr = i915_gem_obj_ggtt_offset(obj); + (guc_engine_id << GUC_ELC_ENGINE_OFFSET); - lrc->ring_begin = gfx_addr; - lrc->ring_end = gfx_addr + obj->base.size - 1; - lrc->ring_next_free_location = gfx_addr; + lrc->ring_begin = i915_ggtt_offset(ce->ring->vma); + lrc->ring_end = lrc->ring_begin + ce->ring->size - 1; + lrc->ring_next_free_location = lrc->ring_begin; lrc->ring_current_tail_pointer_value = 0; - desc.engines_used |= (1 << engine->guc_id); + desc.engines_used |= (1 << guc_engine_id); } + DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n", + client->engines, desc.engines_used); WARN_ON(desc.engines_used == 0); /* * The doorbell, process descriptor, and workqueue are all parts * of the client object, which the GuC will reference via the GGTT */ - gfx_addr = i915_gem_obj_ggtt_offset(client_obj); - desc.db_trigger_phy = sg_dma_address(client_obj->pages->sgl) + + gfx_addr = i915_ggtt_offset(client->vma); + desc.db_trigger_phy = sg_dma_address(client->vma->pages->sgl) + client->doorbell_offset; desc.db_trigger_cpu = (uintptr_t)client->client_base + client->doorbell_offset; @@ -397,12 +394,12 @@ static void guc_init_ctx_desc(struct intel_guc *guc, desc.desc_private = (uintptr_t)client; /* Pool context is pinned already */ - sg = guc->ctx_pool_obj->pages; + sg = guc->ctx_pool_vma->pages; sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), sizeof(desc) * client->ctx_index); } -static void guc_fini_ctx_desc(struct intel_guc *guc, +static void guc_ctx_desc_fini(struct intel_guc *guc, struct i915_guc_client *client) { struct guc_context_desc desc; @@ -410,13 +407,13 @@ static void guc_fini_ctx_desc(struct intel_guc *guc, memset(&desc, 0, sizeof(desc)); - sg = guc->ctx_pool_obj->pages; + sg = guc->ctx_pool_vma->pages; sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), sizeof(desc) * client->ctx_index); } /** - * i915_guc_wq_check_space() - check that the GuC can accept a request + * i915_guc_wq_reserve() - reserve space in the GuC's workqueue * @request: request associated with the commands * * Return: 0 if space is available @@ -424,39 +421,56 @@ static void guc_fini_ctx_desc(struct intel_guc *guc, * * This function must be called (and must return 0) before a request * is submitted to the GuC via i915_guc_submit() below. Once a result - * of 0 has been returned, it remains valid until (but only until) - * the next call to submit(). + * of 0 has been returned, it must be balanced by a corresponding + * call to submit(). * - * This precheck allows the caller to determine in advance that space + * Reservation allows the caller to determine in advance that space * will be available for the next submission before committing resources * to it, and helps avoid late failures with complicated recovery paths. 
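 *
 * A sketch of the accounting done below under gc->wq_lock, using the
 * standard CIRC_SPACE() helper from linux/circ_buf.h:
 *
 *	freespace = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size);
 *	freespace -= gc->wq_rsvd;		minus prior reservations
 *	if (freespace >= wqi_size)
 *		gc->wq_rsvd += wqi_size;	claim space and return 0
 *	else
 *		ret = -EAGAIN;			and bump gc->no_wq_space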
*/ -int i915_guc_wq_check_space(struct drm_i915_gem_request *request) +int i915_guc_wq_reserve(struct drm_i915_gem_request *request) { const size_t wqi_size = sizeof(struct guc_wq_item); struct i915_guc_client *gc = request->i915->guc.execbuf_client; - struct guc_process_desc *desc; + struct guc_process_desc *desc = gc->client_base + gc->proc_desc_offset; u32 freespace; + int ret; - GEM_BUG_ON(gc == NULL); + spin_lock(&gc->wq_lock); + freespace = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size); + freespace -= gc->wq_rsvd; + if (likely(freespace >= wqi_size)) { + gc->wq_rsvd += wqi_size; + ret = 0; + } else { + gc->no_wq_space++; + ret = -EAGAIN; + } + spin_unlock(&gc->wq_lock); - desc = gc->client_base + gc->proc_desc_offset; + return ret; +} - freespace = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size); - if (likely(freespace >= wqi_size)) - return 0; +void i915_guc_wq_unreserve(struct drm_i915_gem_request *request) +{ + const size_t wqi_size = sizeof(struct guc_wq_item); + struct i915_guc_client *gc = request->i915->guc.execbuf_client; - gc->no_wq_space += 1; + GEM_BUG_ON(READ_ONCE(gc->wq_rsvd) < wqi_size); - return -EAGAIN; + spin_lock(&gc->wq_lock); + gc->wq_rsvd -= wqi_size; + spin_unlock(&gc->wq_lock); } -static void guc_add_workqueue_item(struct i915_guc_client *gc, - struct drm_i915_gem_request *rq) +/* Construct a Work Item and append it to the GuC's Work Queue */ +static void guc_wq_item_append(struct i915_guc_client *gc, + struct drm_i915_gem_request *rq) { /* wqi_len is in DWords, and does not include the one-word header */ const size_t wqi_size = sizeof(struct guc_wq_item); const u32 wqi_len = wqi_size/sizeof(u32) - 1; + struct intel_engine_cs *engine = rq->engine; struct guc_process_desc *desc; struct guc_wq_item *wqi; void *base; @@ -464,7 +478,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc, desc = gc->client_base + gc->proc_desc_offset; - /* Free space is guaranteed, see i915_guc_wq_check_space() above */ + /* Free space is guaranteed, see i915_guc_wq_reserve() above */ freespace = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size); GEM_BUG_ON(freespace < wqi_size); @@ -482,31 +496,32 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc, * workqueue buffer dw by dw. 
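 *
 * For reference, the four words filled in below are:
 *	header       - WQ_TYPE_INORDER | length | target engine | flags
 *	context_desc - low 32 bits of the context descriptor
 *	ring_tail    - new ring tail, shifted by WQ_RING_TAIL_SHIFT
 *	fence_id     - the request's fence.seqno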
*/ BUILD_BUG_ON(wqi_size != 16); + GEM_BUG_ON(gc->wq_rsvd < wqi_size); /* postincrement WQ tail for next time */ wq_off = gc->wq_tail; + GEM_BUG_ON(wq_off & (wqi_size - 1)); gc->wq_tail += wqi_size; gc->wq_tail &= gc->wq_size - 1; - GEM_BUG_ON(wq_off & (wqi_size - 1)); + gc->wq_rsvd -= wqi_size; /* WQ starts from the page after doorbell / process_desc */ wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT; wq_off &= PAGE_SIZE - 1; - base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, wq_page)); + base = kmap_atomic(i915_gem_object_get_page(gc->vma->obj, wq_page)); wqi = (struct guc_wq_item *)((char *)base + wq_off); /* Now fill in the 4-word work queue item */ wqi->header = WQ_TYPE_INORDER | (wqi_len << WQ_LEN_SHIFT) | - (rq->engine->guc_id << WQ_TARGET_SHIFT) | + (engine->guc_id << WQ_TARGET_SHIFT) | WQ_NO_WCFLUSH_WAIT; /* The GuC wants only the low-order word of the context descriptor */ - wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, - rq->engine); + wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine); wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT; - wqi->fence_id = rq->seqno; + wqi->fence_id = rq->fence.seqno; kunmap_atomic(base); } @@ -553,8 +568,8 @@ static int guc_ring_doorbell(struct i915_guc_client *gc) if (db_ret.db_status == GUC_DOORBELL_DISABLED) break; - DRM_ERROR("Cookie mismatch. Expected %d, returned %d\n", - db_cmp.cookie, db_ret.cookie); + DRM_WARN("Cookie mismatch. Expected %d, found %d\n", + db_cmp.cookie, db_ret.cookie); /* update the cookie to newly read cookie from GuC */ db_cmp.cookie = db_ret.cookie; @@ -573,26 +588,26 @@ static int guc_ring_doorbell(struct i915_guc_client *gc) * Return: 0 on success, otherwise an errno. * (Note: nonzero really shouldn't happen!) * - * The caller must have already called i915_guc_wq_check_space() above - * with a result of 0 (success) since the last request submission. This - * guarantees that there is space in the work queue for the new request, - * so enqueuing the item cannot fail. + * The caller must have already called i915_guc_wq_reserve() above with + * a result of 0 (success), guaranteeing that there is space in the work + * queue for the new request, so enqueuing the item cannot fail. * * Bad Things Will Happen if the caller violates this protocol e.g. calls - * submit() when check() says there's no space, or calls submit() multiple - * times with no intervening check(). + * submit() when _reserve() says there's no space, or calls _submit() + * a different number of times from (successful) calls to _reserve(). * * The only error here arises if the doorbell hardware isn't functioning * as expected, which really shouldn't happen.
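 *
 * In outline, the body below does, under client->wq_lock:
 *
 *	guc_wq_item_append(client, rq);		// uses the prior reservation
 *	b_ret = guc_ring_doorbell(client);	// tell the GuC to process the WQ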
*/ -int i915_guc_submit(struct drm_i915_gem_request *rq) +static void i915_guc_submit(struct drm_i915_gem_request *rq) { unsigned int engine_id = rq->engine->id; struct intel_guc *guc = &rq->i915->guc; struct i915_guc_client *client = guc->execbuf_client; int b_ret; - guc_add_workqueue_item(client, rq); + spin_lock(&client->wq_lock); + guc_wq_item_append(client, rq); b_ret = guc_ring_doorbell(client); client->submissions[engine_id] += 1; @@ -601,9 +616,8 @@ int i915_guc_submit(struct drm_i915_gem_request *rq) client->b_fail += 1; guc->submissions[engine_id] += 1; - guc->last_seqno[engine_id] = rq->seqno; - - return b_ret; + guc->last_seqno[engine_id] = rq->fence.seqno; + spin_unlock(&client->wq_lock); } /* @@ -613,55 +627,48 @@ int i915_guc_submit(struct drm_i915_gem_request *rq) */ /** - * gem_allocate_guc_obj() - Allocate gem object for GuC usage - * @dev_priv: driver private data structure - * @size: size of object + * guc_allocate_vma() - Allocate a GGTT VMA for GuC usage + * @guc: the guc + * @size: size of area to allocate (both virtual space and memory) * - * This is a wrapper to create a gem obj. In order to use it inside GuC, the - * object needs to be pinned lifetime. Also we must pin it to gtt space other - * than [0, GUC_WOPCM_TOP) because this range is reserved inside GuC. + * This is a wrapper to create an object for use with the GuC. In order to + * use it inside the GuC, an object needs to be pinned for its lifetime, so we + * allocate both some backing storage and a range inside the Global GTT. We + * must pin it in the GGTT somewhere other than [0, GUC_WOPCM_TOP) because + * that range is reserved inside GuC. * - * Return: A drm_i915_gem_object if successful, otherwise NULL. + * Return: An i915_vma if successful, otherwise an ERR_PTR. */ -static struct drm_i915_gem_object * -gem_allocate_guc_obj(struct drm_i915_private *dev_priv, u32 size) +static struct i915_vma *guc_allocate_vma(struct intel_guc *guc, u32 size) { + struct drm_i915_private *dev_priv = guc_to_i915(guc); struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int ret; obj = i915_gem_object_create(&dev_priv->drm, size); if (IS_ERR(obj)) - return NULL; + return ERR_CAST(obj); - if (i915_gem_object_get_pages(obj)) { - drm_gem_object_unreference(&obj->base); - return NULL; - } + vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL); + if (IS_ERR(vma)) + goto err; - if (i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, - PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) { - drm_gem_object_unreference(&obj->base); - return NULL; + ret = i915_vma_pin(vma, 0, PAGE_SIZE, + PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP); + if (ret) { + vma = ERR_PTR(ret); + goto err; } /* Invalidate GuC TLB to let GuC take the latest updates to GTT.
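 * The write to GEN8_GTCR below performs that invalidation.
 *
 * Callers of guc_allocate_vma() check the result with IS_ERR(), as
 * i915_guc_submission_init() does further down:
 *
 *	vma = guc_allocate_vma(guc, size);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);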
*/ I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); - return obj; -} + return vma; -/** - * gem_release_guc_obj() - Release gem object allocated for GuC usage - * @obj: gem obj to be released - */ -static void gem_release_guc_obj(struct drm_i915_gem_object *obj) -{ - if (!obj) - return; - - if (i915_gem_obj_is_pinned(obj)) - i915_gem_object_ggtt_unpin(obj); - - drm_gem_object_unreference(&obj->base); +err: + i915_gem_object_put(obj); + return vma; } static void @@ -688,61 +695,74 @@ guc_client_free(struct drm_i915_private *dev_priv, kunmap(kmap_to_page(client->client_base)); } - gem_release_guc_obj(client->client_obj); + i915_vma_unpin_and_release(&client->vma); if (client->ctx_index != GUC_INVALID_CTX_ID) { - guc_fini_ctx_desc(guc, client); + guc_ctx_desc_fini(guc, client); ida_simple_remove(&guc->ctx_ids, client->ctx_index); } kfree(client); } +/* Check that a doorbell register is in the expected state */ +static bool guc_doorbell_check(struct intel_guc *guc, uint16_t db_id) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + i915_reg_t drbreg = GEN8_DRBREGL(db_id); + uint32_t value = I915_READ(drbreg); + bool enabled = (value & GUC_DOORBELL_ENABLED) != 0; + bool expected = test_bit(db_id, guc->doorbell_bitmap); + + if (enabled == expected) + return true; + + DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) 0x%x, should be %s\n", + db_id, drbreg.reg, value, + expected ? "active" : "inactive"); + + return false; +} + /* - * Borrow the first client to set up & tear down every doorbell + * Borrow the first client to set up & tear down each unused doorbell * in turn, to ensure that all doorbell h/w is (re)initialised. */ static void guc_init_doorbell_hw(struct intel_guc *guc) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); struct i915_guc_client *client = guc->execbuf_client; - uint16_t db_id, i; - int err; + uint16_t db_id; + int i, err; + /* Save client's original doorbell selection */ db_id = client->doorbell_id; for (i = 0; i < GUC_MAX_DOORBELLS; ++i) { - i915_reg_t drbreg = GEN8_DRBREGL(i); - u32 value = I915_READ(drbreg); + /* Skip if doorbell is OK */ + if (guc_doorbell_check(guc, i)) + continue; err = guc_update_doorbell_id(guc, client, i); - - /* Report update failure or unexpectedly active doorbell */ - if (err || (i != db_id && (value & GUC_DOORBELL_ENABLED))) - DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) was 0x%x, err %d\n", - i, drbreg.reg, value, err); + if (err) + DRM_DEBUG_DRIVER("Doorbell %d update failed, err %d\n", + i, err); } /* Restore to original value */ err = guc_update_doorbell_id(guc, client, db_id); if (err) - DRM_ERROR("Failed to restore doorbell to %d, err %d\n", - db_id, err); + DRM_WARN("Failed to restore doorbell to %d, err %d\n", + db_id, err); - for (i = 0; i < GUC_MAX_DOORBELLS; ++i) { - i915_reg_t drbreg = GEN8_DRBREGL(i); - u32 value = I915_READ(drbreg); - - if (i != db_id && (value & GUC_DOORBELL_ENABLED)) - DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) finally 0x%x\n", - i, drbreg.reg, value); - - } + /* Read back & verify all doorbell registers */ + for (i = 0; i < GUC_MAX_DOORBELLS; ++i) + (void)guc_doorbell_check(guc, i); } /** * guc_client_alloc() - Allocate an i915_guc_client * @dev_priv: driver private data structure + * @engines: The set of engines to enable for this client * @priority: four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW * The kernel client to replace ExecList submission is created with * NORMAL priority. 
Priority of a client for scheduler can be HIGH, @@ -754,22 +774,24 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) */ static struct i915_guc_client * guc_client_alloc(struct drm_i915_private *dev_priv, + uint32_t engines, uint32_t priority, struct i915_gem_context *ctx) { struct i915_guc_client *client; struct intel_guc *guc = &dev_priv->guc; - struct drm_i915_gem_object *obj; + struct i915_vma *vma; uint16_t db_id; client = kzalloc(sizeof(*client), GFP_KERNEL); if (!client) return NULL; - client->doorbell_id = GUC_INVALID_DOORBELL_ID; - client->priority = priority; client->owner = ctx; client->guc = guc; + client->engines = engines; + client->priority = priority; + client->doorbell_id = GUC_INVALID_DOORBELL_ID; client->ctx_index = (uint32_t)ida_simple_get(&guc->ctx_ids, 0, GUC_MAX_GPU_CONTEXTS, GFP_KERNEL); @@ -779,13 +801,15 @@ guc_client_alloc(struct drm_i915_private *dev_priv, } /* The first page is doorbell/proc_desc. Two followed pages are wq. */ - obj = gem_allocate_guc_obj(dev_priv, GUC_DB_SIZE + GUC_WQ_SIZE); - if (!obj) + vma = guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE); + if (IS_ERR(vma)) goto err; /* We'll keep just the first (doorbell/proc) page permanently kmap'd. */ - client->client_obj = obj; - client->client_base = kmap(i915_gem_object_get_page(obj, 0)); + client->vma = vma; + client->client_base = kmap(i915_vma_first_page(vma)); + + spin_lock_init(&client->wq_lock); client->wq_offset = GUC_DB_SIZE; client->wq_size = GUC_WQ_SIZE; @@ -806,29 +830,26 @@ guc_client_alloc(struct drm_i915_private *dev_priv, else client->proc_desc_offset = (GUC_DB_SIZE / 2); - guc_init_proc_desc(guc, client); - guc_init_ctx_desc(guc, client); + guc_proc_desc_init(guc, client); + guc_ctx_desc_init(guc, client); if (guc_init_doorbell(guc, client, db_id)) goto err; - DRM_DEBUG_DRIVER("new priority %u client %p: ctx_index %u\n", - priority, client, client->ctx_index); + DRM_DEBUG_DRIVER("new priority %u client %p for engine(s) 0x%x: ctx_index %u\n", + priority, client, client->engines, client->ctx_index); DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%x\n", client->doorbell_id, client->doorbell_offset); return client; err: - DRM_ERROR("FAILED to create priority %u GuC client!\n", priority); - guc_client_free(dev_priv, client); return NULL; } -static void guc_create_log(struct intel_guc *guc) +static void guc_log_create(struct intel_guc *guc) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct drm_i915_gem_object *obj; + struct i915_vma *vma; unsigned long offset; uint32_t size, flags; @@ -844,16 +865,16 @@ static void guc_create_log(struct intel_guc *guc) GUC_LOG_ISR_PAGES + 1 + GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT; - obj = guc->log_obj; - if (!obj) { - obj = gem_allocate_guc_obj(dev_priv, size); - if (!obj) { + vma = guc->log_vma; + if (!vma) { + vma = guc_allocate_vma(guc, size); + if (IS_ERR(vma)) { /* logging will be off */ i915.guc_log_level = -1; return; } - guc->log_obj = obj; + guc->log_vma = vma; } /* each allocated unit is a page */ @@ -862,11 +883,11 @@ static void guc_create_log(struct intel_guc *guc) (GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) | (GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT); - offset = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT; /* in pages */ + offset = i915_ggtt_offset(vma) >> PAGE_SHIFT; /* in pages */ guc->log_flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags; } -static void init_guc_policies(struct guc_policies *policies) +static void guc_policies_init(struct guc_policies *policies) { struct guc_policy *policy; u32 p, i; @@ 
-888,10 +909,10 @@ static void init_guc_policies(struct guc_policies *policies) policies->is_valid = 1; } -static void guc_create_ads(struct intel_guc *guc) +static void guc_addon_create(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct drm_i915_gem_object *obj; + struct i915_vma *vma; struct guc_ads *ads; struct guc_policies *policies; struct guc_mmio_reg_state *reg_state; @@ -904,16 +925,16 @@ static void guc_create_ads(struct intel_guc *guc) sizeof(struct guc_mmio_reg_state) + GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE; - obj = guc->ads_obj; - if (!obj) { - obj = gem_allocate_guc_obj(dev_priv, PAGE_ALIGN(size)); - if (!obj) + vma = guc->ads_vma; + if (!vma) { + vma = guc_allocate_vma(guc, PAGE_ALIGN(size)); + if (IS_ERR(vma)) return; - guc->ads_obj = obj; + guc->ads_vma = vma; } - page = i915_gem_object_get_page(obj, 0); + page = i915_vma_first_page(vma); ads = kmap(page); /* @@ -924,17 +945,17 @@ static void guc_create_ads(struct intel_guc *guc) * to find it. */ engine = &dev_priv->engine[RCS]; - ads->golden_context_lrca = engine->status_page.gfx_addr; + ads->golden_context_lrca = engine->status_page.ggtt_offset; for_each_engine(engine, dev_priv) ads->eng_state_size[engine->guc_id] = intel_lr_context_size(engine); /* GuC scheduling policies */ policies = (void *)ads + sizeof(struct guc_ads); - init_guc_policies(policies); + guc_policies_init(policies); - ads->scheduler_policies = i915_gem_obj_ggtt_offset(obj) + - sizeof(struct guc_ads); + ads->scheduler_policies = + i915_ggtt_offset(vma) + sizeof(struct guc_ads); /* MMIO reg state */ reg_state = (void *)policies + sizeof(struct guc_policies); @@ -966,6 +987,7 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) const size_t poolsize = GUC_MAX_GPU_CONTEXTS * ctxsize; const size_t gemsize = round_up(poolsize, PAGE_SIZE); struct intel_guc *guc = &dev_priv->guc; + struct i915_vma *vma; /* Wipe bitmap & delete client in case of reinitialisation */ bitmap_clear(guc->doorbell_bitmap, 0, GUC_MAX_DOORBELLS); @@ -974,16 +996,17 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) if (!i915.enable_guc_submission) return 0; /* not enabled */ - if (guc->ctx_pool_obj) + if (guc->ctx_pool_vma) return 0; /* already allocated */ - guc->ctx_pool_obj = gem_allocate_guc_obj(dev_priv, gemsize); - if (!guc->ctx_pool_obj) - return -ENOMEM; + vma = guc_allocate_vma(guc, gemsize); + if (IS_ERR(vma)) + return PTR_ERR(vma); + guc->ctx_pool_vma = vma; ida_init(&guc->ctx_ids); - guc_create_log(guc); - guc_create_ads(guc); + guc_log_create(guc); + guc_addon_create(guc); return 0; } @@ -992,13 +1015,16 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; struct i915_guc_client *client; + struct intel_engine_cs *engine; + struct drm_i915_gem_request *request; /* client for execbuf submission */ client = guc_client_alloc(dev_priv, + INTEL_INFO(dev_priv)->ring_mask, GUC_CTX_PRIORITY_KMD_NORMAL, dev_priv->kernel_context); if (!client) { - DRM_ERROR("Failed to create execbuf guc_client\n"); + DRM_ERROR("Failed to create normal GuC client!\n"); return -ENOMEM; } @@ -1006,6 +1032,18 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) host2guc_sample_forcewake(guc, client); guc_init_doorbell_hw(guc); + /* Take over from manual control of ELSP (execlists) */ + for_each_engine(engine, dev_priv) { + engine->submit_request = i915_guc_submit; + + /* Replay the current set of previously submitted requests */ + list_for_each_entry(request, 
&engine->request_list, link) { + client->wq_rsvd += sizeof(struct guc_wq_item); + if (i915_sw_fence_done(&request->submit)) + i915_guc_submit(request); + } + } + return 0; } @@ -1013,6 +1051,12 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; + if (!guc->execbuf_client) + return; + + /* Revert back to manual ELSP submission */ + intel_execlists_enable_submission(dev_priv); + guc_client_free(dev_priv, guc->execbuf_client); guc->execbuf_client = NULL; } @@ -1021,16 +1065,12 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; - gem_release_guc_obj(dev_priv->guc.ads_obj); - guc->ads_obj = NULL; - - gem_release_guc_obj(dev_priv->guc.log_obj); - guc->log_obj = NULL; + i915_vma_unpin_and_release(&guc->ads_vma); + i915_vma_unpin_and_release(&guc->log_vma); - if (guc->ctx_pool_obj) + if (guc->ctx_pool_vma) ida_destroy(&guc->ctx_ids); - gem_release_guc_obj(guc->ctx_pool_obj); - guc->ctx_pool_obj = NULL; + i915_vma_unpin_and_release(&guc->ctx_pool_vma); } /** @@ -1053,7 +1093,7 @@ int intel_guc_suspend(struct drm_device *dev) /* any value greater than GUC_POWER_D0 */ data[1] = GUC_POWER_D1; /* first page is shared data with GuC */ - data[2] = i915_gem_obj_ggtt_offset(ctx->engine[RCS].state); + data[2] = i915_ggtt_offset(ctx->engine[RCS].state); return host2guc_action(guc, data, ARRAY_SIZE(data)); } @@ -1078,7 +1118,7 @@ int intel_guc_resume(struct drm_device *dev) data[0] = HOST2GUC_ACTION_EXIT_S_STATE; data[1] = GUC_POWER_D0; /* first page is shared data with GuC */ - data[2] = i915_gem_obj_ggtt_offset(ctx->engine[RCS].state); + data[2] = i915_ggtt_offset(ctx->engine[RCS].state); return host2guc_action(guc, data, ARRAY_SIZE(data)); } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 1c2aec392412..3fc286cd1157 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -350,6 +350,9 @@ void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv) void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv) { + if (READ_ONCE(dev_priv->rps.interrupts_enabled)) + return; + spin_lock_irq(&dev_priv->irq_lock); WARN_ON_ONCE(dev_priv->rps.pm_iir); WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events); @@ -368,10 +371,13 @@ u32 gen6_sanitize_rps_pm_mask(struct drm_i915_private *dev_priv, u32 mask) void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) { + if (!READ_ONCE(dev_priv->rps.interrupts_enabled)) + return; + spin_lock_irq(&dev_priv->irq_lock); dev_priv->rps.interrupts_enabled = false; - I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0)); + I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u)); __gen6_disable_pm_irq(dev_priv, dev_priv->pm_rps_events); I915_WRITE(gen6_pm_ier(dev_priv), I915_READ(gen6_pm_ier(dev_priv)) & @@ -656,12 +662,6 @@ static void i915_enable_asle_pipestat(struct drm_i915_private *dev_priv) * of horizontal active on the first line of vertical active */ -static u32 i8xx_get_vblank_counter(struct drm_device *dev, unsigned int pipe) -{ - /* Gen2 doesn't have a hardware frame counter */ - return 0; -} - /* Called from drm generic code, passed a 'crtc', which * we use as a pipe index */ @@ -978,10 +978,8 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) static void notify_ring(struct intel_engine_cs *engine) { smp_store_mb(engine->breadcrumbs.irq_posted, true); - if (intel_engine_wakeup(engine)) { + if 
(intel_engine_wakeup(engine)) trace_i915_gem_request_notify(engine); - engine->breadcrumbs.irq_wakeups++; - } } static void vlv_c0_read(struct drm_i915_private *dev_priv, @@ -1105,9 +1103,10 @@ static void gen6_pm_rps_work(struct work_struct *work) new_delay = dev_priv->rps.cur_freq; min = dev_priv->rps.min_freq_softlimit; max = dev_priv->rps.max_freq_softlimit; - - if (client_boost) { - new_delay = dev_priv->rps.max_freq_softlimit; + if (client_boost || any_waiters(dev_priv)) + max = dev_priv->rps.max_freq; + if (client_boost && new_delay < dev_priv->rps.boost_freq) { + new_delay = dev_priv->rps.boost_freq; adj = 0; } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { if (adj > 0) @@ -1122,7 +1121,7 @@ static void gen6_pm_rps_work(struct work_struct *work) new_delay = dev_priv->rps.efficient_freq; adj = 0; } - } else if (any_waiters(dev_priv)) { + } else if (client_boost || any_waiters(dev_priv)) { adj = 0; } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq) @@ -2504,57 +2503,52 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv) char *error_event[] = { I915_ERROR_UEVENT "=1", NULL }; char *reset_event[] = { I915_RESET_UEVENT "=1", NULL }; char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL }; - int ret; kobject_uevent_env(kobj, KOBJ_CHANGE, error_event); + DRM_DEBUG_DRIVER("resetting chip\n"); + kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event); + /* - * Note that there's only one work item which does gpu resets, so we - * need not worry about concurrent gpu resets potentially incrementing - * error->reset_counter twice. We only need to take care of another - * racing irq/hangcheck declaring the gpu dead for a second time. A - * quick check for that is good enough: schedule_work ensures the - * correct ordering between hang detection and this work item, and since - * the reset in-progress bit is only ever set by code outside of this - * work we don't need to worry about any other races. + * In most cases it's guaranteed that we get here with an RPM + * reference held, for example because there is a pending GPU + * request that won't finish until the reset is done. This + * isn't the case at least when we get here by doing a + * simulated reset via debugfs, so get an RPM reference. */ - if (i915_reset_in_progress(&dev_priv->gpu_error)) { - DRM_DEBUG_DRIVER("resetting chip\n"); - kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event); - - /* - * In most cases it's guaranteed that we get here with an RPM - * reference held, for example because there is a pending GPU - * request that won't finish until the reset is done. This - * isn't the case at least when we get here by doing a - * simulated reset via debugs, so get an RPM reference. - */ - intel_runtime_pm_get(dev_priv); - - intel_prepare_reset(dev_priv); + intel_runtime_pm_get(dev_priv); + intel_prepare_reset(dev_priv); + do { /* * All state reset _must_ be completed before we update the * reset counter, for otherwise waiters might miss the reset * pending state and not properly drop locks, resulting in * deadlocks with the reset work.
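 *
 * Hence the trylock loop below: keep retrying the reset under
 * struct_mutex until the reset path clears I915_RESET_IN_PROGRESS
 * (wait_on_bit_timeout() then returns 0); the one-second timeout
 * only serves to retry the trylock if someone else held the mutex.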
*/ - ret = i915_reset(dev_priv); + if (mutex_trylock(&dev_priv->drm.struct_mutex)) { + i915_reset(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); + } - intel_finish_reset(dev_priv); + /* We need to wait for anyone holding the lock to wakeup */ + } while (wait_on_bit_timeout(&dev_priv->gpu_error.flags, + I915_RESET_IN_PROGRESS, + TASK_UNINTERRUPTIBLE, + HZ)); - intel_runtime_pm_put(dev_priv); + intel_finish_reset(dev_priv); + intel_runtime_pm_put(dev_priv); - if (ret == 0) - kobject_uevent_env(kobj, - KOBJ_CHANGE, reset_done_event); + if (!test_bit(I915_WEDGED, &dev_priv->gpu_error.flags)) + kobject_uevent_env(kobj, + KOBJ_CHANGE, reset_done_event); - /* - * Note: The wake_up also serves as a memory barrier so that - * waiters see the update value of the reset counter atomic_t. - */ - wake_up_all(&dev_priv->gpu_error.reset_queue); - } + /* + * Note: The wake_up also serves as a memory barrier so that + * waiters see the updated value of the dev_priv->gpu_error. + */ + wake_up_all(&dev_priv->gpu_error.reset_queue); } static void i915_report_and_clear_eir(struct drm_i915_private *dev_priv) @@ -2673,25 +2667,26 @@ void i915_handle_error(struct drm_i915_private *dev_priv, i915_capture_error_state(dev_priv, engine_mask, error_msg); i915_report_and_clear_eir(dev_priv); - if (engine_mask) { - atomic_or(I915_RESET_IN_PROGRESS_FLAG, - &dev_priv->gpu_error.reset_counter); + if (!engine_mask) + return; - /* - * Wakeup waiting processes so that the reset function - * i915_reset_and_wakeup doesn't deadlock trying to grab - * various locks. By bumping the reset counter first, the woken - * processes will see a reset in progress and back off, - * releasing their locks and then wait for the reset completion. - * We must do this for _all_ gpu waiters that might hold locks - * that the reset work needs to acquire. - * - * Note: The wake_up serves as the required memory barrier to - * ensure that the waiters see the updated value of the reset - * counter atomic_t. - */ - i915_error_wake_up(dev_priv); - } + if (test_and_set_bit(I915_RESET_IN_PROGRESS, + &dev_priv->gpu_error.flags)) + return; + + /* + * Wakeup waiting processes so that the reset function + * i915_reset_and_wakeup doesn't deadlock trying to grab + * various locks. By bumping the reset counter first, the woken + * processes will see a reset in progress and back off, + * releasing their locks and then wait for the reset completion. + * We must do this for _all_ gpu waiters that might hold locks + * that the reset work needs to acquire. + * + * Note: The wake_up also provides a memory barrier to ensure that the + * waiters see the updated value of the reset flags. 
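+ *
+ * The test_and_set_bit() above also ensures that only the first
+ * caller reaches i915_reset_and_wakeup() for a given reset, so this
+ * wakeup/reset sequence cannot race with itself.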
+ */ + i915_error_wake_up(dev_priv); i915_reset_and_wakeup(dev_priv); } @@ -2804,13 +2799,6 @@ static void gen8_disable_vblank(struct drm_device *dev, unsigned int pipe) } static bool -ring_idle(struct intel_engine_cs *engine, u32 seqno) -{ - return i915_seqno_passed(seqno, - READ_ONCE(engine->last_submitted_seqno)); -} - -static bool ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr) { if (INTEL_GEN(engine->i915) >= 8) { @@ -2834,7 +2822,7 @@ semaphore_wait_to_signaller_ring(struct intel_engine_cs *engine, u32 ipehr, if (engine == signaller) continue; - if (offset == signaller->semaphore.signal_ggtt[engine->id]) + if (offset == signaller->semaphore.signal_ggtt[engine->hw_id]) return signaller; } } else { @@ -2844,21 +2832,22 @@ semaphore_wait_to_signaller_ring(struct intel_engine_cs *engine, u32 ipehr, if(engine == signaller) continue; - if (sync_bits == signaller->semaphore.mbox.wait[engine->id]) + if (sync_bits == signaller->semaphore.mbox.wait[engine->hw_id]) return signaller; } } - DRM_ERROR("No signaller ring found for ring %i, ipehr 0x%08x, offset 0x%016llx\n", - engine->id, ipehr, offset); + DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x, offset 0x%016llx\n", + engine->name, ipehr, offset); - return NULL; + return ERR_PTR(-ENODEV); } static struct intel_engine_cs * semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno) { struct drm_i915_private *dev_priv = engine->i915; + void __iomem *vaddr; u32 cmd, ipehr, head; u64 offset = 0; int i, backwards; @@ -2897,6 +2886,7 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno) */ head = I915_READ_HEAD(engine) & HEAD_ADDR; backwards = (INTEL_GEN(dev_priv) >= 8) ? 5 : 4; + vaddr = (void __iomem *)engine->buffer->vaddr; for (i = backwards; i; --i) { /* @@ -2907,7 +2897,7 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno) head &= engine->buffer->size - 1; /* This here seems to blow up */ - cmd = ioread32(engine->buffer->virtual_start + head); + cmd = ioread32(vaddr + head); if (cmd == ipehr) break; @@ -2917,11 +2907,11 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno) if (!i) return NULL; - *seqno = ioread32(engine->buffer->virtual_start + head + 4) + 1; + *seqno = ioread32(vaddr + head + 4) + 1; if (INTEL_GEN(dev_priv) >= 8) { - offset = ioread32(engine->buffer->virtual_start + head + 12); + offset = ioread32(vaddr + head + 12); offset <<= 32; - offset = ioread32(engine->buffer->virtual_start + head + 8); + offset |= ioread32(vaddr + head + 8); } return semaphore_wait_to_signaller_ring(engine, ipehr, offset); } @@ -2938,6 +2928,9 @@ static int semaphore_passed(struct intel_engine_cs *engine) if (signaller == NULL) return -1; + if (IS_ERR(signaller)) + return 0; + /* Prevent pathological recursion due to driver bugs */ if (signaller->hangcheck.deadlock >= I915_NUM_ENGINES) return -1; @@ -2990,7 +2983,7 @@ static bool subunits_stuck(struct intel_engine_cs *engine) return stuck; } -static enum intel_ring_hangcheck_action +static enum intel_engine_hangcheck_action head_stuck(struct intel_engine_cs *engine, u64 acthd) { if (acthd != engine->hangcheck.acthd) { @@ -3008,11 +3001,11 @@ head_stuck(struct intel_engine_cs *engine, u64 acthd) return HANGCHECK_HUNG; } -static enum intel_ring_hangcheck_action -ring_stuck(struct intel_engine_cs *engine, u64 acthd) +static enum intel_engine_hangcheck_action +engine_stuck(struct intel_engine_cs *engine, u64 acthd) { struct drm_i915_private *dev_priv = engine->i915; - enum intel_ring_hangcheck_action ha; + enum 
intel_engine_hangcheck_action ha; u32 tmp; ha = head_stuck(engine, acthd); @@ -3054,22 +3047,6 @@ ring_stuck(struct intel_engine_cs *engine, u64 acthd) return HANGCHECK_HUNG; } -static unsigned long kick_waiters(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - unsigned long irq_count = READ_ONCE(engine->breadcrumbs.irq_wakeups); - - if (engine->hangcheck.user_interrupts == irq_count && - !test_and_set_bit(engine->id, &i915->gpu_error.missed_irq_rings)) { - if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings)) - DRM_ERROR("Hangcheck timer elapsed... %s idle\n", - engine->name); - - intel_engine_enable_fake_irq(engine); - } - - return irq_count; -} /* * This is called when the chip hasn't reported back with completed * batchbuffers in a long time. We keep track per ring seqno progress and @@ -3107,7 +3084,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) bool busy = intel_engine_has_waiter(engine); u64 acthd; u32 seqno; - unsigned user_interrupts; + u32 submit; semaphore_clear_deadlocks(dev_priv); @@ -3121,29 +3098,22 @@ static void i915_hangcheck_elapsed(struct work_struct *work) if (engine->irq_seqno_barrier) engine->irq_seqno_barrier(engine); - acthd = intel_ring_get_active_head(engine); + acthd = intel_engine_get_active_head(engine); seqno = intel_engine_get_seqno(engine); - - /* Reset stuck interrupts between batch advances */ - user_interrupts = 0; + submit = READ_ONCE(engine->last_submitted_seqno); if (engine->hangcheck.seqno == seqno) { - if (ring_idle(engine, seqno)) { + if (i915_seqno_passed(seqno, submit)) { engine->hangcheck.action = HANGCHECK_IDLE; - if (busy) { - /* Safeguard against driver failure */ - user_interrupts = kick_waiters(engine); - engine->hangcheck.score += BUSY; - } } else { /* We always increment the hangcheck score - * if the ring is busy and still processing + * if the engine is busy and still processing * the same request, so that no single request * can run indefinitely (such as a chain of * batches). The only time we do not increment * the hangcheck score on this ring, if this - * ring is in a legitimate wait for another - * ring. In that case the waiting ring is a + * engine is in a legitimate wait for another + * engine. In that case the waiting engine is a * victim and we want to be sure we catch the * right culprit. Then every time we do kick * the ring, add a small increment to the @@ -3151,8 +3121,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work) * being repeatedly kicked and so responsible * for stalling the machine. */ - engine->hangcheck.action = ring_stuck(engine, - acthd); + engine->hangcheck.action = + engine_stuck(engine, acthd); switch (engine->hangcheck.action) { case HANGCHECK_IDLE: @@ -3195,12 +3165,12 @@ static void i915_hangcheck_elapsed(struct work_struct *work) engine->hangcheck.seqno = seqno; engine->hangcheck.acthd = acthd; - engine->hangcheck.user_interrupts = user_interrupts; busy_count += busy; } if (hung) { char msg[80]; + unsigned int tmp; int len; /* If some rings hung but others were still busy, only @@ -3210,7 +3180,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) hung &= ~stuck; len = scnprintf(msg, sizeof(msg), "%s on ", stuck == hung ? 
"No progress" : "Hang"); - for_each_engine_masked(engine, dev_priv, hung) + for_each_engine_masked(engine, dev_priv, hung, tmp) len += scnprintf(msg + len, sizeof(msg) - len, "%s, ", engine->name); msg[len-2] = '\0'; @@ -4536,14 +4506,15 @@ void intel_irq_init(struct drm_i915_private *dev_priv) dev_priv->rps.pm_intr_keep |= GEN6_PM_RP_UP_EI_EXPIRED; if (INTEL_INFO(dev_priv)->gen >= 8) - dev_priv->rps.pm_intr_keep |= GEN8_PMINTR_REDIRECT_TO_NON_DISP; + dev_priv->rps.pm_intr_keep |= GEN8_PMINTR_REDIRECT_TO_GUC; INIT_DELAYED_WORK(&dev_priv->gpu_error.hangcheck_work, i915_hangcheck_elapsed); if (IS_GEN2(dev_priv)) { + /* Gen2 doesn't have a hardware frame counter */ dev->max_vblank_count = 0; - dev->driver->get_vblank_counter = i8xx_get_vblank_counter; + dev->driver->get_vblank_counter = drm_vblank_no_hw_counter; } else if (IS_G4X(dev_priv) || INTEL_INFO(dev_priv)->gen >= 5) { dev->max_vblank_count = 0xffffffff; /* full 32 bit counter */ dev->driver->get_vblank_counter = g4x_get_vblank_counter; diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/i915/i915_memcpy.c new file mode 100644 index 000000000000..49a079494b68 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_memcpy.c @@ -0,0 +1,101 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/kernel.h> +#include <asm/fpu/api.h> + +#include "i915_drv.h" + +static DEFINE_STATIC_KEY_FALSE(has_movntdqa); + +#ifdef CONFIG_AS_MOVNTDQA +static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len) +{ + kernel_fpu_begin(); + + len >>= 4; + while (len >= 4) { + asm("movntdqa (%0), %%xmm0\n" + "movntdqa 16(%0), %%xmm1\n" + "movntdqa 32(%0), %%xmm2\n" + "movntdqa 48(%0), %%xmm3\n" + "movaps %%xmm0, (%1)\n" + "movaps %%xmm1, 16(%1)\n" + "movaps %%xmm2, 32(%1)\n" + "movaps %%xmm3, 48(%1)\n" + :: "r" (src), "r" (dst) : "memory"); + src += 64; + dst += 64; + len -= 4; + } + while (len--) { + asm("movntdqa (%0), %%xmm0\n" + "movaps %%xmm0, (%1)\n" + :: "r" (src), "r" (dst) : "memory"); + src += 16; + dst += 16; + } + + kernel_fpu_end(); +} +#endif + +/** + * i915_memcpy_from_wc: perform an accelerated *aligned* read from WC + * @dst: destination pointer + * @src: source pointer + * @len: how many bytes to copy + * + * i915_memcpy_from_wc copies @len bytes from @src to @dst using + * non-temporal instructions where available. 
Note that all arguments + * (@src, @dst) must be aligned to 16 bytes and @len must be a multiple + * of 16. + * + * To test whether accelerated reads from WC are supported, use + * i915_memcpy_from_wc(NULL, NULL, 0); + * + * Returns true if the copy was successful, false if the preconditions + * are not met. + */ +bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len) +{ + if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15)) + return false; + +#ifdef CONFIG_AS_MOVNTDQA + if (static_branch_likely(&has_movntdqa)) { + if (likely(len)) + __memcpy_ntdqa(dst, src, len); + return true; + } +#endif + + return false; +} + +void i915_memcpy_init_early(struct drm_i915_private *dev_priv) +{ + if (static_cpu_has(X86_FEATURE_XMM4_1)) + static_branch_enable(&has_movntdqa); +} diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c new file mode 100644 index 000000000000..e4935dd1fd37 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_mm.c @@ -0,0 +1,84 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/mm.h> +#include <linux/io-mapping.h> + +#include <asm/pgtable.h> + +#include "i915_drv.h" + +struct remap_pfn { + struct mm_struct *mm; + unsigned long pfn; + pgprot_t prot; +}; + +static int remap_pfn(pte_t *pte, pgtable_t token, + unsigned long addr, void *data) +{ + struct remap_pfn *r = data; + + /* Special PTE are not associated with any struct page */ + set_pte_at(r->mm, addr, pte, pte_mkspecial(pfn_pte(r->pfn, r->prot))); + r->pfn++; + + return 0; +} + +/** + * remap_io_mapping - remap an IO mapping to userspace + * @vma: user vma to map to + * @addr: target user address to start at + * @pfn: physical address of kernel memory + * @size: size of map area + * @iomap: the source io_mapping + * + * Note: this is only safe if the mm semaphore is held when called. + */ +int remap_io_mapping(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, unsigned long size, + struct io_mapping *iomap) +{ + struct remap_pfn r; + int err; + + GEM_BUG_ON((vma->vm_flags & + (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)) != + (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)); + + /* We rely on prevalidation of the io-mapping to skip track_pfn(). 
*/ + r.mm = vma->vm_mm; + r.pfn = pfn; + r.prot = __pgprot((pgprot_val(iomap->prot) & _PAGE_CACHE_MASK) | + (pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK)); + + err = apply_to_page_range(r.mm, addr, size, remap_pfn, &r); + if (unlikely(err)) { + zap_vma_ptes(vma, addr, (r.pfn - pfn) << PAGE_SHIFT); + return err; + } + + return 0; +} diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index b6e404c91eed..768ad89d9cd4 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -45,6 +45,7 @@ struct i915_params i915 __read_mostly = { .fastboot = 0, .prefault_disable = 0, .load_detect_test = 0, + .force_reset_modeset_test = 0, .reset = true, .invert_brightness = 0, .disable_display = 0, @@ -161,6 +162,11 @@ MODULE_PARM_DESC(load_detect_test, "Force-enable the VGA load detect code for testing (default:false). " "For developers only."); +module_param_named_unsafe(force_reset_modeset_test, i915.force_reset_modeset_test, bool, 0600); +MODULE_PARM_DESC(force_reset_modeset_test, + "Force a modeset during gpu reset for testing (default:false). " + "For developers only."); + module_param_named_unsafe(invert_brightness, i915.invert_brightness, int, 0600); MODULE_PARM_DESC(invert_brightness, "Invert backlight brightness " diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 0ad020b4a925..3a0dd78ddb38 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -57,6 +57,7 @@ struct i915_params { bool fastboot; bool prefault_disable; bool load_detect_test; + bool force_reset_modeset_test; bool reset; bool disable_display; bool verbose_state_checks; diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 949c01686a66..687c768833b3 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -54,207 +54,216 @@ #define CHV_COLORS \ .color = { .degamma_lut_size = 65, .gamma_lut_size = 257 } +#define GEN2_FEATURES \ + .gen = 2, .num_pipes = 1, \ + .has_overlay = 1, .overlay_needs_physical = 1, \ + .has_gmch_display = 1, \ + .hws_needs_physical = 1, \ + .ring_mask = RENDER_RING, \ + GEN_DEFAULT_PIPEOFFSETS, \ + CURSOR_OFFSETS + static const struct intel_device_info intel_i830_info = { - .gen = 2, .is_mobile = 1, .cursor_needs_physical = 1, .num_pipes = 2, - .has_overlay = 1, .overlay_needs_physical = 1, - .ring_mask = RENDER_RING, - GEN_DEFAULT_PIPEOFFSETS, - CURSOR_OFFSETS, + GEN2_FEATURES, + .is_mobile = 1, .cursor_needs_physical = 1, + .num_pipes = 2, /* legal, last one wins */ }; static const struct intel_device_info intel_845g_info = { - .gen = 2, .num_pipes = 1, - .has_overlay = 1, .overlay_needs_physical = 1, - .ring_mask = RENDER_RING, - GEN_DEFAULT_PIPEOFFSETS, - CURSOR_OFFSETS, + GEN2_FEATURES, }; static const struct intel_device_info intel_i85x_info = { - .gen = 2, .is_i85x = 1, .is_mobile = 1, .num_pipes = 2, + GEN2_FEATURES, + .is_i85x = 1, .is_mobile = 1, + .num_pipes = 2, /* legal, last one wins */ .cursor_needs_physical = 1, - .has_overlay = 1, .overlay_needs_physical = 1, .has_fbc = 1, - .ring_mask = RENDER_RING, - GEN_DEFAULT_PIPEOFFSETS, - CURSOR_OFFSETS, }; static const struct intel_device_info intel_i865g_info = { - .gen = 2, .num_pipes = 1, - .has_overlay = 1, .overlay_needs_physical = 1, - .ring_mask = RENDER_RING, - GEN_DEFAULT_PIPEOFFSETS, - CURSOR_OFFSETS, + GEN2_FEATURES, }; +#define GEN3_FEATURES \ + .gen = 3, .num_pipes = 2, \ + .has_gmch_display = 1, \ + .ring_mask = RENDER_RING, \ + 
GEN_DEFAULT_PIPEOFFSETS, \ + CURSOR_OFFSETS + static const struct intel_device_info intel_i915g_info = { - .gen = 3, .is_i915g = 1, .cursor_needs_physical = 1, .num_pipes = 2, + GEN3_FEATURES, + .is_i915g = 1, .cursor_needs_physical = 1, .has_overlay = 1, .overlay_needs_physical = 1, - .ring_mask = RENDER_RING, - GEN_DEFAULT_PIPEOFFSETS, - CURSOR_OFFSETS, + .hws_needs_physical = 1, }; static const struct intel_device_info intel_i915gm_info = { - .gen = 3, .is_mobile = 1, .num_pipes = 2, + GEN3_FEATURES, + .is_mobile = 1, .cursor_needs_physical = 1, .has_overlay = 1, .overlay_needs_physical = 1, .supports_tv = 1, .has_fbc = 1, - .ring_mask = RENDER_RING, - GEN_DEFAULT_PIPEOFFSETS, - CURSOR_OFFSETS, + .hws_needs_physical = 1, }; static const struct intel_device_info intel_i945g_info = { - .gen = 3, .has_hotplug = 1, .cursor_needs_physical = 1, .num_pipes = 2, + GEN3_FEATURES, + .has_hotplug = 1, .cursor_needs_physical = 1, .has_overlay = 1, .overlay_needs_physical = 1, - .ring_mask = RENDER_RING, - GEN_DEFAULT_PIPEOFFSETS, - CURSOR_OFFSETS, + .hws_needs_physical = 1, }; static const struct intel_device_info intel_i945gm_info = { - .gen = 3, .is_i945gm = 1, .is_mobile = 1, .num_pipes = 2, + GEN3_FEATURES, + .is_i945gm = 1, .is_mobile = 1, .has_hotplug = 1, .cursor_needs_physical = 1, .has_overlay = 1, .overlay_needs_physical = 1, .supports_tv = 1, .has_fbc = 1, - .ring_mask = RENDER_RING, - GEN_DEFAULT_PIPEOFFSETS, - CURSOR_OFFSETS, + .hws_needs_physical = 1, }; +#define GEN4_FEATURES \ + .gen = 4, .num_pipes = 2, \ + .has_hotplug = 1, \ + .has_gmch_display = 1, \ + .ring_mask = RENDER_RING, \ + GEN_DEFAULT_PIPEOFFSETS, \ + CURSOR_OFFSETS + static const struct intel_device_info intel_i965g_info = { - .gen = 4, .is_broadwater = 1, .num_pipes = 2, - .has_hotplug = 1, + GEN4_FEATURES, + .is_broadwater = 1, .has_overlay = 1, - .ring_mask = RENDER_RING, - GEN_DEFAULT_PIPEOFFSETS, - CURSOR_OFFSETS, + .hws_needs_physical = 1, }; static const struct intel_device_info intel_i965gm_info = { - .gen = 4, .is_crestline = 1, .num_pipes = 2, - .is_mobile = 1, .has_fbc = 1, .has_hotplug = 1, + GEN4_FEATURES, + .is_crestline = 1, + .is_mobile = 1, .has_fbc = 1, .has_overlay = 1, .supports_tv = 1, - .ring_mask = RENDER_RING, - GEN_DEFAULT_PIPEOFFSETS, - CURSOR_OFFSETS, + .hws_needs_physical = 1, }; static const struct intel_device_info intel_g33_info = { - .gen = 3, .is_g33 = 1, .num_pipes = 2, - .need_gfx_hws = 1, .has_hotplug = 1, + GEN3_FEATURES, + .is_g33 = 1, + .has_hotplug = 1, .has_overlay = 1, - .ring_mask = RENDER_RING, - GEN_DEFAULT_PIPEOFFSETS, - CURSOR_OFFSETS, }; static const struct intel_device_info intel_g45_info = { - .gen = 4, .is_g4x = 1, .need_gfx_hws = 1, .num_pipes = 2, - .has_pipe_cxsr = 1, .has_hotplug = 1, + GEN4_FEATURES, + .is_g4x = 1, + .has_pipe_cxsr = 1, .ring_mask = RENDER_RING | BSD_RING, - GEN_DEFAULT_PIPEOFFSETS, - CURSOR_OFFSETS, }; static const struct intel_device_info intel_gm45_info = { - .gen = 4, .is_g4x = 1, .num_pipes = 2, - .is_mobile = 1, .need_gfx_hws = 1, .has_fbc = 1, - .has_pipe_cxsr = 1, .has_hotplug = 1, + GEN4_FEATURES, + .is_g4x = 1, + .is_mobile = 1, .has_fbc = 1, + .has_pipe_cxsr = 1, .supports_tv = 1, .ring_mask = RENDER_RING | BSD_RING, - GEN_DEFAULT_PIPEOFFSETS, - CURSOR_OFFSETS, }; static const struct intel_device_info intel_pineview_info = { - .gen = 3, .is_g33 = 1, .is_pineview = 1, .is_mobile = 1, .num_pipes = 2, - .need_gfx_hws = 1, .has_hotplug = 1, + GEN3_FEATURES, + .is_g33 = 1, .is_pineview = 1, .is_mobile = 1, + .has_hotplug = 1, .has_overlay = 
1, - GEN_DEFAULT_PIPEOFFSETS, - CURSOR_OFFSETS, }; +#define GEN5_FEATURES \ + .gen = 5, .num_pipes = 2, \ + .has_hotplug = 1, \ + .has_gmbus_irq = 1, \ + .ring_mask = RENDER_RING | BSD_RING, \ + GEN_DEFAULT_PIPEOFFSETS, \ + CURSOR_OFFSETS + static const struct intel_device_info intel_ironlake_d_info = { - .gen = 5, .num_pipes = 2, - .need_gfx_hws = 1, .has_hotplug = 1, - .ring_mask = RENDER_RING | BSD_RING, - GEN_DEFAULT_PIPEOFFSETS, - CURSOR_OFFSETS, + GEN5_FEATURES, }; static const struct intel_device_info intel_ironlake_m_info = { - .gen = 5, .is_mobile = 1, .num_pipes = 2, - .need_gfx_hws = 1, .has_hotplug = 1, - .has_fbc = 1, - .ring_mask = RENDER_RING | BSD_RING, - GEN_DEFAULT_PIPEOFFSETS, - CURSOR_OFFSETS, + GEN5_FEATURES, + .is_mobile = 1, }; +#define GEN6_FEATURES \ + .gen = 6, .num_pipes = 2, \ + .has_hotplug = 1, \ + .has_fbc = 1, \ + .ring_mask = RENDER_RING | BSD_RING | BLT_RING, \ + .has_llc = 1, \ + .has_rc6 = 1, \ + .has_rc6p = 1, \ + .has_gmbus_irq = 1, \ + .has_hw_contexts = 1, \ + GEN_DEFAULT_PIPEOFFSETS, \ + CURSOR_OFFSETS + static const struct intel_device_info intel_sandybridge_d_info = { - .gen = 6, .num_pipes = 2, - .need_gfx_hws = 1, .has_hotplug = 1, - .has_fbc = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING, - .has_llc = 1, - GEN_DEFAULT_PIPEOFFSETS, - CURSOR_OFFSETS, + GEN6_FEATURES, }; static const struct intel_device_info intel_sandybridge_m_info = { - .gen = 6, .is_mobile = 1, .num_pipes = 2, - .need_gfx_hws = 1, .has_hotplug = 1, - .has_fbc = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING, - .has_llc = 1, - GEN_DEFAULT_PIPEOFFSETS, - CURSOR_OFFSETS, + GEN6_FEATURES, + .is_mobile = 1, }; #define GEN7_FEATURES \ .gen = 7, .num_pipes = 3, \ - .need_gfx_hws = 1, .has_hotplug = 1, \ + .has_hotplug = 1, \ .has_fbc = 1, \ .ring_mask = RENDER_RING | BSD_RING | BLT_RING, \ .has_llc = 1, \ + .has_rc6 = 1, \ + .has_rc6p = 1, \ + .has_gmbus_irq = 1, \ + .has_hw_contexts = 1, \ GEN_DEFAULT_PIPEOFFSETS, \ IVB_CURSOR_OFFSETS static const struct intel_device_info intel_ivybridge_d_info = { GEN7_FEATURES, .is_ivybridge = 1, + .has_l3_dpf = 1, }; static const struct intel_device_info intel_ivybridge_m_info = { GEN7_FEATURES, .is_ivybridge = 1, .is_mobile = 1, + .has_l3_dpf = 1, }; static const struct intel_device_info intel_ivybridge_q_info = { GEN7_FEATURES, .is_ivybridge = 1, .num_pipes = 0, /* legal, last one wins */ + .has_l3_dpf = 1, }; #define VLV_FEATURES \ .gen = 7, .num_pipes = 2, \ - .need_gfx_hws = 1, .has_hotplug = 1, \ + .has_psr = 1, \ + .has_runtime_pm = 1, \ + .has_rc6 = 1, \ + .has_gmbus_irq = 1, \ + .has_hw_contexts = 1, \ + .has_gmch_display = 1, \ + .has_hotplug = 1, \ .ring_mask = RENDER_RING | BSD_RING | BLT_RING, \ .display_mmio_offset = VLV_DISPLAY_BASE, \ GEN_DEFAULT_PIPEOFFSETS, \ CURSOR_OFFSETS -static const struct intel_device_info intel_valleyview_m_info = { - VLV_FEATURES, - .is_valleyview = 1, - .is_mobile = 1, -}; - -static const struct intel_device_info intel_valleyview_d_info = { +static const struct intel_device_info intel_valleyview_info = { VLV_FEATURES, .is_valleyview = 1, }; @@ -263,54 +272,50 @@ static const struct intel_device_info intel_valleyview_d_info = { GEN7_FEATURES, \ .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, \ .has_ddi = 1, \ - .has_fpga_dbg = 1 - -static const struct intel_device_info intel_haswell_d_info = { - HSW_FEATURES, - .is_haswell = 1, -}; - -static const struct intel_device_info intel_haswell_m_info = { + .has_fpga_dbg = 1, \ + .has_psr = 1, \ + .has_resource_streamer = 1, \ + 
.has_dp_mst = 1, \ + .has_rc6p = 0 /* RC6p removed-by HSW */, \ + .has_runtime_pm = 1 + +static const struct intel_device_info intel_haswell_info = { HSW_FEATURES, .is_haswell = 1, - .is_mobile = 1, + .has_l3_dpf = 1, }; #define BDW_FEATURES \ HSW_FEATURES, \ - BDW_COLORS + BDW_COLORS, \ + .has_logical_ring_contexts = 1 -static const struct intel_device_info intel_broadwell_d_info = { +static const struct intel_device_info intel_broadwell_info = { BDW_FEATURES, .gen = 8, .is_broadwell = 1, }; -static const struct intel_device_info intel_broadwell_m_info = { - BDW_FEATURES, - .gen = 8, .is_mobile = 1, - .is_broadwell = 1, -}; - -static const struct intel_device_info intel_broadwell_gt3d_info = { +static const struct intel_device_info intel_broadwell_gt3_info = { BDW_FEATURES, .gen = 8, .is_broadwell = 1, .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, }; -static const struct intel_device_info intel_broadwell_gt3m_info = { - BDW_FEATURES, - .gen = 8, .is_mobile = 1, - .is_broadwell = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, -}; - static const struct intel_device_info intel_cherryview_info = { .gen = 8, .num_pipes = 3, - .need_gfx_hws = 1, .has_hotplug = 1, + .has_hotplug = 1, .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, .is_cherryview = 1, + .has_psr = 1, + .has_runtime_pm = 1, + .has_resource_streamer = 1, + .has_rc6 = 1, + .has_gmbus_irq = 1, + .has_hw_contexts = 1, + .has_logical_ring_contexts = 1, + .has_gmch_display = 1, .display_mmio_offset = VLV_DISPLAY_BASE, GEN_CHV_PIPEOFFSETS, CURSOR_OFFSETS, @@ -321,25 +326,41 @@ static const struct intel_device_info intel_skylake_info = { BDW_FEATURES, .is_skylake = 1, .gen = 9, + .has_csr = 1, + .has_guc = 1, + .ddb_size = 896, }; static const struct intel_device_info intel_skylake_gt3_info = { BDW_FEATURES, .is_skylake = 1, .gen = 9, + .has_csr = 1, + .has_guc = 1, + .ddb_size = 896, .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, }; static const struct intel_device_info intel_broxton_info = { .is_broxton = 1, .gen = 9, - .need_gfx_hws = 1, .has_hotplug = 1, + .has_hotplug = 1, .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, .num_pipes = 3, .has_ddi = 1, .has_fpga_dbg = 1, .has_fbc = 1, + .has_runtime_pm = 1, .has_pooled_eu = 0, + .has_csr = 1, + .has_resource_streamer = 1, + .has_rc6 = 1, + .has_dp_mst = 1, + .has_gmbus_irq = 1, + .has_hw_contexts = 1, + .has_logical_ring_contexts = 1, + .has_guc = 1, + .ddb_size = 512, GEN_DEFAULT_PIPEOFFSETS, IVB_CURSOR_OFFSETS, BDW_COLORS, @@ -349,12 +370,18 @@ static const struct intel_device_info intel_kabylake_info = { BDW_FEATURES, .is_kabylake = 1, .gen = 9, + .has_csr = 1, + .has_guc = 1, + .ddb_size = 896, }; static const struct intel_device_info intel_kabylake_gt3_info = { BDW_FEATURES, .is_kabylake = 1, .gen = 9, + .has_csr = 1, + .has_guc = 1, + .ddb_size = 896, .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, }; @@ -386,14 +413,10 @@ static const struct pci_device_id pciidlist[] = { INTEL_IVB_Q_IDS(&intel_ivybridge_q_info), /* must be first IVB */ INTEL_IVB_M_IDS(&intel_ivybridge_m_info), INTEL_IVB_D_IDS(&intel_ivybridge_d_info), - INTEL_HSW_D_IDS(&intel_haswell_d_info), - INTEL_HSW_M_IDS(&intel_haswell_m_info), - INTEL_VLV_M_IDS(&intel_valleyview_m_info), - INTEL_VLV_D_IDS(&intel_valleyview_d_info), - INTEL_BDW_GT12M_IDS(&intel_broadwell_m_info), - INTEL_BDW_GT12D_IDS(&intel_broadwell_d_info), - INTEL_BDW_GT3M_IDS(&intel_broadwell_gt3m_info), - 
INTEL_BDW_GT3D_IDS(&intel_broadwell_gt3d_info), + INTEL_HSW_IDS(&intel_haswell_info), + INTEL_VLV_IDS(&intel_valleyview_info), + INTEL_BDW_GT12_IDS(&intel_broadwell_info), + INTEL_BDW_GT3_IDS(&intel_broadwell_gt3_info), INTEL_CHV_IDS(&intel_cherryview_info), INTEL_SKL_GT1_IDS(&intel_skylake_info), INTEL_SKL_GT2_IDS(&intel_skylake_info), diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ce14fe09d962..70d96162def6 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -186,13 +186,13 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN9_GRDOM_GUC (1 << 5) #define GEN8_GRDOM_MEDIA2 (1 << 7) -#define RING_PP_DIR_BASE(ring) _MMIO((ring)->mmio_base+0x228) -#define RING_PP_DIR_BASE_READ(ring) _MMIO((ring)->mmio_base+0x518) -#define RING_PP_DIR_DCLV(ring) _MMIO((ring)->mmio_base+0x220) +#define RING_PP_DIR_BASE(engine) _MMIO((engine)->mmio_base+0x228) +#define RING_PP_DIR_BASE_READ(engine) _MMIO((engine)->mmio_base+0x518) +#define RING_PP_DIR_DCLV(engine) _MMIO((engine)->mmio_base+0x220) #define PP_DIR_DCLV_2G 0xffffffff -#define GEN8_RING_PDP_UDW(ring, n) _MMIO((ring)->mmio_base+0x270 + (n) * 8 + 4) -#define GEN8_RING_PDP_LDW(ring, n) _MMIO((ring)->mmio_base+0x270 + (n) * 8) +#define GEN8_RING_PDP_UDW(engine, n) _MMIO((engine)->mmio_base+0x270 + (n) * 8 + 4) +#define GEN8_RING_PDP_LDW(engine, n) _MMIO((engine)->mmio_base+0x270 + (n) * 8) #define GEN8_R_PWR_CLK_STATE _MMIO(0x20C8) #define GEN8_RPCS_ENABLE (1 << 31) @@ -1536,6 +1536,7 @@ enum skl_disp_power_wells { #define BALANCE_LEG_MASK(port) (7<<(8+3*(port))) /* Balance leg disable bits */ #define BALANCE_LEG_DISABLE_SHIFT 23 +#define BALANCE_LEG_DISABLE(port) (1 << (23 + (port))) /* * Fence registers @@ -1647,7 +1648,7 @@ enum skl_disp_power_wells { #define ARB_MODE_BWGTLB_DISABLE (1<<9) #define ARB_MODE_SWIZZLE_BDW (1<<1) #define RENDER_HWS_PGA_GEN7 _MMIO(0x04080) -#define RING_FAULT_REG(ring) _MMIO(0x4094 + 0x100*(ring)->id) +#define RING_FAULT_REG(engine) _MMIO(0x4094 + 0x100*(engine)->hw_id) #define RING_FAULT_GTTSEL_MASK (1<<11) #define RING_FAULT_SRCID(x) (((x) >> 3) & 0xff) #define RING_FAULT_FAULT_TYPE(x) (((x) >> 1) & 0x3) @@ -1845,7 +1846,7 @@ enum skl_disp_power_wells { #define GFX_MODE _MMIO(0x2520) #define GFX_MODE_GEN7 _MMIO(0x229c) -#define RING_MODE_GEN7(ring) _MMIO((ring)->mmio_base+0x29c) +#define RING_MODE_GEN7(engine) _MMIO((engine)->mmio_base+0x29c) #define GFX_RUN_LIST_ENABLE (1<<15) #define GFX_INTERRUPT_STEERING (1<<14) #define GFX_TLB_INVALIDATE_EXPLICIT (1<<13) @@ -3659,8 +3660,17 @@ enum { #define VIDEO_DIP_ENABLE_SPD_HSW (1 << 0) /* Panel power sequencing */ -#define PP_STATUS _MMIO(0x61200) -#define PP_ON (1 << 31) +#define PPS_BASE 0x61200 +#define VLV_PPS_BASE (VLV_DISPLAY_BASE + PPS_BASE) +#define PCH_PPS_BASE 0xC7200 + +#define _MMIO_PPS(pps_idx, reg) _MMIO(dev_priv->pps_mmio_base - \ + PPS_BASE + (reg) + \ + (pps_idx) * 0x100) + +#define _PP_STATUS 0x61200 +#define PP_STATUS(pps_idx) _MMIO_PPS(pps_idx, _PP_STATUS) +#define PP_ON (1 << 31) /* * Indicates that all dependencies of the panel are on: * @@ -3668,14 +3678,14 @@ enum { * - pipe enabled * - LVDS/DVOB/DVOC on */ -#define PP_READY (1 << 30) -#define PP_SEQUENCE_NONE (0 << 28) -#define PP_SEQUENCE_POWER_UP (1 << 28) -#define PP_SEQUENCE_POWER_DOWN (2 << 28) -#define PP_SEQUENCE_MASK (3 << 28) -#define PP_SEQUENCE_SHIFT 28 -#define PP_CYCLE_DELAY_ACTIVE (1 << 27) -#define PP_SEQUENCE_STATE_MASK 0x0000000f +#define PP_READY (1 << 30) +#define PP_SEQUENCE_NONE (0 << 28) 
+#define PP_SEQUENCE_POWER_UP (1 << 28) +#define PP_SEQUENCE_POWER_DOWN (2 << 28) +#define PP_SEQUENCE_MASK (3 << 28) +#define PP_SEQUENCE_SHIFT 28 +#define PP_CYCLE_DELAY_ACTIVE (1 << 27) +#define PP_SEQUENCE_STATE_MASK 0x0000000f #define PP_SEQUENCE_STATE_OFF_IDLE (0x0 << 0) #define PP_SEQUENCE_STATE_OFF_S0_1 (0x1 << 0) #define PP_SEQUENCE_STATE_OFF_S0_2 (0x2 << 0) @@ -3685,11 +3695,46 @@ enum { #define PP_SEQUENCE_STATE_ON_S1_2 (0xa << 0) #define PP_SEQUENCE_STATE_ON_S1_3 (0xb << 0) #define PP_SEQUENCE_STATE_RESET (0xf << 0) -#define PP_CONTROL _MMIO(0x61204) -#define POWER_TARGET_ON (1 << 0) -#define PP_ON_DELAYS _MMIO(0x61208) -#define PP_OFF_DELAYS _MMIO(0x6120c) -#define PP_DIVISOR _MMIO(0x61210) + +#define _PP_CONTROL 0x61204 +#define PP_CONTROL(pps_idx) _MMIO_PPS(pps_idx, _PP_CONTROL) +#define PANEL_UNLOCK_REGS (0xabcd << 16) +#define PANEL_UNLOCK_MASK (0xffff << 16) +#define BXT_POWER_CYCLE_DELAY_MASK 0x1f0 +#define BXT_POWER_CYCLE_DELAY_SHIFT 4 +#define EDP_FORCE_VDD (1 << 3) +#define EDP_BLC_ENABLE (1 << 2) +#define PANEL_POWER_RESET (1 << 1) +#define PANEL_POWER_OFF (0 << 0) +#define PANEL_POWER_ON (1 << 0) + +#define _PP_ON_DELAYS 0x61208 +#define PP_ON_DELAYS(pps_idx) _MMIO_PPS(pps_idx, _PP_ON_DELAYS) +#define PANEL_PORT_SELECT_SHIFT 30 +#define PANEL_PORT_SELECT_MASK (3 << 30) +#define PANEL_PORT_SELECT_LVDS (0 << 30) +#define PANEL_PORT_SELECT_DPA (1 << 30) +#define PANEL_PORT_SELECT_DPC (2 << 30) +#define PANEL_PORT_SELECT_DPD (3 << 30) +#define PANEL_PORT_SELECT_VLV(port) ((port) << 30) +#define PANEL_POWER_UP_DELAY_MASK 0x1fff0000 +#define PANEL_POWER_UP_DELAY_SHIFT 16 +#define PANEL_LIGHT_ON_DELAY_MASK 0x1fff +#define PANEL_LIGHT_ON_DELAY_SHIFT 0 + +#define _PP_OFF_DELAYS 0x6120C +#define PP_OFF_DELAYS(pps_idx) _MMIO_PPS(pps_idx, _PP_OFF_DELAYS) +#define PANEL_POWER_DOWN_DELAY_MASK 0x1fff0000 +#define PANEL_POWER_DOWN_DELAY_SHIFT 16 +#define PANEL_LIGHT_OFF_DELAY_MASK 0x1fff +#define PANEL_LIGHT_OFF_DELAY_SHIFT 0 + +#define _PP_DIVISOR 0x61210 +#define PP_DIVISOR(pps_idx) _MMIO_PPS(pps_idx, _PP_DIVISOR) +#define PP_REFERENCE_DIVIDER_MASK 0xffffff00 +#define PP_REFERENCE_DIVIDER_SHIFT 8 +#define PANEL_POWER_CYCLE_DELAY_MASK 0x1f +#define PANEL_POWER_CYCLE_DELAY_SHIFT 0 /* Panel fitting */ #define PFIT_CONTROL _MMIO(dev_priv->info.display_mmio_offset + 0x61230) @@ -6132,6 +6177,7 @@ enum { # define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC ((1<<10) | (1<<26)) # define GEN9_RHWO_OPTIMIZATION_DISABLE (1<<14) #define COMMON_SLICE_CHICKEN2 _MMIO(0x7014) +# define GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE (1<<12) # define GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION (1<<8) # define GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE (1<<0) @@ -6748,77 +6794,6 @@ enum { #define PCH_LVDS _MMIO(0xe1180) #define LVDS_DETECTED (1 << 1) -/* vlv has 2 sets of panel control regs. 
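The hunk above replaces the scattered per-platform panel power sequencer (PPS) registers with a single pps_idx-parameterised set addressed via _MMIO_PPS() relative to dev_priv->pps_mmio_base. A minimal sketch of how the new macros compose, assuming pps_mmio_base has already been initialised to PPS_BASE, VLV_PPS_BASE or PCH_PPS_BASE for the platform; example_panel_power_on() is hypothetical and elides the VDD/backlight ordering a real caller needs:

/*
 * Illustrative sketch, not part of the patch: driving the panel power
 * sequencer through the new pps_idx-based macros. _MMIO_PPS() expects
 * a dev_priv with pps_mmio_base set up for the platform.
 */
static void example_panel_power_on(struct drm_i915_private *dev_priv,
				   int pps_idx)
{
	u32 ctl = I915_READ(PP_CONTROL(pps_idx));

	/* On PCH platforms, writes are ignored without the unlock key. */
	ctl &= ~PANEL_UNLOCK_MASK;
	ctl |= PANEL_UNLOCK_REGS;

	ctl |= PANEL_POWER_ON;
	I915_WRITE(PP_CONTROL(pps_idx), ctl);
}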
*/ -#define _PIPEA_PP_STATUS (VLV_DISPLAY_BASE + 0x61200) -#define _PIPEA_PP_CONTROL (VLV_DISPLAY_BASE + 0x61204) -#define _PIPEA_PP_ON_DELAYS (VLV_DISPLAY_BASE + 0x61208) -#define PANEL_PORT_SELECT_VLV(port) ((port) << 30) -#define _PIPEA_PP_OFF_DELAYS (VLV_DISPLAY_BASE + 0x6120c) -#define _PIPEA_PP_DIVISOR (VLV_DISPLAY_BASE + 0x61210) - -#define _PIPEB_PP_STATUS (VLV_DISPLAY_BASE + 0x61300) -#define _PIPEB_PP_CONTROL (VLV_DISPLAY_BASE + 0x61304) -#define _PIPEB_PP_ON_DELAYS (VLV_DISPLAY_BASE + 0x61308) -#define _PIPEB_PP_OFF_DELAYS (VLV_DISPLAY_BASE + 0x6130c) -#define _PIPEB_PP_DIVISOR (VLV_DISPLAY_BASE + 0x61310) - -#define VLV_PIPE_PP_STATUS(pipe) _MMIO_PIPE(pipe, _PIPEA_PP_STATUS, _PIPEB_PP_STATUS) -#define VLV_PIPE_PP_CONTROL(pipe) _MMIO_PIPE(pipe, _PIPEA_PP_CONTROL, _PIPEB_PP_CONTROL) -#define VLV_PIPE_PP_ON_DELAYS(pipe) _MMIO_PIPE(pipe, _PIPEA_PP_ON_DELAYS, _PIPEB_PP_ON_DELAYS) -#define VLV_PIPE_PP_OFF_DELAYS(pipe) _MMIO_PIPE(pipe, _PIPEA_PP_OFF_DELAYS, _PIPEB_PP_OFF_DELAYS) -#define VLV_PIPE_PP_DIVISOR(pipe) _MMIO_PIPE(pipe, _PIPEA_PP_DIVISOR, _PIPEB_PP_DIVISOR) - -#define _PCH_PP_STATUS 0xc7200 -#define _PCH_PP_CONTROL 0xc7204 -#define PANEL_UNLOCK_REGS (0xabcd << 16) -#define PANEL_UNLOCK_MASK (0xffff << 16) -#define BXT_POWER_CYCLE_DELAY_MASK (0x1f0) -#define BXT_POWER_CYCLE_DELAY_SHIFT 4 -#define EDP_FORCE_VDD (1 << 3) -#define EDP_BLC_ENABLE (1 << 2) -#define PANEL_POWER_RESET (1 << 1) -#define PANEL_POWER_OFF (0 << 0) -#define PANEL_POWER_ON (1 << 0) -#define _PCH_PP_ON_DELAYS 0xc7208 -#define PANEL_PORT_SELECT_MASK (3 << 30) -#define PANEL_PORT_SELECT_LVDS (0 << 30) -#define PANEL_PORT_SELECT_DPA (1 << 30) -#define PANEL_PORT_SELECT_DPC (2 << 30) -#define PANEL_PORT_SELECT_DPD (3 << 30) -#define PANEL_POWER_UP_DELAY_MASK (0x1fff0000) -#define PANEL_POWER_UP_DELAY_SHIFT 16 -#define PANEL_LIGHT_ON_DELAY_MASK (0x1fff) -#define PANEL_LIGHT_ON_DELAY_SHIFT 0 - -#define _PCH_PP_OFF_DELAYS 0xc720c -#define PANEL_POWER_DOWN_DELAY_MASK (0x1fff0000) -#define PANEL_POWER_DOWN_DELAY_SHIFT 16 -#define PANEL_LIGHT_OFF_DELAY_MASK (0x1fff) -#define PANEL_LIGHT_OFF_DELAY_SHIFT 0 - -#define _PCH_PP_DIVISOR 0xc7210 -#define PP_REFERENCE_DIVIDER_MASK (0xffffff00) -#define PP_REFERENCE_DIVIDER_SHIFT 8 -#define PANEL_POWER_CYCLE_DELAY_MASK (0x1f) -#define PANEL_POWER_CYCLE_DELAY_SHIFT 0 - -#define PCH_PP_STATUS _MMIO(_PCH_PP_STATUS) -#define PCH_PP_CONTROL _MMIO(_PCH_PP_CONTROL) -#define PCH_PP_ON_DELAYS _MMIO(_PCH_PP_ON_DELAYS) -#define PCH_PP_OFF_DELAYS _MMIO(_PCH_PP_OFF_DELAYS) -#define PCH_PP_DIVISOR _MMIO(_PCH_PP_DIVISOR) - -/* BXT PPS changes - 2nd set of PPS registers */ -#define _BXT_PP_STATUS2 0xc7300 -#define _BXT_PP_CONTROL2 0xc7304 -#define _BXT_PP_ON_DELAYS2 0xc7308 -#define _BXT_PP_OFF_DELAYS2 0xc730c - -#define BXT_PP_STATUS(n) _MMIO_PIPE(n, _PCH_PP_STATUS, _BXT_PP_STATUS2) -#define BXT_PP_CONTROL(n) _MMIO_PIPE(n, _PCH_PP_CONTROL, _BXT_PP_CONTROL2) -#define BXT_PP_ON_DELAYS(n) _MMIO_PIPE(n, _PCH_PP_ON_DELAYS, _BXT_PP_ON_DELAYS2) -#define BXT_PP_OFF_DELAYS(n) _MMIO_PIPE(n, _PCH_PP_OFF_DELAYS, _BXT_PP_OFF_DELAYS2) - #define _PCH_DP_B 0xe4100 #define PCH_DP_B _MMIO(_PCH_DP_B) #define _PCH_DPB_AUX_CH_CTL 0xe4110 @@ -6958,6 +6933,9 @@ enum { #define ECOBUS _MMIO(0xa180) #define FORCEWAKE_MT_ENABLE (1<<5) #define VLV_SPAREG2H _MMIO(0xA194) +#define GEN9_PWRGT_DOMAIN_STATUS _MMIO(0xA2A0) +#define GEN9_PWRGT_MEDIA_STATUS_MASK (1 << 0) +#define GEN9_PWRGT_RENDER_STATUS_MASK (1 << 1) #define GTFIFODBG _MMIO(0x120000) #define GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV (0x1f << 20) @@ -7058,12 
+7036,13 @@ enum { #define GEN6_RP_UP_THRESHOLD _MMIO(0xA02C) #define GEN6_RP_DOWN_THRESHOLD _MMIO(0xA030) #define GEN6_RP_CUR_UP_EI _MMIO(0xA050) -#define GEN6_CURICONT_MASK 0xffffff +#define GEN6_RP_EI_MASK 0xffffff +#define GEN6_CURICONT_MASK GEN6_RP_EI_MASK #define GEN6_RP_CUR_UP _MMIO(0xA054) -#define GEN6_CURBSYTAVG_MASK 0xffffff +#define GEN6_CURBSYTAVG_MASK GEN6_RP_EI_MASK #define GEN6_RP_PREV_UP _MMIO(0xA058) #define GEN6_RP_CUR_DOWN_EI _MMIO(0xA05C) -#define GEN6_CURIAVG_MASK 0xffffff +#define GEN6_CURIAVG_MASK GEN6_RP_EI_MASK #define GEN6_RP_CUR_DOWN _MMIO(0xA060) #define GEN6_RP_PREV_DOWN _MMIO(0xA064) #define GEN6_RP_UP_EI _MMIO(0xA068) @@ -7088,7 +7067,7 @@ enum { #define VLV_RCEDATA _MMIO(0xA0BC) #define GEN6_RC6pp_THRESHOLD _MMIO(0xA0C0) #define GEN6_PMINTRMSK _MMIO(0xA168) -#define GEN8_PMINTR_REDIRECT_TO_NON_DISP (1<<31) +#define GEN8_PMINTR_REDIRECT_TO_GUC (1<<31) #define GEN8_MISC_CTRL0 _MMIO(0xA180) #define VLV_PWRDWNUPCTL _MMIO(0xA294) #define GEN9_MEDIA_PG_IDLE_HYSTERESIS _MMIO(0xA0C4) @@ -7144,6 +7123,15 @@ enum { #define GEN6_PCODE_MAILBOX _MMIO(0x138124) #define GEN6_PCODE_READY (1<<31) +#define GEN6_PCODE_ERROR_MASK 0xFF +#define GEN6_PCODE_SUCCESS 0x0 +#define GEN6_PCODE_ILLEGAL_CMD 0x1 +#define GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE 0x2 +#define GEN6_PCODE_TIMEOUT 0x3 +#define GEN6_PCODE_UNIMPLEMENTED_CMD 0xFF +#define GEN7_PCODE_TIMEOUT 0x2 +#define GEN7_PCODE_ILLEGAL_DATA 0x3 +#define GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE 0x10 #define GEN6_PCODE_WRITE_RC6VIDS 0x4 #define GEN6_PCODE_READ_RC6VIDS 0x5 #define GEN6_ENCODE_RC6_VID(mv) (((mv) - 245) / 5) @@ -7165,6 +7153,10 @@ enum { #define HSW_PCODE_DE_WRITE_FREQ_REQ 0x17 #define DISPLAY_IPS_CONTROL 0x19 #define HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL 0x1A +#define GEN9_PCODE_SAGV_CONTROL 0x21 +#define GEN9_SAGV_DISABLE 0x0 +#define GEN9_SAGV_IS_DISABLED 0x1 +#define GEN9_SAGV_ENABLE 0x3 #define GEN6_PCODE_DATA _MMIO(0x138128) #define GEN6_PCODE_FREQ_IA_RATIO_SHIFT 8 #define GEN6_PCODE_FREQ_RING_RATIO_SHIFT 16 @@ -7485,6 +7477,7 @@ enum { #define _DDI_BUF_TRANS_A 0x64E00 #define _DDI_BUF_TRANS_B 0x64E60 #define DDI_BUF_TRANS_LO(port, i) _MMIO(_PORT(port, _DDI_BUF_TRANS_A, _DDI_BUF_TRANS_B) + (i) * 8) +#define DDI_BUF_BALANCE_LEG_ENABLE (1 << 31) #define DDI_BUF_TRANS_HI(port, i) _MMIO(_PORT(port, _DDI_BUF_TRANS_A, _DDI_BUF_TRANS_B) + (i) * 8 + 4) /* Sideband Interface (SBI) is programmed indirectly, via diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 5cfe4c7716b4..a0af170062b1 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -37,25 +37,6 @@ static void i915_save_display(struct drm_device *dev) if (INTEL_INFO(dev)->gen <= 4) dev_priv->regfile.saveDSPARB = I915_READ(DSPARB); - /* LVDS state */ - if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) - dev_priv->regfile.saveLVDS = I915_READ(PCH_LVDS); - else if (INTEL_INFO(dev)->gen <= 4 && IS_MOBILE(dev) && !IS_I830(dev)) - dev_priv->regfile.saveLVDS = I915_READ(LVDS); - - /* Panel power sequencer */ - if (HAS_PCH_SPLIT(dev)) { - dev_priv->regfile.savePP_CONTROL = I915_READ(PCH_PP_CONTROL); - dev_priv->regfile.savePP_ON_DELAYS = I915_READ(PCH_PP_ON_DELAYS); - dev_priv->regfile.savePP_OFF_DELAYS = I915_READ(PCH_PP_OFF_DELAYS); - dev_priv->regfile.savePP_DIVISOR = I915_READ(PCH_PP_DIVISOR); - } else if (INTEL_INFO(dev)->gen <= 4) { - dev_priv->regfile.savePP_CONTROL = I915_READ(PP_CONTROL); - dev_priv->regfile.savePP_ON_DELAYS = I915_READ(PP_ON_DELAYS); - 
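For context on the GEN6_PCODE_ERROR_MASK and per-generation status codes added to i915_reg.h above: a hedged sketch of how a mailbox caller could fold the status field into an errno. The helper name and the exact errno mapping are assumptions for illustration, not part of this patch:

/*
 * Hedged sketch: decoding the PCODE mailbox error field. The errno
 * choices below are illustrative assumptions.
 */
static int example_gen6_decode_pcode_error(u32 mbox)
{
	switch (mbox & GEN6_PCODE_ERROR_MASK) {
	case GEN6_PCODE_SUCCESS:
		return 0;
	case GEN6_PCODE_ILLEGAL_CMD:
		return -ENXIO;
	case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
		return -EINVAL;
	case GEN6_PCODE_TIMEOUT:
		return -ETIMEDOUT;
	case GEN6_PCODE_UNIMPLEMENTED_CMD:
		return -ENODEV;
	default:
		MISSING_CASE(mbox & GEN6_PCODE_ERROR_MASK);
		return -EPERM;
	}
}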
dev_priv->regfile.savePP_OFF_DELAYS = I915_READ(PP_OFF_DELAYS); - dev_priv->regfile.savePP_DIVISOR = I915_READ(PP_DIVISOR); - } - /* save FBC interval */ if (HAS_FBC(dev) && INTEL_INFO(dev)->gen <= 4 && !IS_G4X(dev)) dev_priv->regfile.saveFBC_CONTROL = I915_READ(FBC_CONTROL); @@ -64,33 +45,11 @@ static void i915_save_display(struct drm_device *dev) static void i915_restore_display(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - u32 mask = 0xffffffff; /* Display arbitration */ if (INTEL_INFO(dev)->gen <= 4) I915_WRITE(DSPARB, dev_priv->regfile.saveDSPARB); - mask = ~LVDS_PORT_EN; - - /* LVDS state */ - if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) - I915_WRITE(PCH_LVDS, dev_priv->regfile.saveLVDS & mask); - else if (INTEL_INFO(dev)->gen <= 4 && IS_MOBILE(dev) && !IS_I830(dev)) - I915_WRITE(LVDS, dev_priv->regfile.saveLVDS & mask); - - /* Panel power sequencer */ - if (HAS_PCH_SPLIT(dev)) { - I915_WRITE(PCH_PP_ON_DELAYS, dev_priv->regfile.savePP_ON_DELAYS); - I915_WRITE(PCH_PP_OFF_DELAYS, dev_priv->regfile.savePP_OFF_DELAYS); - I915_WRITE(PCH_PP_DIVISOR, dev_priv->regfile.savePP_DIVISOR); - I915_WRITE(PCH_PP_CONTROL, dev_priv->regfile.savePP_CONTROL); - } else if (INTEL_INFO(dev)->gen <= 4) { - I915_WRITE(PP_ON_DELAYS, dev_priv->regfile.savePP_ON_DELAYS); - I915_WRITE(PP_OFF_DELAYS, dev_priv->regfile.savePP_OFF_DELAYS); - I915_WRITE(PP_DIVISOR, dev_priv->regfile.savePP_DIVISOR); - I915_WRITE(PP_CONTROL, dev_priv->regfile.savePP_CONTROL); - } - /* only restore FBC info on the platform that supports FBC*/ intel_fbc_global_disable(dev_priv); @@ -104,6 +63,7 @@ static void i915_restore_display(struct drm_device *dev) int i915_save_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + struct pci_dev *pdev = dev_priv->drm.pdev; int i; mutex_lock(&dev->struct_mutex); @@ -111,7 +71,7 @@ int i915_save_state(struct drm_device *dev) i915_save_display(dev); if (IS_GEN4(dev)) - pci_read_config_word(dev->pdev, GCDGMBUS, + pci_read_config_word(pdev, GCDGMBUS, &dev_priv->regfile.saveGCDGMBUS); /* Cache mode state */ @@ -149,6 +109,7 @@ int i915_save_state(struct drm_device *dev) int i915_restore_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + struct pci_dev *pdev = dev_priv->drm.pdev; int i; mutex_lock(&dev->struct_mutex); @@ -156,7 +117,7 @@ int i915_restore_state(struct drm_device *dev) i915_gem_restore_fences(dev); if (IS_GEN4(dev)) - pci_write_config_word(dev->pdev, GCDGMBUS, + pci_write_config_word(pdev, GCDGMBUS, dev_priv->regfile.saveGCDGMBUS); i915_restore_display(dev); diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c new file mode 100644 index 000000000000..1e5cbc585ca2 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -0,0 +1,362 @@ +/* + * (C) Copyright 2016 Intel Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. 
+ */ + +#include <linux/slab.h> +#include <linux/fence.h> +#include <linux/reservation.h> + +#include "i915_sw_fence.h" + +static DEFINE_SPINLOCK(i915_sw_fence_lock); + +static int __i915_sw_fence_notify(struct i915_sw_fence *fence, + enum i915_sw_fence_notify state) +{ + i915_sw_fence_notify_t fn; + + fn = (i915_sw_fence_notify_t)(fence->flags & I915_SW_FENCE_MASK); + return fn(fence, state); +} + +static void i915_sw_fence_free(struct kref *kref) +{ + struct i915_sw_fence *fence = container_of(kref, typeof(*fence), kref); + + WARN_ON(atomic_read(&fence->pending) > 0); + + if (fence->flags & I915_SW_FENCE_MASK) + __i915_sw_fence_notify(fence, FENCE_FREE); + else + kfree(fence); +} + +static void i915_sw_fence_put(struct i915_sw_fence *fence) +{ + kref_put(&fence->kref, i915_sw_fence_free); +} + +static struct i915_sw_fence *i915_sw_fence_get(struct i915_sw_fence *fence) +{ + kref_get(&fence->kref); + return fence; +} + +static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence, + struct list_head *continuation) +{ + wait_queue_head_t *x = &fence->wait; + wait_queue_t *pos, *next; + unsigned long flags; + + atomic_set_release(&fence->pending, -1); /* 0 -> -1 [done] */ + + /* + * To prevent unbounded recursion as we traverse the graph of + * i915_sw_fences, we move the task_list from this, the next ready + * fence, to the tail of the original fence's task_list + * (and so added to the list to be woken). + */ + + spin_lock_irqsave_nested(&x->lock, flags, 1 + !!continuation); + if (continuation) { + list_for_each_entry_safe(pos, next, &x->task_list, task_list) { + if (pos->func == autoremove_wake_function) + pos->func(pos, TASK_NORMAL, 0, continuation); + else + list_move_tail(&pos->task_list, continuation); + } + } else { + LIST_HEAD(extra); + + do { + list_for_each_entry_safe(pos, next, + &x->task_list, task_list) + pos->func(pos, TASK_NORMAL, 0, &extra); + + if (list_empty(&extra)) + break; + + list_splice_tail_init(&extra, &x->task_list); + } while (1); + } + spin_unlock_irqrestore(&x->lock, flags); +} + +static void __i915_sw_fence_complete(struct i915_sw_fence *fence, + struct list_head *continuation) +{ + if (!atomic_dec_and_test(&fence->pending)) + return; + + if (fence->flags & I915_SW_FENCE_MASK && + __i915_sw_fence_notify(fence, FENCE_COMPLETE) != NOTIFY_DONE) + return; + + __i915_sw_fence_wake_up_all(fence, continuation); +} + +static void i915_sw_fence_complete(struct i915_sw_fence *fence) +{ + if (WARN_ON(i915_sw_fence_done(fence))) + return; + + __i915_sw_fence_complete(fence, NULL); +} + +static void i915_sw_fence_await(struct i915_sw_fence *fence) +{ + WARN_ON(atomic_inc_return(&fence->pending) <= 1); +} + +void i915_sw_fence_init(struct i915_sw_fence *fence, i915_sw_fence_notify_t fn) +{ + BUG_ON((unsigned long)fn & ~I915_SW_FENCE_MASK); + + init_waitqueue_head(&fence->wait); + kref_init(&fence->kref); + atomic_set(&fence->pending, 1); + fence->flags = (unsigned long)fn; +} + +void i915_sw_fence_commit(struct i915_sw_fence *fence) +{ + i915_sw_fence_complete(fence); + i915_sw_fence_put(fence); +} + +static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *key) +{ + list_del(&wq->task_list); + __i915_sw_fence_complete(wq->private, key); + i915_sw_fence_put(wq->private); + return 0; +} + +static bool __i915_sw_fence_check_if_after(struct i915_sw_fence *fence, + const struct i915_sw_fence * const signaler) +{ + wait_queue_t *wq; + + if (__test_and_set_bit(I915_SW_FENCE_CHECKED_BIT, &fence->flags)) + return false; + + if (fence == signaler) + 
return true; + + list_for_each_entry(wq, &fence->wait.task_list, task_list) { + if (wq->func != i915_sw_fence_wake) + continue; + + if (__i915_sw_fence_check_if_after(wq->private, signaler)) + return true; + } + + return false; +} + +static void __i915_sw_fence_clear_checked_bit(struct i915_sw_fence *fence) +{ + wait_queue_t *wq; + + if (!__test_and_clear_bit(I915_SW_FENCE_CHECKED_BIT, &fence->flags)) + return; + + list_for_each_entry(wq, &fence->wait.task_list, task_list) { + if (wq->func != i915_sw_fence_wake) + continue; + + __i915_sw_fence_clear_checked_bit(wq->private); + } +} + +static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence, + const struct i915_sw_fence * const signaler) +{ + unsigned long flags; + bool err; + + if (!IS_ENABLED(CONFIG_I915_SW_FENCE_CHECK_DAG)) + return false; + + spin_lock_irqsave(&i915_sw_fence_lock, flags); + err = __i915_sw_fence_check_if_after(fence, signaler); + __i915_sw_fence_clear_checked_bit(fence); + spin_unlock_irqrestore(&i915_sw_fence_lock, flags); + + return err; +} + +int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, + struct i915_sw_fence *signaler, + wait_queue_t *wq) +{ + unsigned long flags; + int pending; + + if (i915_sw_fence_done(signaler)) + return 0; + + /* The dependency graph must be acyclic. */ + if (unlikely(i915_sw_fence_check_if_after(fence, signaler))) + return -EINVAL; + + INIT_LIST_HEAD(&wq->task_list); + wq->flags = 0; + wq->func = i915_sw_fence_wake; + wq->private = i915_sw_fence_get(fence); + + i915_sw_fence_await(fence); + + spin_lock_irqsave(&signaler->wait.lock, flags); + if (likely(!i915_sw_fence_done(signaler))) { + __add_wait_queue_tail(&signaler->wait, wq); + pending = 1; + } else { + i915_sw_fence_wake(wq, 0, 0, NULL); + pending = 0; + } + spin_unlock_irqrestore(&signaler->wait.lock, flags); + + return pending; +} + +struct dma_fence_cb { + struct fence_cb base; + struct i915_sw_fence *fence; + struct fence *dma; + struct timer_list timer; +}; + +static void timer_i915_sw_fence_wake(unsigned long data) +{ + struct dma_fence_cb *cb = (struct dma_fence_cb *)data; + + printk(KERN_WARNING "asynchronous wait on fence %s:%s:%x timed out\n", + cb->dma->ops->get_driver_name(cb->dma), + cb->dma->ops->get_timeline_name(cb->dma), + cb->dma->seqno); + fence_put(cb->dma); + cb->dma = NULL; + + i915_sw_fence_commit(cb->fence); + cb->timer.function = NULL; +} + +static void dma_i915_sw_fence_wake(struct fence *dma, struct fence_cb *data) +{ + struct dma_fence_cb *cb = container_of(data, typeof(*cb), base); + + del_timer_sync(&cb->timer); + if (cb->timer.function) + i915_sw_fence_commit(cb->fence); + fence_put(cb->dma); + + kfree(cb); +} + +int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, + struct fence *dma, + unsigned long timeout, + gfp_t gfp) +{ + struct dma_fence_cb *cb; + int ret; + + if (fence_is_signaled(dma)) + return 0; + + cb = kmalloc(sizeof(*cb), gfp); + if (!cb) { + if (!gfpflags_allow_blocking(gfp)) + return -ENOMEM; + + return fence_wait(dma, false); + } + + cb->fence = i915_sw_fence_get(fence); + i915_sw_fence_await(fence); + + cb->dma = NULL; + __setup_timer(&cb->timer, + timer_i915_sw_fence_wake, (unsigned long)cb, + TIMER_IRQSAFE); + if (timeout) { + cb->dma = fence_get(dma); + mod_timer(&cb->timer, round_jiffies_up(jiffies + timeout)); + } + + ret = fence_add_callback(dma, &cb->base, dma_i915_sw_fence_wake); + if (ret == 0) { + ret = 1; + } else { + dma_i915_sw_fence_wake(dma, &cb->base); + if (ret == -ENOENT) /* fence already signaled */ + ret = 0; + } + + return ret; 
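Before the final reservation-object helper below, a usage sketch of the i915_sw_fence API this new file introduces: initialise with a notify callback, accumulate awaits (each successful await raises fence->pending), then commit to drop the initial count. All example_* names are hypothetical; the await return values (1 = pending, 0 = already signaled, negative = error or dependency cycle) are ignored here for brevity:

static int __i915_sw_fence_call
example_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	switch (state) {
	case FENCE_COMPLETE:
		/* all awaited fences have signaled; kick the work here */
		break;
	case FENCE_FREE:
		/* last reference gone; free the embedding object here */
		break;
	}
	return NOTIFY_DONE;
}

static void example_compose(struct i915_sw_fence *fence,
			    struct i915_sw_fence *prereq,
			    struct fence *dma,
			    wait_queue_t *wq)
{
	i915_sw_fence_init(fence, example_notify);

	/* each successful await raises fence->pending */
	i915_sw_fence_await_sw_fence(fence, prereq, wq);
	i915_sw_fence_await_dma_fence(fence, dma, HZ, GFP_KERNEL);

	/* drop the initial count; notify fires once all awaits signal */
	i915_sw_fence_commit(fence);
}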
+} + +int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, + struct reservation_object *resv, + const struct fence_ops *exclude, + bool write, + unsigned long timeout, + gfp_t gfp) +{ + struct fence *excl; + int ret = 0, pending; + + if (write) { + struct fence **shared; + unsigned int count, i; + + ret = reservation_object_get_fences_rcu(resv, + &excl, &count, &shared); + if (ret) + return ret; + + for (i = 0; i < count; i++) { + if (shared[i]->ops == exclude) + continue; + + pending = i915_sw_fence_await_dma_fence(fence, + shared[i], + timeout, + gfp); + if (pending < 0) { + ret = pending; + break; + } + + ret |= pending; + } + + for (i = 0; i < count; i++) + fence_put(shared[i]); + kfree(shared); + } else { + excl = reservation_object_get_excl_rcu(resv); + } + + if (ret >= 0 && excl && excl->ops != exclude) { + pending = i915_sw_fence_await_dma_fence(fence, + excl, + timeout, + gfp); + if (pending < 0) + ret = pending; + else + ret |= pending; + } + + fence_put(excl); + + return ret; +} diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h new file mode 100644 index 000000000000..373141602ca4 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_sw_fence.h @@ -0,0 +1,65 @@ +/* + * i915_sw_fence.h - library routines for N:M synchronisation points + * + * Copyright (C) 2016 Intel Corporation + * + * This file is released under the GPLv2. + * + */ + +#ifndef _I915_SW_FENCE_H_ +#define _I915_SW_FENCE_H_ + +#include <linux/gfp.h> +#include <linux/kref.h> +#include <linux/notifier.h> /* for NOTIFY_DONE */ +#include <linux/wait.h> + +struct completion; +struct fence; +struct fence_ops; +struct reservation_object; + +struct i915_sw_fence { + wait_queue_head_t wait; + unsigned long flags; + struct kref kref; + atomic_t pending; +}; + +#define I915_SW_FENCE_CHECKED_BIT 0 /* used internally for DAG checking */ +#define I915_SW_FENCE_PRIVATE_BIT 1 /* available for use by owner */ +#define I915_SW_FENCE_MASK (~3) + +enum i915_sw_fence_notify { + FENCE_COMPLETE, + FENCE_FREE +}; + +typedef int (*i915_sw_fence_notify_t)(struct i915_sw_fence *, + enum i915_sw_fence_notify state); +#define __i915_sw_fence_call __aligned(4) + +void i915_sw_fence_init(struct i915_sw_fence *fence, i915_sw_fence_notify_t fn); +void i915_sw_fence_commit(struct i915_sw_fence *fence); + +int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, + struct i915_sw_fence *after, + wait_queue_t *wq); +int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, + struct fence *dma, + unsigned long timeout, + gfp_t gfp); +int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, + struct reservation_object *resv, + const struct fence_ops *exclude, + bool write, + unsigned long timeout, + gfp_t gfp); + +static inline bool i915_sw_fence_done(const struct i915_sw_fence *fence) +{ + return atomic_read(&fence->pending) < 0; +} + +#endif /* _I915_SW_FENCE_H_ */ diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index d61829e54f93..1012eeea1324 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -32,13 +32,16 @@ #include "intel_drv.h" #include "i915_drv.h" -#define dev_to_drm_minor(d) dev_get_drvdata((d)) +static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev) +{ + struct drm_minor *minor = dev_get_drvdata(kdev); + return to_i915(minor->dev); +} #ifdef CONFIG_PM -static u32 calc_residency(struct drm_device *dev, +static u32 calc_residency(struct drm_i915_private *dev_priv, i915_reg_t reg) { - 
struct drm_i915_private *dev_priv = to_i915(dev); u64 raw_time; /* 32b value may overflow during fixed point math */ u64 units = 128ULL, div = 100000ULL; u32 ret; @@ -49,13 +52,13 @@ static u32 calc_residency(struct drm_device *dev, intel_runtime_pm_get(dev_priv); /* On VLV and CHV, residency time is in CZ units rather than 1.28us */ - if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) { + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { units = 1; div = dev_priv->czclk_freq; if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH) units <<= 8; - } else if (IS_BROXTON(dev)) { + } else if (IS_BROXTON(dev_priv)) { units = 1; div = 1200; /* 833.33ns */ } @@ -76,32 +79,32 @@ show_rc6_mask(struct device *kdev, struct device_attribute *attr, char *buf) static ssize_t show_rc6_ms(struct device *kdev, struct device_attribute *attr, char *buf) { - struct drm_minor *dminor = dev_get_drvdata(kdev); - u32 rc6_residency = calc_residency(dminor->dev, GEN6_GT_GFX_RC6); + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + u32 rc6_residency = calc_residency(dev_priv, GEN6_GT_GFX_RC6); return snprintf(buf, PAGE_SIZE, "%u\n", rc6_residency); } static ssize_t show_rc6p_ms(struct device *kdev, struct device_attribute *attr, char *buf) { - struct drm_minor *dminor = dev_to_drm_minor(kdev); - u32 rc6p_residency = calc_residency(dminor->dev, GEN6_GT_GFX_RC6p); + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + u32 rc6p_residency = calc_residency(dev_priv, GEN6_GT_GFX_RC6p); return snprintf(buf, PAGE_SIZE, "%u\n", rc6p_residency); } static ssize_t show_rc6pp_ms(struct device *kdev, struct device_attribute *attr, char *buf) { - struct drm_minor *dminor = dev_to_drm_minor(kdev); - u32 rc6pp_residency = calc_residency(dminor->dev, GEN6_GT_GFX_RC6pp); + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + u32 rc6pp_residency = calc_residency(dev_priv, GEN6_GT_GFX_RC6pp); return snprintf(buf, PAGE_SIZE, "%u\n", rc6pp_residency); } static ssize_t show_media_rc6_ms(struct device *kdev, struct device_attribute *attr, char *buf) { - struct drm_minor *dminor = dev_get_drvdata(kdev); - u32 rc6_residency = calc_residency(dminor->dev, VLV_GT_MEDIA_RC6); + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + u32 rc6_residency = calc_residency(dev_priv, VLV_GT_MEDIA_RC6); return snprintf(buf, PAGE_SIZE, "%u\n", rc6_residency); } @@ -144,9 +147,9 @@ static struct attribute_group media_rc6_attr_group = { }; #endif -static int l3_access_valid(struct drm_device *dev, loff_t offset) +static int l3_access_valid(struct drm_i915_private *dev_priv, loff_t offset) { - if (!HAS_L3_DPF(dev)) + if (!HAS_L3_DPF(dev_priv)) return -EPERM; if (offset % 4 != 0) @@ -163,22 +166,21 @@ i915_l3_read(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t offset, size_t count) { - struct device *dev = kobj_to_dev(kobj); - struct drm_minor *dminor = dev_to_drm_minor(dev); - struct drm_device *drm_dev = dminor->dev; - struct drm_i915_private *dev_priv = to_i915(drm_dev); + struct device *kdev = kobj_to_dev(kobj); + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct drm_device *dev = &dev_priv->drm; int slice = (int)(uintptr_t)attr->private; int ret; count = round_down(count, 4); - ret = l3_access_valid(drm_dev, offset); + ret = l3_access_valid(dev_priv, offset); if (ret) return ret; count = min_t(size_t, GEN7_L3LOG_SIZE - offset, count); - ret = i915_mutex_lock_interruptible(drm_dev); + ret = i915_mutex_lock_interruptible(dev); if (ret) return ret; 
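A note on the units in calc_residency() above: on most platforms the RC6 counters tick in 1.28 us units, hence units = 128 and div = 100000 to yield milliseconds (VLV/CHV use CZ-clock ticks and BXT 833.33 ns ticks instead, as the code shows). A standalone restatement of the default path, with the rounding mode assumed for illustration:

static u32 example_rc6_raw_to_ms(u64 raw_time)
{
	/* 1.28 us per counter tick: ms = raw * 128 / 100000 */
	return DIV_ROUND_UP_ULL(raw_time * 128, 100000);
}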
@@ -189,7 +191,7 @@ i915_l3_read(struct file *filp, struct kobject *kobj, else memset(buf, 0, count); - mutex_unlock(&drm_dev->struct_mutex); + mutex_unlock(&dev->struct_mutex); return count; } @@ -199,30 +201,29 @@ i915_l3_write(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t offset, size_t count) { - struct device *dev = kobj_to_dev(kobj); - struct drm_minor *dminor = dev_to_drm_minor(dev); - struct drm_device *drm_dev = dminor->dev; - struct drm_i915_private *dev_priv = to_i915(drm_dev); + struct device *kdev = kobj_to_dev(kobj); + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct drm_device *dev = &dev_priv->drm; struct i915_gem_context *ctx; u32 *temp = NULL; /* Just here to make handling failures easy */ int slice = (int)(uintptr_t)attr->private; int ret; - if (!HAS_HW_CONTEXTS(drm_dev)) + if (!HAS_HW_CONTEXTS(dev_priv)) return -ENXIO; - ret = l3_access_valid(drm_dev, offset); + ret = l3_access_valid(dev_priv, offset); if (ret) return ret; - ret = i915_mutex_lock_interruptible(drm_dev); + ret = i915_mutex_lock_interruptible(dev); if (ret) return ret; if (!dev_priv->l3_parity.remap_info[slice]) { temp = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL); if (!temp) { - mutex_unlock(&drm_dev->struct_mutex); + mutex_unlock(&dev->struct_mutex); return -ENOMEM; } } @@ -240,7 +241,7 @@ i915_l3_write(struct file *filp, struct kobject *kobj, list_for_each_entry(ctx, &dev_priv->context_list, link) ctx->remap_slice |= (1<<slice); - mutex_unlock(&drm_dev->struct_mutex); + mutex_unlock(&dev->struct_mutex); return count; } @@ -266,13 +267,9 @@ static struct bin_attribute dpf_attrs_1 = { static ssize_t gt_act_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { - struct drm_minor *minor = dev_to_drm_minor(kdev); - struct drm_device *dev = minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); int ret; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - intel_runtime_pm_get(dev_priv); mutex_lock(&dev_priv->rps.hw_lock); @@ -300,59 +297,70 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, static ssize_t gt_cur_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { - struct drm_minor *minor = dev_to_drm_minor(kdev); - struct drm_device *dev = minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - int ret; + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + + return snprintf(buf, PAGE_SIZE, "%d\n", + intel_gpu_freq(dev_priv, + dev_priv->rps.cur_freq)); +} - flush_delayed_work(&dev_priv->rps.delayed_resume_work); +static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) +{ + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - intel_runtime_pm_get(dev_priv); + return snprintf(buf, PAGE_SIZE, "%d\n", + intel_gpu_freq(dev_priv, + dev_priv->rps.boost_freq)); +} + +static ssize_t gt_boost_freq_mhz_store(struct device *kdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + u32 val; + ssize_t ret; + + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; + + /* Validate against (static) hardware limits */ + val = intel_freq_opcode(dev_priv, val); + if (val < dev_priv->rps.min_freq || val > dev_priv->rps.max_freq) + return -EINVAL; mutex_lock(&dev_priv->rps.hw_lock); - ret = intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq); + dev_priv->rps.boost_freq = val; 
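The new gt_boost_freq_mhz attribute above can be exercised from userspace; a self-contained sketch follows (the card0 path is an assumption for the example). Note that this patch registers the node with S_IRUGO only, unlike the writable max/min attributes, so although a store routine exists the write below also needs suitable permissions:

#include <stdio.h>

int main(void)
{
	const char *path = "/sys/class/drm/card0/gt_boost_freq_mhz";
	unsigned int mhz;
	FILE *f;

	f = fopen(path, "r");
	if (!f || fscanf(f, "%u", &mhz) != 1)
		return 1;
	fclose(f);
	printf("boost frequency: %u MHz\n", mhz);

	/* write back; rejected with EINVAL if outside the hw range */
	f = fopen(path, "w");
	if (!f)
		return 1;
	fprintf(f, "%u\n", mhz);
	return fclose(f) ? 1 : 0;
}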
mutex_unlock(&dev_priv->rps.hw_lock); - intel_runtime_pm_put(dev_priv); - - return snprintf(buf, PAGE_SIZE, "%d\n", ret); + return count; } static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { - struct drm_minor *minor = dev_to_drm_minor(kdev); - struct drm_device *dev = minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - return snprintf(buf, PAGE_SIZE, - "%d\n", - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq)); + return snprintf(buf, PAGE_SIZE, "%d\n", + intel_gpu_freq(dev_priv, + dev_priv->rps.efficient_freq)); } static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { - struct drm_minor *minor = dev_to_drm_minor(kdev); - struct drm_device *dev = minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - int ret; + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - - mutex_lock(&dev_priv->rps.hw_lock); - ret = intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit); - mutex_unlock(&dev_priv->rps.hw_lock); - - return snprintf(buf, PAGE_SIZE, "%d\n", ret); + return snprintf(buf, PAGE_SIZE, "%d\n", + intel_gpu_freq(dev_priv, + dev_priv->rps.max_freq_softlimit)); } static ssize_t gt_max_freq_mhz_store(struct device *kdev, struct device_attribute *attr, const char *buf, size_t count) { - struct drm_minor *minor = dev_to_drm_minor(kdev); - struct drm_device *dev = minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); u32 val; ssize_t ret; @@ -360,8 +368,6 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, if (ret) return ret; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - intel_runtime_pm_get(dev_priv); mutex_lock(&dev_priv->rps.hw_lock); @@ -400,27 +406,18 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { - struct drm_minor *minor = dev_to_drm_minor(kdev); - struct drm_device *dev = minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - int ret; - - flush_delayed_work(&dev_priv->rps.delayed_resume_work); + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - mutex_lock(&dev_priv->rps.hw_lock); - ret = intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit); - mutex_unlock(&dev_priv->rps.hw_lock); - - return snprintf(buf, PAGE_SIZE, "%d\n", ret); + return snprintf(buf, PAGE_SIZE, "%d\n", + intel_gpu_freq(dev_priv, + dev_priv->rps.min_freq_softlimit)); } static ssize_t gt_min_freq_mhz_store(struct device *kdev, struct device_attribute *attr, const char *buf, size_t count) { - struct drm_minor *minor = dev_to_drm_minor(kdev); - struct drm_device *dev = minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); u32 val; ssize_t ret; @@ -428,8 +425,6 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, if (ret) return ret; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - intel_runtime_pm_get(dev_priv); mutex_lock(&dev_priv->rps.hw_lock); @@ -465,6 +460,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, static DEVICE_ATTR(gt_act_freq_mhz, S_IRUGO, gt_act_freq_mhz_show, NULL); static DEVICE_ATTR(gt_cur_freq_mhz, S_IRUGO, gt_cur_freq_mhz_show, NULL); +static DEVICE_ATTR(gt_boost_freq_mhz, S_IRUGO, 
gt_boost_freq_mhz_show, gt_boost_freq_mhz_store); static DEVICE_ATTR(gt_max_freq_mhz, S_IRUGO | S_IWUSR, gt_max_freq_mhz_show, gt_max_freq_mhz_store); static DEVICE_ATTR(gt_min_freq_mhz, S_IRUGO | S_IWUSR, gt_min_freq_mhz_show, gt_min_freq_mhz_store); @@ -478,9 +474,7 @@ static DEVICE_ATTR(gt_RPn_freq_mhz, S_IRUGO, gt_rp_mhz_show, NULL); /* For now we have a static number of RP states */ static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { - struct drm_minor *minor = dev_to_drm_minor(kdev); - struct drm_device *dev = minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); u32 val; if (attr == &dev_attr_gt_RP0_freq_mhz) @@ -498,6 +492,7 @@ static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr static const struct attribute *gen6_attrs[] = { &dev_attr_gt_act_freq_mhz.attr, &dev_attr_gt_cur_freq_mhz.attr, + &dev_attr_gt_boost_freq_mhz.attr, &dev_attr_gt_max_freq_mhz.attr, &dev_attr_gt_min_freq_mhz.attr, &dev_attr_gt_RP0_freq_mhz.attr, @@ -509,6 +504,7 @@ static const struct attribute *gen6_attrs[] = { static const struct attribute *vlv_attrs[] = { &dev_attr_gt_act_freq_mhz.attr, &dev_attr_gt_cur_freq_mhz.attr, + &dev_attr_gt_boost_freq_mhz.attr, &dev_attr_gt_max_freq_mhz.attr, &dev_attr_gt_min_freq_mhz.attr, &dev_attr_gt_RP0_freq_mhz.attr, @@ -524,8 +520,8 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj, { struct device *kdev = kobj_to_dev(kobj); - struct drm_minor *minor = dev_to_drm_minor(kdev); - struct drm_device *dev = minor->dev; + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct drm_device *dev = &dev_priv->drm; struct i915_error_state_file_priv error_priv; struct drm_i915_error_state_buf error_str; ssize_t ret_count = 0; @@ -559,18 +555,10 @@ static ssize_t error_state_write(struct file *file, struct kobject *kobj, loff_t off, size_t count) { struct device *kdev = kobj_to_dev(kobj); - struct drm_minor *minor = dev_to_drm_minor(kdev); - struct drm_device *dev = minor->dev; - int ret; + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); DRM_DEBUG_DRIVER("Resetting error state\n"); - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - - i915_destroy_error_state(dev); - mutex_unlock(&dev->struct_mutex); + i915_destroy_error_state(&dev_priv->drm); return count; } @@ -583,37 +571,38 @@ static struct bin_attribute error_state_attr = { .write = error_state_write, }; -void i915_setup_sysfs(struct drm_device *dev) +void i915_setup_sysfs(struct drm_i915_private *dev_priv) { + struct device *kdev = dev_priv->drm.primary->kdev; int ret; #ifdef CONFIG_PM - if (HAS_RC6(dev)) { - ret = sysfs_merge_group(&dev->primary->kdev->kobj, + if (HAS_RC6(dev_priv)) { + ret = sysfs_merge_group(&kdev->kobj, &rc6_attr_group); if (ret) DRM_ERROR("RC6 residency sysfs setup failed\n"); } - if (HAS_RC6p(dev)) { - ret = sysfs_merge_group(&dev->primary->kdev->kobj, + if (HAS_RC6p(dev_priv)) { + ret = sysfs_merge_group(&kdev->kobj, &rc6p_attr_group); if (ret) DRM_ERROR("RC6p residency sysfs setup failed\n"); } - if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) { - ret = sysfs_merge_group(&dev->primary->kdev->kobj, + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { + ret = sysfs_merge_group(&kdev->kobj, &media_rc6_attr_group); if (ret) DRM_ERROR("Media RC6 residency sysfs setup failed\n"); } #endif - if (HAS_L3_DPF(dev)) { - ret = device_create_bin_file(dev->primary->kdev, &dpf_attrs); + if 
(HAS_L3_DPF(dev_priv)) { + ret = device_create_bin_file(kdev, &dpf_attrs); if (ret) DRM_ERROR("l3 parity sysfs setup failed\n"); - if (NUM_L3_SLICES(dev) > 1) { - ret = device_create_bin_file(dev->primary->kdev, + if (NUM_L3_SLICES(dev_priv) > 1) { + ret = device_create_bin_file(kdev, &dpf_attrs_1); if (ret) DRM_ERROR("l3 parity slice 1 setup failed\n"); @@ -621,30 +610,32 @@ void i915_setup_sysfs(struct drm_device *dev) } ret = 0; - if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) - ret = sysfs_create_files(&dev->primary->kdev->kobj, vlv_attrs); - else if (INTEL_INFO(dev)->gen >= 6) - ret = sysfs_create_files(&dev->primary->kdev->kobj, gen6_attrs); + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + ret = sysfs_create_files(&kdev->kobj, vlv_attrs); + else if (INTEL_GEN(dev_priv) >= 6) + ret = sysfs_create_files(&kdev->kobj, gen6_attrs); if (ret) DRM_ERROR("RPS sysfs setup failed\n"); - ret = sysfs_create_bin_file(&dev->primary->kdev->kobj, + ret = sysfs_create_bin_file(&kdev->kobj, &error_state_attr); if (ret) DRM_ERROR("error_state sysfs setup failed\n"); } -void i915_teardown_sysfs(struct drm_device *dev) +void i915_teardown_sysfs(struct drm_i915_private *dev_priv) { - sysfs_remove_bin_file(&dev->primary->kdev->kobj, &error_state_attr); - if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) - sysfs_remove_files(&dev->primary->kdev->kobj, vlv_attrs); + struct device *kdev = dev_priv->drm.primary->kdev; + + sysfs_remove_bin_file(&kdev->kobj, &error_state_attr); + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + sysfs_remove_files(&kdev->kobj, vlv_attrs); else - sysfs_remove_files(&dev->primary->kdev->kobj, gen6_attrs); - device_remove_bin_file(dev->primary->kdev, &dpf_attrs_1); - device_remove_bin_file(dev->primary->kdev, &dpf_attrs); + sysfs_remove_files(&kdev->kobj, gen6_attrs); + device_remove_bin_file(kdev, &dpf_attrs_1); + device_remove_bin_file(kdev, &dpf_attrs); #ifdef CONFIG_PM - sysfs_unmerge_group(&dev->primary->kdev->kobj, &rc6_attr_group); - sysfs_unmerge_group(&dev->primary->kdev->kobj, &rc6p_attr_group); + sysfs_unmerge_group(&kdev->kobj, &rc6_attr_group); + sysfs_unmerge_group(&kdev->kobj, &rc6p_attr_group); #endif } diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 534154e05fbe..178798002a73 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -394,25 +394,27 @@ DEFINE_EVENT(i915_gem_object, i915_gem_object_destroy, ); TRACE_EVENT(i915_gem_evict, - TP_PROTO(struct drm_device *dev, u32 size, u32 align, unsigned flags), - TP_ARGS(dev, size, align, flags), + TP_PROTO(struct i915_address_space *vm, u32 size, u32 align, unsigned int flags), + TP_ARGS(vm, size, align, flags), TP_STRUCT__entry( __field(u32, dev) + __field(struct i915_address_space *, vm) __field(u32, size) __field(u32, align) - __field(unsigned, flags) + __field(unsigned int, flags) ), TP_fast_assign( - __entry->dev = dev->primary->index; + __entry->dev = vm->dev->primary->index; + __entry->vm = vm; __entry->size = size; __entry->align = align; __entry->flags = flags; ), - TP_printk("dev=%d, size=%d, align=%d %s", - __entry->dev, __entry->size, __entry->align, + TP_printk("dev=%d, vm=%p, size=%d, align=%d %s", + __entry->dev, __entry->vm, __entry->size, __entry->align, __entry->flags & PIN_MAPPABLE ? 
", mappable" : "") ); @@ -449,10 +451,9 @@ TRACE_EVENT(i915_gem_evict_vm, ); TRACE_EVENT(i915_gem_ring_sync_to, - TP_PROTO(struct drm_i915_gem_request *to_req, - struct intel_engine_cs *from, - struct drm_i915_gem_request *req), - TP_ARGS(to_req, from, req), + TP_PROTO(struct drm_i915_gem_request *to, + struct drm_i915_gem_request *from), + TP_ARGS(to, from), TP_STRUCT__entry( __field(u32, dev) @@ -463,9 +464,9 @@ TRACE_EVENT(i915_gem_ring_sync_to, TP_fast_assign( __entry->dev = from->i915->drm.primary->index; - __entry->sync_from = from->id; - __entry->sync_to = to_req->engine->id; - __entry->seqno = i915_gem_request_get_seqno(req); + __entry->sync_from = from->engine->id; + __entry->sync_to = to->engine->id; + __entry->seqno = from->fence.seqno; ), TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%u", @@ -488,9 +489,9 @@ TRACE_EVENT(i915_gem_ring_dispatch, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->seqno; + __entry->seqno = req->fence.seqno; __entry->flags = flags; - intel_engine_enable_signaling(req); + fence_enable_sw_signaling(&req->fence); ), TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x", @@ -533,7 +534,7 @@ DECLARE_EVENT_CLASS(i915_gem_request, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->seqno; + __entry->seqno = req->fence.seqno; ), TP_printk("dev=%u, ring=%u, seqno=%u", @@ -595,7 +596,7 @@ TRACE_EVENT(i915_gem_request_wait_begin, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->seqno; + __entry->seqno = req->fence.seqno; __entry->blocking = mutex_is_locked(&req->i915->drm.struct_mutex); ), diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index f6acb5a0e701..dae340cfc6c7 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -65,9 +65,6 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv) BUILD_BUG_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE); - if (!IS_HASWELL(dev_priv)) - return; - magic = __raw_i915_read64(dev_priv, vgtif_reg(magic)); if (magic != VGT_MAGIC) return; @@ -97,6 +94,7 @@ static struct _balloon_info_ bl_info; /** * intel_vgt_deballoon - deballoon reserved graphics address trunks + * @dev_priv: i915 device private data * * This function is called to deallocate the ballooned-out graphic memory, when * driver is unloaded or when ballooning fails. @@ -138,7 +136,7 @@ static int vgt_balloon_space(struct drm_mm *mm, /** * intel_vgt_balloon - balloon out reserved graphics address trunks - * @dev: drm device + * @dev_priv: i915 device private data * * This function is called at the initialization stage, to balloon out the * graphic address space allocated to other vGPUs, by marking these spaces as @@ -155,27 +153,27 @@ static int vgt_balloon_space(struct drm_mm *mm, * host point of view, the graphic address space is partitioned by multiple * vGPUs in different VMs. 
:: * - * vGPU1 view Host view - * 0 ------> +-----------+ +-----------+ - * ^ |###########| | vGPU3 | - * | |###########| +-----------+ - * | |###########| | vGPU2 | - * | +-----------+ +-----------+ - * mappable GM | available | ==> | vGPU1 | - * | +-----------+ +-----------+ - * | |###########| | | - * v |###########| | Host | - * +=======+===========+ +===========+ - * ^ |###########| | vGPU3 | - * | |###########| +-----------+ - * | |###########| | vGPU2 | - * | +-----------+ +-----------+ - * unmappable GM | available | ==> | vGPU1 | - * | +-----------+ +-----------+ - * | |###########| | | - * | |###########| | Host | - * v |###########| | | - * total GM size ------> +-----------+ +-----------+ + * vGPU1 view Host view + * 0 ------> +-----------+ +-----------+ + * ^ |###########| | vGPU3 | + * | |###########| +-----------+ + * | |###########| | vGPU2 | + * | +-----------+ +-----------+ + * mappable GM | available | ==> | vGPU1 | + * | +-----------+ +-----------+ + * | |###########| | | + * v |###########| | Host | + * +=======+===========+ +===========+ + * ^ |###########| | vGPU3 | + * | |###########| +-----------+ + * | |###########| | vGPU2 | + * | +-----------+ +-----------+ + * unmappable GM | available | ==> | vGPU1 | + * | +-----------+ +-----------+ + * | |###########| | | + * | |###########| | Host | + * v |###########| | | + * total GM size ------> +-----------+ +-----------+ * * Returns: * zero on success, non-zero if configuration invalid or ballooning failed diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index 7de7721f65bc..b82de3072d4f 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -55,7 +55,7 @@ intel_create_plane_state(struct drm_plane *plane) return NULL; state->base.plane = plane; - state->base.rotation = BIT(DRM_ROTATE_0); + state->base.rotation = DRM_ROTATE_0; state->ckey.flags = I915_SET_COLORKEY_NONE; return state; @@ -134,20 +134,6 @@ static int intel_plane_atomic_check(struct drm_plane *plane, crtc_state = to_intel_crtc_state(drm_crtc_state); - /* - * The original src/dest coordinates are stored in state->base, but - * we want to keep another copy internal to our driver that we can - * clip/modify ourselves. - */ - intel_state->src.x1 = state->src_x; - intel_state->src.y1 = state->src_y; - intel_state->src.x2 = state->src_x + state->src_w; - intel_state->src.y2 = state->src_y + state->src_h; - intel_state->dst.x1 = state->crtc_x; - intel_state->dst.y1 = state->crtc_y; - intel_state->dst.x2 = state->crtc_x + state->crtc_w; - intel_state->dst.y2 = state->crtc_y + state->crtc_h; - /* Clip all planes to CRTC size, or 0x0 if CRTC is disabled */ intel_state->clip.x1 = 0; intel_state->clip.y1 = 0; @@ -157,6 +143,7 @@ static int intel_plane_atomic_check(struct drm_plane *plane, crtc_state->base.enable ? 
crtc_state->pipe_src_h : 0; if (state->fb && intel_rotation_90_or_270(state->rotation)) { + char *format_name; if (!(state->fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || state->fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED)) { DRM_DEBUG_KMS("Y/Yf tiling required for 90/270!\n"); @@ -171,8 +158,9 @@ static int intel_plane_atomic_check(struct drm_plane *plane, switch (state->fb->pixel_format) { case DRM_FORMAT_C8: case DRM_FORMAT_RGB565: - DRM_DEBUG_KMS("Unsupported pixel format %s for 90/270!\n", - drm_get_format_name(state->fb->pixel_format)); + format_name = drm_get_format_name(state->fb->pixel_format); + DRM_DEBUG_KMS("Unsupported pixel format %s for 90/270!\n", format_name); + kfree(format_name); return -EINVAL; default: @@ -180,7 +168,7 @@ static int intel_plane_atomic_check(struct drm_plane *plane, } } - intel_state->visible = false; + intel_state->base.visible = false; ret = intel_plane->check_plane(plane, crtc_state, intel_state); if (ret) return ret; @@ -196,7 +184,7 @@ static void intel_plane_atomic_update(struct drm_plane *plane, to_intel_plane_state(plane->state); struct drm_crtc *crtc = plane->state->crtc ?: old_state->crtc; - if (intel_state->visible) + if (intel_state->base.visible) intel_plane->update_plane(plane, to_intel_crtc_state(crtc->state), intel_state); diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 6700a7be7f78..6c70a5bfd7d8 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -51,10 +51,10 @@ * related registers. (The notable exception is the power management, not * covered here.) * - * The struct i915_audio_component is used to interact between the graphics - * and audio drivers. The struct i915_audio_component_ops *ops in it is + * The struct &i915_audio_component is used to interact between the graphics + * and audio drivers. The struct &i915_audio_component_ops @ops in it is * defined in graphics driver and called in audio driver. The - * struct i915_audio_component_audio_ops *audio_ops is called from i915 driver. + * struct &i915_audio_component_audio_ops @audio_ops is called from i915 driver. 
*/ static const struct { @@ -359,9 +359,7 @@ static void ilk_audio_codec_disable(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); - struct intel_digital_port *intel_dig_port = - enc_to_dig_port(&encoder->base); - enum port port = intel_dig_port->port; + enum port port = enc_to_dig_port(&encoder->base)->port; enum pipe pipe = intel_crtc->pipe; uint32_t tmp, eldv; i915_reg_t aud_config, aud_cntrl_st2; @@ -407,13 +405,10 @@ static void ilk_audio_codec_enable(struct drm_connector *connector, { struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); - struct intel_digital_port *intel_dig_port = - enc_to_dig_port(&encoder->base); - enum port port = intel_dig_port->port; + enum port port = enc_to_dig_port(&encoder->base)->port; enum pipe pipe = intel_crtc->pipe; uint8_t *eld = connector->eld; - uint32_t eldv; - uint32_t tmp; + uint32_t tmp, eldv; int len, i; i915_reg_t hdmiw_hdmiedid, aud_config, aud_cntl_st, aud_cntrl_st2; @@ -581,25 +576,27 @@ void intel_init_audio_hooks(struct drm_i915_private *dev_priv) } } -static void i915_audio_component_get_power(struct device *dev) +static void i915_audio_component_get_power(struct device *kdev) { - intel_display_power_get(dev_to_i915(dev), POWER_DOMAIN_AUDIO); + intel_display_power_get(kdev_to_i915(kdev), POWER_DOMAIN_AUDIO); } -static void i915_audio_component_put_power(struct device *dev) +static void i915_audio_component_put_power(struct device *kdev) { - intel_display_power_put(dev_to_i915(dev), POWER_DOMAIN_AUDIO); + intel_display_power_put(kdev_to_i915(kdev), POWER_DOMAIN_AUDIO); } -static void i915_audio_component_codec_wake_override(struct device *dev, +static void i915_audio_component_codec_wake_override(struct device *kdev, bool enable) { - struct drm_i915_private *dev_priv = dev_to_i915(dev); + struct drm_i915_private *dev_priv = kdev_to_i915(kdev); u32 tmp; if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)) return; + i915_audio_component_get_power(kdev); + /* * Enable/disable generating the codec wake signal, overriding the * internal logic to generate the codec wake to controller. @@ -615,12 +612,14 @@ static void i915_audio_component_codec_wake_override(struct device *dev, I915_WRITE(HSW_AUD_CHICKENBIT, tmp); usleep_range(1000, 1500); } + + i915_audio_component_put_power(kdev); } /* Get CDCLK in kHz */ -static int i915_audio_component_get_cdclk_freq(struct device *dev) +static int i915_audio_component_get_cdclk_freq(struct device *kdev) { - struct drm_i915_private *dev_priv = dev_to_i915(dev); + struct drm_i915_private *dev_priv = kdev_to_i915(kdev); if (WARN_ON_ONCE(!HAS_DDI(dev_priv))) return -ENODEV; @@ -628,10 +627,10 @@ static int i915_audio_component_get_cdclk_freq(struct device *dev) return dev_priv->cdclk_freq; } -static int i915_audio_component_sync_audio_rate(struct device *dev, +static int i915_audio_component_sync_audio_rate(struct device *kdev, int port, int rate) { - struct drm_i915_private *dev_priv = dev_to_i915(dev); + struct drm_i915_private *dev_priv = kdev_to_i915(kdev); struct intel_encoder *intel_encoder; struct intel_crtc *crtc; struct drm_display_mode *mode; @@ -648,6 +647,7 @@ static int i915_audio_component_sync_audio_rate(struct device *dev, !IS_HASWELL(dev_priv)) return 0; + i915_audio_component_get_power(kdev); mutex_lock(&dev_priv->av_mutex); /* 1. 
get the pipe */ intel_encoder = dev_priv->dig_port_map[port]; @@ -698,14 +698,15 @@ static int i915_audio_component_sync_audio_rate(struct device *dev, unlock: mutex_unlock(&dev_priv->av_mutex); + i915_audio_component_put_power(kdev); return err; } -static int i915_audio_component_get_eld(struct device *dev, int port, +static int i915_audio_component_get_eld(struct device *kdev, int port, bool *enabled, unsigned char *buf, int max_bytes) { - struct drm_i915_private *dev_priv = dev_to_i915(dev); + struct drm_i915_private *dev_priv = kdev_to_i915(kdev); struct intel_encoder *intel_encoder; struct intel_digital_port *intel_dig_port; const u8 *eld; @@ -739,11 +740,11 @@ static const struct i915_audio_component_ops i915_audio_component_ops = { .get_eld = i915_audio_component_get_eld, }; -static int i915_audio_component_bind(struct device *i915_dev, - struct device *hda_dev, void *data) +static int i915_audio_component_bind(struct device *i915_kdev, + struct device *hda_kdev, void *data) { struct i915_audio_component *acomp = data; - struct drm_i915_private *dev_priv = dev_to_i915(i915_dev); + struct drm_i915_private *dev_priv = kdev_to_i915(i915_kdev); int i; if (WARN_ON(acomp->ops || acomp->dev)) @@ -751,7 +752,7 @@ static int i915_audio_component_bind(struct device *i915_dev, drm_modeset_lock_all(&dev_priv->drm); acomp->ops = &i915_audio_component_ops; - acomp->dev = i915_dev; + acomp->dev = i915_kdev; BUILD_BUG_ON(MAX_PORTS != I915_MAX_PORTS); for (i = 0; i < ARRAY_SIZE(acomp->aud_sample_rate); i++) acomp->aud_sample_rate[i] = 0; @@ -761,11 +762,11 @@ static int i915_audio_component_bind(struct device *i915_dev, return 0; } -static void i915_audio_component_unbind(struct device *i915_dev, - struct device *hda_dev, void *data) +static void i915_audio_component_unbind(struct device *i915_kdev, + struct device *hda_kdev, void *data) { struct i915_audio_component *acomp = data; - struct drm_i915_private *dev_priv = dev_to_i915(i915_dev); + struct drm_i915_private *dev_priv = kdev_to_i915(i915_kdev); drm_modeset_lock_all(&dev_priv->drm); acomp->ops = NULL; diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index b074f3d6d127..495611b7068d 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -26,6 +26,40 @@ #include "i915_drv.h" +static void intel_breadcrumbs_hangcheck(unsigned long data) +{ + struct intel_engine_cs *engine = (struct intel_engine_cs *)data; + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + if (!b->irq_enabled) + return; + + if (time_before(jiffies, b->timeout)) { + mod_timer(&b->hangcheck, b->timeout); + return; + } + + DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name); + set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); + mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); + + /* Ensure that even if the GPU hangs, we get woken up. + * + * However, note that if no one is waiting, we never notice + * a gpu hang. Eventually, we will have to wait for a resource + * held by the GPU and so trigger a hangcheck. In the most + * pathological case, this will be upon memory starvation! To + * prevent this, we also queue the hangcheck from the retire + * worker. 
+ */ + i915_queue_hangcheck(engine->i915); +} + +static unsigned long wait_timeout(void) +{ + return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES); +} + static void intel_breadcrumbs_fake_irq(unsigned long data) { struct intel_engine_cs *engine = (struct intel_engine_cs *)data; @@ -37,10 +71,8 @@ static void intel_breadcrumbs_fake_irq(unsigned long data) * every jiffie in order to kick the oldest waiter to do the * coherent seqno check. */ - rcu_read_lock(); if (intel_engine_wakeup(engine)) mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); - rcu_read_unlock(); } static void irq_enable(struct intel_engine_cs *engine) @@ -91,17 +123,13 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) } if (!b->irq_enabled || - test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) + test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) { mod_timer(&b->fake_irq, jiffies + 1); - - /* Ensure that even if the GPU hangs, we get woken up. - * - * However, note that if no one is waiting, we never notice - * a gpu hang. Eventually, we will have to wait for a resource - * held by the GPU and so trigger a hangcheck. In the most - * pathological case, this will be upon memory starvation! - */ - i915_queue_hangcheck(i915); + } else { + /* Ensure we never sleep indefinitely */ + GEM_BUG_ON(!time_after(b->timeout, jiffies)); + mod_timer(&b->hangcheck, b->timeout); + } } static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b) @@ -204,7 +232,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, } rb_link_node(&wait->node, parent, p); rb_insert_color(&wait->node, &b->waiters); - GEM_BUG_ON(!first && !b->irq_seqno_bh); + GEM_BUG_ON(!first && !rcu_access_pointer(b->irq_seqno_bh)); if (completed) { struct rb_node *next = rb_next(completed); @@ -212,8 +240,9 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, GEM_BUG_ON(!next && !first); if (next && next != &wait->node) { GEM_BUG_ON(first); + b->timeout = wait_timeout(); b->first_wait = to_wait(next); - smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk); + rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk); /* As there is a delay between reading the current * seqno, processing the completed tasks and selecting * the next waiter, we may have missed the interrupt @@ -238,8 +267,9 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, if (first) { GEM_BUG_ON(rb_first(&b->waiters) != &wait->node); + b->timeout = wait_timeout(); b->first_wait = wait; - smp_store_mb(b->irq_seqno_bh, wait->tsk); + rcu_assign_pointer(b->irq_seqno_bh, wait->tsk); /* After assigning ourselves as the new bottom-half, we must * perform a cursory check to prevent a missed interrupt. 
* Either we miss the interrupt whilst programming the hardware, @@ -250,7 +280,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, */ __intel_breadcrumbs_enable_irq(b); } - GEM_BUG_ON(!b->irq_seqno_bh); + GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh)); GEM_BUG_ON(!b->first_wait); GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node); @@ -270,11 +300,6 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine, return first; } -void intel_engine_enable_fake_irq(struct intel_engine_cs *engine) -{ - mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); -} - static inline bool chain_wakeup(struct rb_node *rb, int priority) { return rb && to_wait(rb)->tsk->prio <= priority; @@ -310,7 +335,7 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, const int priority = wakeup_priority(b, wait->tsk); struct rb_node *next; - GEM_BUG_ON(b->irq_seqno_bh != wait->tsk); + GEM_BUG_ON(rcu_access_pointer(b->irq_seqno_bh) != wait->tsk); /* We are the current bottom-half. Find the next candidate, * the first waiter in the queue on the remaining oldest @@ -352,14 +377,15 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, * the interrupt, or if we have to handle an * exception rather than a seqno completion. */ + b->timeout = wait_timeout(); b->first_wait = to_wait(next); - smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk); + rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk); if (b->first_wait->seqno != wait->seqno) __intel_breadcrumbs_enable_irq(b); - wake_up_process(b->irq_seqno_bh); + wake_up_process(b->first_wait->tsk); } else { b->first_wait = NULL; - WRITE_ONCE(b->irq_seqno_bh, NULL); + rcu_assign_pointer(b->irq_seqno_bh, NULL); __intel_breadcrumbs_disable_irq(b); } } else { @@ -373,7 +399,7 @@ out_unlock: GEM_BUG_ON(b->first_wait == wait); GEM_BUG_ON(rb_first(&b->waiters) != (b->first_wait ? &b->first_wait->node : NULL)); - GEM_BUG_ON(!b->irq_seqno_bh ^ RB_EMPTY_ROOT(&b->waiters)); + GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters)); spin_unlock(&b->lock); } @@ -437,6 +463,10 @@ static int intel_breadcrumbs_signaler(void *arg) intel_engine_remove_wait(engine, &request->signaling.wait); + local_bh_disable(); + fence_signal(&request->fence); + local_bh_enable(); /* kick start the tasklets */ + /* Find the next oldest signal. Note that as we have * not been holding the lock, another client may * have installed an even older signal than the one @@ -452,7 +482,7 @@ static int intel_breadcrumbs_signaler(void *arg) rb_erase(&request->signaling.node, &b->signals); spin_unlock(&b->lock); - i915_gem_request_unreference(request); + i915_gem_request_put(request); } else { if (kthread_should_stop()) break; @@ -472,18 +502,14 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) struct rb_node *parent, **p; bool first, wakeup; - if (unlikely(READ_ONCE(request->signaling.wait.tsk))) - return; - - spin_lock(&b->lock); - if (unlikely(request->signaling.wait.tsk)) { - wakeup = false; - goto unlock; - } + /* locked by fence_enable_sw_signaling() */ + assert_spin_locked(&request->lock); request->signaling.wait.tsk = b->signaler; - request->signaling.wait.seqno = request->seqno; - i915_gem_request_reference(request); + request->signaling.wait.seqno = request->fence.seqno; + i915_gem_request_get(request); + + spin_lock(&b->lock); /* First add ourselves into the list of waiters, but register our * bottom-half as the signaller thread. 
As per usual, only the oldest @@ -504,8 +530,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) p = &b->signals.rb_node; while (*p) { parent = *p; - if (i915_seqno_passed(request->seqno, - to_signaler(parent)->seqno)) { + if (i915_seqno_passed(request->fence.seqno, + to_signaler(parent)->fence.seqno)) { p = &parent->rb_right; first = false; } else { @@ -517,7 +543,6 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) if (first) smp_store_mb(b->first_signal, request); -unlock: spin_unlock(&b->lock); if (wakeup) @@ -533,6 +558,9 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) setup_timer(&b->fake_irq, intel_breadcrumbs_fake_irq, (unsigned long)engine); + setup_timer(&b->hangcheck, + intel_breadcrumbs_hangcheck, + (unsigned long)engine); /* Spawn a thread to provide a common bottom-half for all signals. * As this is an asynchronous interface we cannot steal the current @@ -550,6 +578,36 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) return 0; } +static void cancel_fake_irq(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + del_timer_sync(&b->hangcheck); + del_timer_sync(&b->fake_irq); + clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); +} + +void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + cancel_fake_irq(engine); + spin_lock(&b->lock); + + __intel_breadcrumbs_disable_irq(b); + if (intel_engine_has_waiter(engine)) { + b->timeout = wait_timeout(); + __intel_breadcrumbs_enable_irq(b); + if (READ_ONCE(b->irq_posted)) + wake_up_process(b->first_wait->tsk); + } else { + /* sanitize the IMR and unmask any auxiliary interrupts */ + irq_disable(engine); + } + + spin_unlock(&b->lock); +} + void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; @@ -557,7 +615,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) if (!IS_ERR_OR_NULL(b->signaler)) kthread_stop(b->signaler); - del_timer_sync(&b->fake_irq); + cancel_fake_irq(engine); } unsigned int intel_kick_waiters(struct drm_i915_private *i915) @@ -570,11 +628,9 @@ unsigned int intel_kick_waiters(struct drm_i915_private *i915) * RCU lock, i.e. as we call wake_up_process() we must be holding the * rcu_read_lock(). */ - rcu_read_lock(); for_each_engine(engine, i915) if (unlikely(intel_engine_wakeup(engine))) mask |= intel_engine_flag(engine); - rcu_read_unlock(); return mask; } diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index bc0fef3d3335..95a72771eea6 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -100,13 +100,14 @@ static void i9xx_load_csc_matrix(struct drm_crtc_state *crtc_state) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int i, pipe = intel_crtc->pipe; uint16_t coeffs[9] = { 0, }; + struct intel_crtc_state *intel_crtc_state = to_intel_crtc_state(crtc_state); if (crtc_state->ctm) { struct drm_color_ctm *ctm = (struct drm_color_ctm *)crtc_state->ctm->data; uint64_t input[9] = { 0, }; - if (intel_crtc->config->limited_color_range) { + if (intel_crtc_state->limited_color_range) { ctm_mult_by_limited(input, ctm->matrix); } else { for (i = 0; i < ARRAY_SIZE(input); i++) @@ -158,7 +159,7 @@ static void i9xx_load_csc_matrix(struct drm_crtc_state *crtc_state) * into consideration. 
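One detail worth noting in the intel_breadcrumbs.c signaling hunks above: the signal tree is kept ordered by i915_seqno_passed(), the wraparound-safe "has seq1 reached seq2" test, now applied to request->fence.seqno. That test is conventionally the signed-difference idiom, sketched here for reference:

#include <linux/types.h>

/* Wraparound-safe ordering (the i915_seqno_passed() idiom): correct
 * whenever the two sequence numbers are within 2^31 of each other. */
static inline bool seqno_passed(u32 seq1, u32 seq2)
{
	return (s32)(seq1 - seq2) >= 0;
}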
*/ for (i = 0; i < 3; i++) { - if (intel_crtc->config->limited_color_range) + if (intel_crtc_state->limited_color_range) coeffs[i * 3 + i] = I9XX_CSC_COEFF_LIMITED_RANGE; else @@ -182,7 +183,7 @@ static void i9xx_load_csc_matrix(struct drm_crtc_state *crtc_state) if (INTEL_INFO(dev)->gen > 6) { uint16_t postoff = 0; - if (intel_crtc->config->limited_color_range) + if (intel_crtc_state->limited_color_range) postoff = (16 * (1 << 12) / 255) & 0x1fff; I915_WRITE(PIPE_CSC_POSTOFF_HI(pipe), postoff); @@ -193,7 +194,7 @@ static void i9xx_load_csc_matrix(struct drm_crtc_state *crtc_state) } else { uint32_t mode = CSC_MODE_YUV_TO_RGB; - if (intel_crtc->config->limited_color_range) + if (intel_crtc_state->limited_color_range) mode |= CSC_BLACK_SCREEN_OFFSET; I915_WRITE(PIPE_CSC_MODE(pipe), mode); @@ -263,7 +264,8 @@ void intel_color_set_csc(struct drm_crtc_state *crtc_state) /* Loads the legacy palette/gamma unit for the CRTC. */ static void i9xx_load_luts_internal(struct drm_crtc *crtc, - struct drm_property_blob *blob) + struct drm_property_blob *blob, + struct intel_crtc_state *crtc_state) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -272,7 +274,7 @@ static void i9xx_load_luts_internal(struct drm_crtc *crtc, int i; if (HAS_GMCH_DISPLAY(dev)) { - if (intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DSI)) + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI)) assert_dsi_pll_enabled(dev_priv); else assert_pll_enabled(dev_priv, pipe); @@ -305,7 +307,8 @@ static void i9xx_load_luts_internal(struct drm_crtc *crtc, static void i9xx_load_luts(struct drm_crtc_state *crtc_state) { - i9xx_load_luts_internal(crtc_state->crtc, crtc_state->gamma_lut); + i9xx_load_luts_internal(crtc_state->crtc, crtc_state->gamma_lut, + to_intel_crtc_state(crtc_state)); } /* Loads the legacy palette/gamma unit for the CRTC on Haswell. */ @@ -323,7 +326,7 @@ static void haswell_load_luts(struct drm_crtc_state *crtc_state) * Workaround : Do not read or write the pipe palette/gamma data while * GAMMA_MODE is configured for split gamma and IPS_CTL has IPS enabled. */ - if (IS_HASWELL(dev) && intel_crtc->config->ips_enabled && + if (IS_HASWELL(dev) && intel_crtc_state->ips_enabled && (intel_crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT)) { hsw_disable_ips(intel_crtc); reenable_ips = true; @@ -436,7 +439,8 @@ static void cherryview_load_luts(struct drm_crtc_state *state) /* Turn off degamma/gamma on CGM block. */ I915_WRITE(CGM_PIPE_MODE(pipe), (state->ctm ? CGM_PIPE_MODE_CSC : 0)); - i9xx_load_luts_internal(crtc, state->gamma_lut); + i9xx_load_luts_internal(crtc, state->gamma_lut, + to_intel_crtc_state(state)); return; } @@ -479,7 +483,7 @@ static void cherryview_load_luts(struct drm_crtc_state *state) * Also program a linear LUT in the legacy block (behind the * CGM block). */ - i9xx_load_luts_internal(crtc, NULL); + i9xx_load_luts_internal(crtc, NULL, to_intel_crtc_state(state)); } void intel_color_load_luts(struct drm_crtc_state *crtc_state) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 827b6ef4e9ae..dfbcf16b41df 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -143,13 +143,15 @@ static void hsw_crt_get_config(struct intel_encoder *encoder, /* Note: The caller is required to filter out dpms modes not supported by the * platform. 
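The limited-range handling in the i9xx_load_csc_matrix() hunks above is easy to sanity-check: broadcast-range video puts black at 16/255 of full scale, and the pipe CSC post-offset is that fraction expressed in 0.12 fixed point. A standalone check, reusing only the expression already visible in the hunk:

#include <stdio.h>

int main(void)
{
	/* Same expression as the PIPE_CSC_POSTOFF write above. */
	unsigned int postoff = (16 * (1 << 12) / 255) & 0x1fff;

	/* 16 * 4096 / 255 = 257 = 0x101, i.e. roughly 16/255 of 1 << 12 */
	printf("postoff = %u (0x%03x)\n", postoff, postoff);
	return 0;
}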
*/ -static void intel_crt_set_dpms(struct intel_encoder *encoder, int mode) +static void intel_crt_set_dpms(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + int mode) { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crt *crt = intel_encoder_to_crt(encoder); - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); - const struct drm_display_mode *adjusted_mode = &crtc->config->base.adjusted_mode; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; u32 adpa; if (INTEL_INFO(dev)->gen >= 5) @@ -193,23 +195,45 @@ static void intel_crt_set_dpms(struct intel_encoder *encoder, int mode) I915_WRITE(crt->adpa_reg, adpa); } -static void intel_disable_crt(struct intel_encoder *encoder) +static void intel_disable_crt(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { - intel_crt_set_dpms(encoder, DRM_MODE_DPMS_OFF); + intel_crt_set_dpms(encoder, old_crtc_state, DRM_MODE_DPMS_OFF); } -static void pch_disable_crt(struct intel_encoder *encoder) +static void pch_disable_crt(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { } -static void pch_post_disable_crt(struct intel_encoder *encoder) +static void pch_post_disable_crt(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { - intel_disable_crt(encoder); + intel_disable_crt(encoder, old_crtc_state, old_conn_state); } -static void intel_enable_crt(struct intel_encoder *encoder) +static void hsw_post_disable_crt(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { - intel_crt_set_dpms(encoder, DRM_MODE_DPMS_ON); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + pch_post_disable_crt(encoder, old_crtc_state, old_conn_state); + + lpt_disable_pch_transcoder(dev_priv); + lpt_disable_iclkip(dev_priv); + + intel_ddi_fdi_post_disable(encoder, old_crtc_state, old_conn_state); +} + +static void intel_enable_crt(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) +{ + intel_crt_set_dpms(encoder, pipe_config, DRM_MODE_DPMS_ON); } static enum drm_mode_status @@ -253,7 +277,8 @@ intel_crt_mode_valid(struct drm_connector *connector, } static bool intel_crt_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config) + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct drm_device *dev = encoder->base.dev; @@ -894,6 +919,7 @@ void intel_crt_init(struct drm_device *dev) if (HAS_DDI(dev)) { crt->base.get_config = hsw_crt_get_config; crt->base.get_hw_state = intel_ddi_get_hw_state; + crt->base.post_disable = hsw_post_disable_crt; } else { crt->base.get_config = intel_crt_get_config; crt->base.get_hw_state = intel_crt_get_hw_state; diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 3edb9580928e..1ea0e1f43397 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -32,24 +32,17 @@ * onwards to drive newly added DMC (Display microcontroller) in display * engine to save and restore the state of display engine when it enter into * low-power state and comes back to normal. 
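The intel_crt.c hunks above show a pattern this patch repeats across every encoder: the enable/disable hooks grow explicit crtc and connector state arguments instead of chasing encoder->base.crtc at call time. The resulting shape, sketched as a hypothetical vtable (the real function pointers live directly on struct intel_encoder):

/* Illustrative only; mirrors the new hook signatures above. */
struct encoder_hooks {
	void (*enable)(struct intel_encoder *encoder,
		       struct intel_crtc_state *pipe_config,
		       struct drm_connector_state *conn_state);
	void (*disable)(struct intel_encoder *encoder,
		       struct intel_crtc_state *old_crtc_state,
		       struct drm_connector_state *old_conn_state);
	void (*post_disable)(struct intel_encoder *encoder,
			     struct intel_crtc_state *old_crtc_state,
			     struct drm_connector_state *old_conn_state);
};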
- * - * Firmware loading status will be one of the below states: FW_UNINITIALIZED, - * FW_LOADED, FW_FAILED. - * - * Once the firmware is written into the registers status will be moved from - * FW_UNINITIALIZED to FW_LOADED and for any erroneous condition status will - * be moved to FW_FAILED. */ -#define I915_CSR_KBL "i915/kbl_dmc_ver1.bin" +#define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin" MODULE_FIRMWARE(I915_CSR_KBL); #define KBL_CSR_VERSION_REQUIRED CSR_VERSION(1, 1) -#define I915_CSR_SKL "i915/skl_dmc_ver1.bin" +#define I915_CSR_SKL "i915/skl_dmc_ver1_26.bin" MODULE_FIRMWARE(I915_CSR_SKL); -#define SKL_CSR_VERSION_REQUIRED CSR_VERSION(1, 23) +#define SKL_CSR_VERSION_REQUIRED CSR_VERSION(1, 26) -#define I915_CSR_BXT "i915/bxt_dmc_ver1.bin" +#define I915_CSR_BXT "i915/bxt_dmc_ver1_07.bin" MODULE_FIRMWARE(I915_CSR_BXT); #define BXT_CSR_VERSION_REQUIRED CSR_VERSION(1, 7) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index dd1d6fe12297..15d47c87def6 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -145,7 +145,7 @@ static const struct ddi_buf_trans skl_ddi_translations_dp[] = { static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = { { 0x0000201B, 0x000000A2, 0x0 }, { 0x00005012, 0x00000088, 0x0 }, - { 0x80007011, 0x000000CD, 0x0 }, + { 0x80007011, 0x000000CD, 0x1 }, { 0x80009010, 0x000000C0, 0x1 }, { 0x0000201B, 0x0000009D, 0x0 }, { 0x80005012, 0x000000C0, 0x1 }, @@ -158,7 +158,7 @@ static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = { static const struct ddi_buf_trans skl_y_ddi_translations_dp[] = { { 0x00000018, 0x000000A2, 0x0 }, { 0x00005012, 0x00000088, 0x0 }, - { 0x80007011, 0x000000CD, 0x0 }, + { 0x80007011, 0x000000CD, 0x3 }, { 0x80009010, 0x000000C0, 0x3 }, { 0x00000018, 0x0000009D, 0x0 }, { 0x80005012, 0x000000C0, 0x3 }, @@ -301,45 +301,34 @@ static const struct bxt_ddi_buf_trans bxt_ddi_translations_hdmi[] = { { 154, 0x9A, 1, 128, true }, /* 9: 1200 0 */ }; -static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, - u32 level, enum port port, int type); - -static void ddi_get_encoder_port(struct intel_encoder *intel_encoder, - struct intel_digital_port **dig_port, - enum port *port) +enum port intel_ddi_get_encoder_port(struct intel_encoder *encoder) { - struct drm_encoder *encoder = &intel_encoder->base; - - switch (intel_encoder->type) { + switch (encoder->type) { case INTEL_OUTPUT_DP_MST: - *dig_port = enc_to_mst(encoder)->primary; - *port = (*dig_port)->port; - break; - default: - WARN(1, "Invalid DDI encoder type %d\n", intel_encoder->type); - /* fallthrough and treat as unknown */ + return enc_to_mst(&encoder->base)->primary->port; case INTEL_OUTPUT_DP: case INTEL_OUTPUT_EDP: case INTEL_OUTPUT_HDMI: case INTEL_OUTPUT_UNKNOWN: - *dig_port = enc_to_dig_port(encoder); - *port = (*dig_port)->port; - break; + return enc_to_dig_port(&encoder->base)->port; case INTEL_OUTPUT_ANALOG: - *dig_port = NULL; - *port = PORT_E; - break; + return PORT_E; + default: + MISSING_CASE(encoder->type); + return PORT_A; } } -enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder) +static const struct ddi_buf_trans * +bdw_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) { - struct intel_digital_port *dig_port; - enum port port; - - ddi_get_encoder_port(intel_encoder, &dig_port, &port); - - return port; + if (dev_priv->vbt.edp.low_vswing) { + *n_entries = ARRAY_SIZE(bdw_ddi_translations_edp); + return bdw_ddi_translations_edp; + } else { + *n_entries = 
ARRAY_SIZE(bdw_ddi_translations_dp); + return bdw_ddi_translations_dp; + } } static const struct ddi_buf_trans * @@ -388,39 +377,58 @@ skl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries) } } +static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port) +{ + int n_hdmi_entries; + int hdmi_level; + int hdmi_default_entry; + + hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift; + + if (IS_BROXTON(dev_priv)) + return hdmi_level; + + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); + hdmi_default_entry = 8; + } else if (IS_BROADWELL(dev_priv)) { + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + hdmi_default_entry = 7; + } else if (IS_HASWELL(dev_priv)) { + n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); + hdmi_default_entry = 6; + } else { + WARN(1, "ddi translation table missing\n"); + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + hdmi_default_entry = 7; + } + + /* Choose a good default if VBT is badly populated */ + if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN || + hdmi_level >= n_hdmi_entries) + hdmi_level = hdmi_default_entry; + + return hdmi_level; +} + /* * Starting with Haswell, DDI port buffers must be programmed with correct - * values in advance. The buffer values are different for FDI and DP modes, - * but the HDMI/DVI fields are shared among those. So we program the DDI - * in either FDI or DP modes only, as HDMI connections will work with both - * of those + * values in advance. This function programs the correct values for + * DP/eDP/FDI use cases. */ -void intel_prepare_ddi_buffer(struct intel_encoder *encoder) +void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 iboost_bit = 0; - int i, n_hdmi_entries, n_dp_entries, n_edp_entries, hdmi_default_entry, - size; - int hdmi_level; - enum port port; + int i, n_dp_entries, n_edp_entries, size; + enum port port = intel_ddi_get_encoder_port(encoder); const struct ddi_buf_trans *ddi_translations_fdi; const struct ddi_buf_trans *ddi_translations_dp; const struct ddi_buf_trans *ddi_translations_edp; - const struct ddi_buf_trans *ddi_translations_hdmi; const struct ddi_buf_trans *ddi_translations; - port = intel_ddi_get_encoder_port(encoder); - hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift; - - if (IS_BROXTON(dev_priv)) { - if (encoder->type != INTEL_OUTPUT_HDMI) - return; - - /* Vswing programming for HDMI */ - bxt_ddi_vswing_sequence(dev_priv, hdmi_level, port, - INTEL_OUTPUT_HDMI); + if (IS_BROXTON(dev_priv)) return; - } if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { ddi_translations_fdi = NULL; @@ -428,13 +436,10 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) skl_get_buf_trans_dp(dev_priv, &n_dp_entries); ddi_translations_edp = skl_get_buf_trans_edp(dev_priv, &n_edp_entries); - ddi_translations_hdmi = - skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); - hdmi_default_entry = 8; + /* If we're boosting the current, set bit 31 of trans1 */ - if (dev_priv->vbt.ddi_port_info[port].hdmi_boost_level || - dev_priv->vbt.ddi_port_info[port].dp_boost_level) - iboost_bit = 1<<31; + if (dev_priv->vbt.ddi_port_info[port].dp_boost_level) + iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; if (WARN_ON(encoder->type == INTEL_OUTPUT_EDP && port != PORT_A && port != PORT_E && @@ -443,38 +448,20 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) } else if (IS_BROADWELL(dev_priv)) { 
ddi_translations_fdi = bdw_ddi_translations_fdi; ddi_translations_dp = bdw_ddi_translations_dp; - - if (dev_priv->vbt.edp.low_vswing) { - ddi_translations_edp = bdw_ddi_translations_edp; - n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp); - } else { - ddi_translations_edp = bdw_ddi_translations_dp; - n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); - } - - ddi_translations_hdmi = bdw_ddi_translations_hdmi; - + ddi_translations_edp = bdw_get_buf_trans_edp(dev_priv, &n_edp_entries); n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); - n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); - hdmi_default_entry = 7; } else if (IS_HASWELL(dev_priv)) { ddi_translations_fdi = hsw_ddi_translations_fdi; ddi_translations_dp = hsw_ddi_translations_dp; ddi_translations_edp = hsw_ddi_translations_dp; - ddi_translations_hdmi = hsw_ddi_translations_hdmi; n_dp_entries = n_edp_entries = ARRAY_SIZE(hsw_ddi_translations_dp); - n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); - hdmi_default_entry = 6; } else { WARN(1, "ddi translation table missing\n"); ddi_translations_edp = bdw_ddi_translations_dp; ddi_translations_fdi = bdw_ddi_translations_fdi; ddi_translations_dp = bdw_ddi_translations_dp; - ddi_translations_hdmi = bdw_ddi_translations_hdmi; n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp); n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); - n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); - hdmi_default_entry = 7; } switch (encoder->type) { @@ -483,7 +470,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) size = n_edp_entries; break; case INTEL_OUTPUT_DP: - case INTEL_OUTPUT_HDMI: ddi_translations = ddi_translations_dp; size = n_dp_entries; break; @@ -501,19 +487,48 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) I915_WRITE(DDI_BUF_TRANS_HI(port, i), ddi_translations[i].trans2); } +} - if (encoder->type != INTEL_OUTPUT_HDMI) +/* + * Starting with Haswell, DDI port buffers must be programmed with correct + * values in advance. This function programs the correct values for + * HDMI/DVI use cases. 
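intel_ddi_hdmi_level() above distills a defensive pattern: a level-shift index read from the VBT is honoured only if it lands inside the platform's translation table, otherwise a per-platform default wins. Reduced to its essentials, with hypothetical names:

/* Sketch of the clamp-to-default check in intel_ddi_hdmi_level(). */
static int pick_level(int vbt_level, int n_entries, int default_level)
{
	if (vbt_level < 0 || vbt_level >= n_entries)
		return default_level;	/* VBT badly populated */

	return vbt_level;
}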
+ */ +static void intel_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + u32 iboost_bit = 0; + int n_hdmi_entries, hdmi_level; + enum port port = intel_ddi_get_encoder_port(encoder); + const struct ddi_buf_trans *ddi_translations_hdmi; + + if (IS_BROXTON(dev_priv)) return; - /* Choose a good default if VBT is badly populated */ - if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN || - hdmi_level >= n_hdmi_entries) - hdmi_level = hdmi_default_entry; + hdmi_level = intel_ddi_hdmi_level(dev_priv, port); + + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + ddi_translations_hdmi = skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); + + /* If we're boosting the current, set bit 31 of trans1 */ + if (dev_priv->vbt.ddi_port_info[port].hdmi_boost_level) + iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; + } else if (IS_BROADWELL(dev_priv)) { + ddi_translations_hdmi = bdw_ddi_translations_hdmi; + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + } else if (IS_HASWELL(dev_priv)) { + ddi_translations_hdmi = hsw_ddi_translations_hdmi; + n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); + } else { + WARN(1, "ddi translation table missing\n"); + ddi_translations_hdmi = bdw_ddi_translations_hdmi; + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + } /* Entry 9 is for HDMI: */ - I915_WRITE(DDI_BUF_TRANS_LO(port, i), + I915_WRITE(DDI_BUF_TRANS_LO(port, 9), ddi_translations_hdmi[hdmi_level].trans1 | iboost_bit); - I915_WRITE(DDI_BUF_TRANS_HI(port, i), + I915_WRITE(DDI_BUF_TRANS_HI(port, 9), ddi_translations_hdmi[hdmi_level].trans2); } @@ -531,6 +546,27 @@ static void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv, DRM_ERROR("Timeout waiting for DDI BUF %c idle bit\n", port_name(port)); } +static uint32_t hsw_pll_to_ddi_pll_sel(struct intel_shared_dpll *pll) +{ + switch (pll->id) { + case DPLL_ID_WRPLL1: + return PORT_CLK_SEL_WRPLL1; + case DPLL_ID_WRPLL2: + return PORT_CLK_SEL_WRPLL2; + case DPLL_ID_SPLL: + return PORT_CLK_SEL_SPLL; + case DPLL_ID_LCPLL_810: + return PORT_CLK_SEL_LCPLL_810; + case DPLL_ID_LCPLL_1350: + return PORT_CLK_SEL_LCPLL_1350; + case DPLL_ID_LCPLL_2700: + return PORT_CLK_SEL_LCPLL_2700; + default: + MISSING_CASE(pll->id); + return PORT_CLK_SEL_NONE; + } +} + /* Starting with Haswell, different DDI ports can work in FDI mode for * connection to the PCH-located connectors. For this, it is necessary to train * both the DDI port and PCH receiver for the desired DDI buffer settings. 
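hsw_pll_to_ddi_pll_sel() above is a pure id-to-register-value mapping. The same information rendered as a lookup table, purely for reference; the driver keeps the switch so MISSING_CASE() can flag unexpected ids. This assumes the HSW DPLL ids are the small non-negative enum values defined in this era's shared-DPLL code:

/* Reference rendering of the switch above; not driver code. */
static const u32 hsw_port_clk_sel[] = {
	[DPLL_ID_WRPLL1]     = PORT_CLK_SEL_WRPLL1,
	[DPLL_ID_WRPLL2]     = PORT_CLK_SEL_WRPLL2,
	[DPLL_ID_SPLL]       = PORT_CLK_SEL_SPLL,
	[DPLL_ID_LCPLL_810]  = PORT_CLK_SEL_LCPLL_810,
	[DPLL_ID_LCPLL_1350] = PORT_CLK_SEL_LCPLL_1350,
	[DPLL_ID_LCPLL_2700] = PORT_CLK_SEL_LCPLL_2700,
};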
@@ -546,11 +582,11 @@ void hsw_fdi_link_train(struct drm_crtc *crtc) struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; - u32 temp, i, rx_ctl_val; + u32 temp, i, rx_ctl_val, ddi_pll_sel; for_each_encoder_on_crtc(dev, crtc, encoder) { WARN_ON(encoder->type != INTEL_OUTPUT_ANALOG); - intel_prepare_ddi_buffer(encoder); + intel_prepare_dp_ddi_buffers(encoder); } /* Set the FDI_RX_MISC pwrdn lanes and the 2 workarounds listed at the @@ -577,8 +613,9 @@ void hsw_fdi_link_train(struct drm_crtc *crtc) I915_WRITE(FDI_RX_CTL(PIPE_A), rx_ctl_val); /* Configure Port Clock Select */ - I915_WRITE(PORT_CLK_SEL(PORT_E), intel_crtc->config->ddi_pll_sel); - WARN_ON(intel_crtc->config->ddi_pll_sel != PORT_CLK_SEL_SPLL); + ddi_pll_sel = hsw_pll_to_ddi_pll_sel(intel_crtc->config->shared_dpll); + I915_WRITE(PORT_CLK_SEL(PORT_E), ddi_pll_sel); + WARN_ON(ddi_pll_sel != PORT_CLK_SEL_SPLL); /* Start the training iterating through available voltages and emphasis, * testing each value twice. */ @@ -855,7 +892,7 @@ static void skl_ddi_clock_get(struct intel_encoder *encoder, int link_clock = 0; uint32_t dpll_ctl1, dpll; - dpll = pipe_config->ddi_pll_sel; + dpll = intel_get_shared_dpll_id(dev_priv, pipe_config->shared_dpll); dpll_ctl1 = I915_READ(DPLL_CTRL1); @@ -903,7 +940,7 @@ static void hsw_ddi_clock_get(struct intel_encoder *encoder, int link_clock = 0; u32 val, pll; - val = pipe_config->ddi_pll_sel; + val = hsw_pll_to_ddi_pll_sel(pipe_config->shared_dpll); switch (val & PORT_CLK_SEL_MASK) { case PORT_CLK_SEL_LCPLL_810: link_clock = 81000; @@ -1111,7 +1148,6 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) { struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); - struct drm_encoder *encoder = &intel_encoder->base; struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); enum pipe pipe = intel_crtc->pipe; @@ -1177,29 +1213,15 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) temp |= TRANS_DDI_MODE_SELECT_HDMI; else temp |= TRANS_DDI_MODE_SELECT_DVI; - } else if (type == INTEL_OUTPUT_ANALOG) { temp |= TRANS_DDI_MODE_SELECT_FDI; temp |= (intel_crtc->config->fdi_lanes - 1) << 1; - } else if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - - if (intel_dp->is_mst) { - temp |= TRANS_DDI_MODE_SELECT_DP_MST; - } else - temp |= TRANS_DDI_MODE_SELECT_DP_SST; - + temp |= TRANS_DDI_MODE_SELECT_DP_SST; temp |= DDI_PORT_WIDTH(intel_crtc->config->lane_count); } else if (type == INTEL_OUTPUT_DP_MST) { - struct intel_dp *intel_dp = &enc_to_mst(encoder)->primary->dp; - - if (intel_dp->is_mst) { - temp |= TRANS_DDI_MODE_SELECT_DP_MST; - } else - temp |= TRANS_DDI_MODE_SELECT_DP_SST; - + temp |= TRANS_DDI_MODE_SELECT_DP_MST; temp |= DDI_PORT_WIDTH(intel_crtc->config->lane_count); } else { WARN(1, "Invalid encoder type %d for pipe %c\n", @@ -1379,14 +1401,30 @@ void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc) TRANS_CLK_SEL_DISABLED); } -static void skl_ddi_set_iboost(struct drm_i915_private *dev_priv, - u32 level, enum port port, int type) +static void _skl_ddi_set_iboost(struct drm_i915_private *dev_priv, + enum port port, uint8_t iboost) +{ + u32 tmp; + + tmp = I915_READ(DISPIO_CR_TX_BMU_CR0); + tmp &= ~(BALANCE_LEG_MASK(port) | BALANCE_LEG_DISABLE(port)); + if (iboost) + tmp |= iboost << BALANCE_LEG_SHIFT(port); + else + tmp |= 
BALANCE_LEG_DISABLE(port); + I915_WRITE(DISPIO_CR_TX_BMU_CR0, tmp); +} + +static void skl_ddi_set_iboost(struct intel_encoder *encoder, u32 level) { + struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); + struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); + enum port port = intel_dig_port->port; + int type = encoder->type; const struct ddi_buf_trans *ddi_translations; uint8_t iboost; uint8_t dp_iboost, hdmi_iboost; int n_entries; - u32 reg; /* VBT may override standard boost values */ dp_iboost = dev_priv->vbt.ddi_port_info[port].dp_boost_level; @@ -1428,16 +1466,10 @@ static void skl_ddi_set_iboost(struct drm_i915_private *dev_priv, return; } - reg = I915_READ(DISPIO_CR_TX_BMU_CR0); - reg &= ~BALANCE_LEG_MASK(port); - reg &= ~(1 << (BALANCE_LEG_DISABLE_SHIFT + port)); + _skl_ddi_set_iboost(dev_priv, port, iboost); - if (iboost) - reg |= iboost << BALANCE_LEG_SHIFT(port); - else - reg |= 1 << (BALANCE_LEG_DISABLE_SHIFT + port); - - I915_WRITE(DISPIO_CR_TX_BMU_CR0, reg); + if (port == PORT_A && intel_dig_port->max_lanes == 4) + _skl_ddi_set_iboost(dev_priv, PORT_E, iboost); } static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, @@ -1568,7 +1600,7 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp) level = translate_signal_level(signal_levels); if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) - skl_ddi_set_iboost(dev_priv, level, port, encoder->type); + skl_ddi_set_iboost(encoder, level); else if (IS_BROXTON(dev_priv)) bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type); @@ -1576,13 +1608,15 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp) } void intel_ddi_clk_select(struct intel_encoder *encoder, - const struct intel_crtc_state *pipe_config) + struct intel_shared_dpll *pll) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = intel_ddi_get_encoder_port(encoder); + if (WARN_ON(!pll)) + return; + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - uint32_t dpll = pipe_config->ddi_pll_sel; uint32_t val; /* DDI -> PLL mapping */ @@ -1590,61 +1624,91 @@ void intel_ddi_clk_select(struct intel_encoder *encoder, val &= ~(DPLL_CTRL2_DDI_CLK_OFF(port) | DPLL_CTRL2_DDI_CLK_SEL_MASK(port)); - val |= (DPLL_CTRL2_DDI_CLK_SEL(dpll, port) | + val |= (DPLL_CTRL2_DDI_CLK_SEL(pll->id, port) | DPLL_CTRL2_DDI_SEL_OVERRIDE(port)); I915_WRITE(DPLL_CTRL2, val); } else if (INTEL_INFO(dev_priv)->gen < 9) { - WARN_ON(pipe_config->ddi_pll_sel == PORT_CLK_SEL_NONE); - I915_WRITE(PORT_CLK_SEL(port), pipe_config->ddi_pll_sel); + I915_WRITE(PORT_CLK_SEL(port), hsw_pll_to_ddi_pll_sel(pll)); } } -static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder) +static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, + int link_rate, uint32_t lane_count, + struct intel_shared_dpll *pll, + bool link_mst) { - struct drm_encoder *encoder = &intel_encoder->base; - struct drm_i915_private *dev_priv = to_i915(encoder->dev); - struct intel_crtc *crtc = to_intel_crtc(encoder->crtc); - enum port port = intel_ddi_get_encoder_port(intel_encoder); - int type = intel_encoder->type; - - if (type == INTEL_OUTPUT_HDMI) { - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); - - intel_dp_dual_mode_set_tmds_output(intel_hdmi, true); - } - - intel_prepare_ddi_buffer(intel_encoder); + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = intel_ddi_get_encoder_port(encoder); - if (type == INTEL_OUTPUT_EDP) { 
- struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + intel_dp_set_link_params(intel_dp, link_rate, lane_count, + link_mst); + if (encoder->type == INTEL_OUTPUT_EDP) intel_edp_panel_on(intel_dp); - } - intel_ddi_clk_select(intel_encoder, crtc->config); + intel_ddi_clk_select(encoder, pll); + intel_prepare_dp_ddi_buffers(encoder); + intel_ddi_init_dp_buf_reg(encoder); + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); + intel_dp_start_link_train(intel_dp); + if (port != PORT_A || INTEL_GEN(dev_priv) >= 9) + intel_dp_stop_link_train(intel_dp); +} - if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); +static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, + bool has_hdmi_sink, + struct drm_display_mode *adjusted_mode, + struct intel_shared_dpll *pll) +{ + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct drm_encoder *drm_encoder = &encoder->base; + enum port port = intel_ddi_get_encoder_port(encoder); + int level = intel_ddi_hdmi_level(dev_priv, port); - intel_dp_set_link_params(intel_dp, crtc->config); + intel_dp_dual_mode_set_tmds_output(intel_hdmi, true); + intel_ddi_clk_select(encoder, pll); + intel_prepare_hdmi_ddi_buffers(encoder); + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + skl_ddi_set_iboost(encoder, level); + else if (IS_BROXTON(dev_priv)) + bxt_ddi_vswing_sequence(dev_priv, level, port, + INTEL_OUTPUT_HDMI); - intel_ddi_init_dp_buf_reg(intel_encoder); + intel_hdmi->set_infoframes(drm_encoder, + has_hdmi_sink, + adjusted_mode); +} - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); - intel_dp_start_link_train(intel_dp); - if (port != PORT_A || INTEL_INFO(dev_priv)->gen >= 9) - intel_dp_stop_link_train(intel_dp); - } else if (type == INTEL_OUTPUT_HDMI) { - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); +static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) +{ + struct drm_encoder *encoder = &intel_encoder->base; + struct intel_crtc *crtc = to_intel_crtc(encoder->crtc); + int type = intel_encoder->type; - intel_hdmi->set_infoframes(encoder, - crtc->config->has_hdmi_sink, - &crtc->config->base.adjusted_mode); + if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { + intel_ddi_pre_enable_dp(intel_encoder, + crtc->config->port_clock, + crtc->config->lane_count, + crtc->config->shared_dpll, + intel_crtc_has_type(crtc->config, + INTEL_OUTPUT_DP_MST)); + } + if (type == INTEL_OUTPUT_HDMI) { + intel_ddi_pre_enable_hdmi(intel_encoder, + crtc->config->has_hdmi_sink, + &crtc->config->base.adjusted_mode, + crtc->config->shared_dpll); } } -static void intel_ddi_post_disable(struct intel_encoder *intel_encoder) +static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { struct drm_encoder *encoder = &intel_encoder->base; struct drm_device *dev = encoder->dev; @@ -1654,6 +1718,8 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder) uint32_t val; bool wait = false; + /* old_crtc_state and old_conn_state are NULL when called from DP_MST */ + val = I915_READ(DDI_BUF_CTL(port)); if (val & DDI_BUF_CTL_ENABLE) { val &= ~DDI_BUF_CTL_ENABLE; @@ -1689,7 +1755,42 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder) } } -static void intel_enable_ddi(struct intel_encoder 
*intel_encoder) +void intel_ddi_fdi_post_disable(struct intel_encoder *intel_encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) +{ + struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); + uint32_t val; + + /* + * Bspec lists this as both step 13 (before DDI_BUF_CTL disable) + * and step 18 (after clearing PORT_CLK_SEL). Based on a BUN, + * step 13 is the correct place for it. Step 18 is where it was + * originally before the BUN. + */ + val = I915_READ(FDI_RX_CTL(PIPE_A)); + val &= ~FDI_RX_ENABLE; + I915_WRITE(FDI_RX_CTL(PIPE_A), val); + + intel_ddi_post_disable(intel_encoder, old_crtc_state, old_conn_state); + + val = I915_READ(FDI_RX_MISC(PIPE_A)); + val &= ~(FDI_RX_PWRDN_LANE1_MASK | FDI_RX_PWRDN_LANE0_MASK); + val |= FDI_RX_PWRDN_LANE1_VAL(2) | FDI_RX_PWRDN_LANE0_VAL(2); + I915_WRITE(FDI_RX_MISC(PIPE_A), val); + + val = I915_READ(FDI_RX_CTL(PIPE_A)); + val &= ~FDI_PCDCLK; + I915_WRITE(FDI_RX_CTL(PIPE_A), val); + + val = I915_READ(FDI_RX_CTL(PIPE_A)); + val &= ~FDI_RX_PLL_ENABLE; + I915_WRITE(FDI_RX_CTL(PIPE_A), val); +} + +static void intel_enable_ddi(struct intel_encoder *intel_encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct drm_encoder *encoder = &intel_encoder->base; struct drm_crtc *crtc = encoder->crtc; @@ -1718,7 +1819,7 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder) intel_edp_backlight_on(intel_dp); intel_psr_enable(intel_dp); - intel_edp_drrs_enable(intel_dp); + intel_edp_drrs_enable(intel_dp, pipe_config); } if (intel_crtc->config->has_audio) { @@ -1727,7 +1828,9 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder) } } -static void intel_disable_ddi(struct intel_encoder *intel_encoder) +static void intel_disable_ddi(struct intel_encoder *intel_encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { struct drm_encoder *encoder = &intel_encoder->base; struct drm_crtc *crtc = encoder->crtc; @@ -1744,7 +1847,7 @@ static void intel_disable_ddi(struct intel_encoder *intel_encoder) if (type == INTEL_OUTPUT_EDP) { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - intel_edp_drrs_disable(intel_dp); + intel_edp_drrs_disable(intel_dp, old_crtc_state); intel_psr_disable(intel_dp); intel_edp_backlight_off(intel_dp); } @@ -2033,7 +2136,9 @@ bxt_ddi_phy_calc_lane_lat_optim_mask(struct intel_encoder *encoder, } } -static void bxt_ddi_pre_pll_enable(struct intel_encoder *encoder) +static void bxt_ddi_pre_pll_enable(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); @@ -2105,7 +2210,7 @@ void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp) val = DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_PAT1 | DP_TP_CTL_SCRAMBLE_DISABLE; - if (intel_dp->is_mst) + if (intel_dp->link_mst) val |= DP_TP_CTL_MODE_MST; else { val |= DP_TP_CTL_MODE_SST; @@ -2122,38 +2227,6 @@ void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp) udelay(600); } -void intel_ddi_fdi_disable(struct drm_crtc *crtc) -{ - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); - uint32_t val; - - /* - * Bspec lists this as both step 13 (before DDI_BUF_CTL disable) - * and step 18 (after clearing PORT_CLK_SEL). 
Based on a BUN, - * step 13 is the correct place for it. Step 18 is where it was - * originally before the BUN. - */ - val = I915_READ(FDI_RX_CTL(PIPE_A)); - val &= ~FDI_RX_ENABLE; - I915_WRITE(FDI_RX_CTL(PIPE_A), val); - - intel_ddi_post_disable(intel_encoder); - - val = I915_READ(FDI_RX_MISC(PIPE_A)); - val &= ~(FDI_RX_PWRDN_LANE1_MASK | FDI_RX_PWRDN_LANE0_MASK); - val |= FDI_RX_PWRDN_LANE1_VAL(2) | FDI_RX_PWRDN_LANE0_VAL(2); - I915_WRITE(FDI_RX_MISC(PIPE_A), val); - - val = I915_READ(FDI_RX_CTL(PIPE_A)); - val &= ~FDI_PCDCLK; - I915_WRITE(FDI_RX_CTL(PIPE_A), val); - - val = I915_READ(FDI_RX_CTL(PIPE_A)); - val &= ~FDI_RX_PLL_ENABLE; - I915_WRITE(FDI_RX_CTL(PIPE_A), val); -} - void intel_ddi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { @@ -2253,7 +2326,8 @@ void intel_ddi_get_config(struct intel_encoder *encoder, } static bool intel_ddi_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config) + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); int type = encoder->type; @@ -2266,9 +2340,9 @@ static bool intel_ddi_compute_config(struct intel_encoder *encoder, pipe_config->cpu_transcoder = TRANSCODER_EDP; if (type == INTEL_OUTPUT_HDMI) - ret = intel_hdmi_compute_config(encoder, pipe_config); + ret = intel_hdmi_compute_config(encoder, pipe_config, conn_state); else - ret = intel_dp_compute_config(encoder, pipe_config); + ret = intel_dp_compute_config(encoder, pipe_config, conn_state); if (IS_BROXTON(dev_priv) && ret) pipe_config->lane_lat_optim_mask = @@ -2319,6 +2393,45 @@ intel_ddi_init_hdmi_connector(struct intel_digital_port *intel_dig_port) return connector; } +struct intel_shared_dpll * +intel_ddi_get_link_dpll(struct intel_dp *intel_dp, int clock) +{ + struct intel_connector *connector = intel_dp->attached_connector; + struct intel_encoder *encoder = connector->encoder; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + struct intel_shared_dpll *pll = NULL; + struct intel_shared_dpll_config tmp_pll_config; + enum intel_dpll_id dpll_id; + + if (IS_BROXTON(dev_priv)) { + dpll_id = (enum intel_dpll_id)dig_port->port; + /* + * Select the required PLL. This works for platforms where + * there is no shared DPLL. + */ + pll = &dev_priv->shared_dplls[dpll_id]; + if (WARN_ON(pll->active_mask)) { + + DRM_ERROR("Shared DPLL in use. 
active_mask:%x\n", + pll->active_mask); + return NULL; + } + tmp_pll_config = pll->config; + if (!bxt_ddi_dp_set_dpll_hw_state(clock, + &pll->config.hw_state)) { + DRM_ERROR("Could not setup DPLL\n"); + pll->config = tmp_pll_config; + return NULL; + } + } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + pll = skl_find_link_pll(dev_priv, clock); + } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { + pll = hsw_ddi_dp_get_dpll(encoder, clock); + } + return pll; +} + void intel_ddi_init(struct drm_device *dev, enum port port) { struct drm_i915_private *dev_priv = to_i915(dev); diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index cba137f9ad3e..73b6858600ac 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -46,71 +46,70 @@ void intel_device_info_dump(struct drm_i915_private *dev_priv) static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) { - struct intel_device_info *info = mkwrite_device_info(dev_priv); + struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; u32 fuse, eu_dis; fuse = I915_READ(CHV_FUSE_GT); - info->slice_total = 1; + sseu->slice_mask = BIT(0); if (!(fuse & CHV_FGT_DISABLE_SS0)) { - info->subslice_per_slice++; + sseu->subslice_mask |= BIT(0); eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK | CHV_FGT_EU_DIS_SS0_R1_MASK); - info->eu_total += 8 - hweight32(eu_dis); + sseu->eu_total += 8 - hweight32(eu_dis); } if (!(fuse & CHV_FGT_DISABLE_SS1)) { - info->subslice_per_slice++; + sseu->subslice_mask |= BIT(1); eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK | CHV_FGT_EU_DIS_SS1_R1_MASK); - info->eu_total += 8 - hweight32(eu_dis); + sseu->eu_total += 8 - hweight32(eu_dis); } - info->subslice_total = info->subslice_per_slice; /* * CHV expected to always have a uniform distribution of EU * across subslices. */ - info->eu_per_subslice = info->subslice_total ? - info->eu_total / info->subslice_total : + sseu->eu_per_subslice = sseu_subslice_total(sseu) ? + sseu->eu_total / sseu_subslice_total(sseu) : 0; /* * CHV supports subslice power gating on devices with more than * one subslice, and supports EU power gating on devices with * more than one EU pair per subslice. */ - info->has_slice_pg = 0; - info->has_subslice_pg = (info->subslice_total > 1); - info->has_eu_pg = (info->eu_per_subslice > 2); + sseu->has_slice_pg = 0; + sseu->has_subslice_pg = sseu_subslice_total(sseu) > 1; + sseu->has_eu_pg = (sseu->eu_per_subslice > 2); } static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) { struct intel_device_info *info = mkwrite_device_info(dev_priv); + struct sseu_dev_info *sseu = &info->sseu; int s_max = 3, ss_max = 4, eu_max = 8; int s, ss; - u32 fuse2, s_enable, ss_disable, eu_disable; + u32 fuse2, eu_disable; u8 eu_mask = 0xff; fuse2 = I915_READ(GEN8_FUSE2); - s_enable = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; - ss_disable = (fuse2 & GEN9_F2_SS_DIS_MASK) >> GEN9_F2_SS_DIS_SHIFT; + sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; - info->slice_total = hweight32(s_enable); /* * The subslice disable field is global, i.e. it applies * to each of the enabled slices. 
*/ - info->subslice_per_slice = ss_max - hweight32(ss_disable); - info->subslice_total = info->slice_total * info->subslice_per_slice; + sseu->subslice_mask = (1 << ss_max) - 1; + sseu->subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >> + GEN9_F2_SS_DIS_SHIFT); /* * Iterate through enabled slices and subslices to * count the total enabled EU. */ for (s = 0; s < s_max; s++) { - if (!(s_enable & BIT(s))) + if (!(sseu->slice_mask & BIT(s))) /* skip disabled slice */ continue; @@ -118,7 +117,7 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) for (ss = 0; ss < ss_max; ss++) { int eu_per_ss; - if (ss_disable & BIT(ss)) + if (!(sseu->subslice_mask & BIT(ss))) /* skip disabled subslice */ continue; @@ -131,9 +130,9 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) * subslices if they are unbalanced. */ if (eu_per_ss == 7) - info->subslice_7eu[s] |= BIT(ss); + sseu->subslice_7eu[s] |= BIT(ss); - info->eu_total += eu_per_ss; + sseu->eu_total += eu_per_ss; } } @@ -144,9 +143,9 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) * recovery. BXT is expected to be perfectly uniform in EU * distribution. */ - info->eu_per_subslice = info->subslice_total ? - DIV_ROUND_UP(info->eu_total, - info->subslice_total) : 0; + sseu->eu_per_subslice = sseu_subslice_total(sseu) ? + DIV_ROUND_UP(sseu->eu_total, + sseu_subslice_total(sseu)) : 0; /* * SKL supports slice power gating on devices with more than * one slice, and supports EU power gating on devices with @@ -155,15 +154,15 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) * supports EU power gating on devices with more than one EU * pair per subslice. */ - info->has_slice_pg = + sseu->has_slice_pg = (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) && - info->slice_total > 1; - info->has_subslice_pg = - IS_BROXTON(dev_priv) && info->subslice_total > 1; - info->has_eu_pg = info->eu_per_subslice > 2; + hweight8(sseu->slice_mask) > 1; + sseu->has_subslice_pg = + IS_BROXTON(dev_priv) && sseu_subslice_total(sseu) > 1; + sseu->has_eu_pg = sseu->eu_per_subslice > 2; if (IS_BROXTON(dev_priv)) { -#define IS_SS_DISABLED(_ss_disable, ss) (_ss_disable & BIT(ss)) +#define IS_SS_DISABLED(ss) (!(sseu->subslice_mask & BIT(ss))) /* * There is a HW issue in 2x6 fused down parts that requires * Pooled EU to be enabled as a WA. The pool configuration @@ -171,19 +170,18 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) * doesn't affect if the device has all 3 subslices enabled. 
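The sseu rework above (cherryview_sseu_info_init() and gen9_sseu_info_init()) stops storing pre-computed totals and keeps raw enable masks instead: totals become popcounts, and the per-subslice EU average is a rounded-up division so a die-recovery EU does not skew it downwards. A standalone illustration with made-up fuse values:

#include <stdio.h>

int main(void)
{
	unsigned int ss_max = 4;
	unsigned int fuse_ss_dis = 0x2;		/* say subslice 1 is fused off */
	unsigned int subslice_mask = ((1u << ss_max) - 1) & ~fuse_ss_dis;
	unsigned int subslices = __builtin_popcount(subslice_mask);
	unsigned int eu_total = 23;		/* one EU lost to die recovery */

	/* DIV_ROUND_UP: the average rounds back up to the nominal 8 */
	printf("mask=%#x subslices=%u eu/ss=%u\n", subslice_mask, subslices,
	       (eu_total + subslices - 1) / subslices);
	return 0;
}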
*/ /* WaEnablePooledEuFor2x6:bxt */ - info->has_pooled_eu = ((info->subslice_per_slice == 3) || - (info->subslice_per_slice == 2 && + info->has_pooled_eu = ((hweight8(sseu->subslice_mask) == 3) || + (hweight8(sseu->subslice_mask) == 2 && INTEL_REVID(dev_priv) < BXT_REVID_C0)); - info->min_eu_in_pool = 0; + sseu->min_eu_in_pool = 0; if (info->has_pooled_eu) { - if (IS_SS_DISABLED(ss_disable, 0) || - IS_SS_DISABLED(ss_disable, 2)) - info->min_eu_in_pool = 3; - else if (IS_SS_DISABLED(ss_disable, 1)) - info->min_eu_in_pool = 6; + if (IS_SS_DISABLED(2) || IS_SS_DISABLED(0)) + sseu->min_eu_in_pool = 3; + else if (IS_SS_DISABLED(1)) + sseu->min_eu_in_pool = 6; else - info->min_eu_in_pool = 9; + sseu->min_eu_in_pool = 9; } #undef IS_SS_DISABLED } @@ -191,14 +189,20 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) { - struct intel_device_info *info = mkwrite_device_info(dev_priv); + struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; const int s_max = 3, ss_max = 3, eu_max = 8; int s, ss; - u32 fuse2, eu_disable[s_max], s_enable, ss_disable; + u32 fuse2, eu_disable[s_max]; fuse2 = I915_READ(GEN8_FUSE2); - s_enable = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; - ss_disable = (fuse2 & GEN8_F2_SS_DIS_MASK) >> GEN8_F2_SS_DIS_SHIFT; + sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; + /* + * The subslice disable field is global, i.e. it applies + * to each of the enabled slices. + */ + sseu->subslice_mask = BIT(ss_max) - 1; + sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >> + GEN8_F2_SS_DIS_SHIFT); eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & GEN8_EU_DIS0_S0_MASK; eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> GEN8_EU_DIS0_S1_SHIFT) | @@ -208,28 +212,19 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) ((I915_READ(GEN8_EU_DISABLE2) & GEN8_EU_DIS2_S2_MASK) << (32 - GEN8_EU_DIS1_S2_SHIFT)); - info->slice_total = hweight32(s_enable); - - /* - * The subslice disable field is global, i.e. it applies - * to each of the enabled slices. - */ - info->subslice_per_slice = ss_max - hweight32(ss_disable); - info->subslice_total = info->slice_total * info->subslice_per_slice; - /* * Iterate through enabled slices and subslices to * count the total enabled EU. */ for (s = 0; s < s_max; s++) { - if (!(s_enable & (0x1 << s))) + if (!(sseu->slice_mask & BIT(s))) /* skip disabled slice */ continue; for (ss = 0; ss < ss_max; ss++) { u32 n_disabled; - if (ss_disable & (0x1 << ss)) + if (!(sseu->subslice_mask & BIT(ss))) /* skip disabled subslice */ continue; @@ -239,9 +234,9 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) * Record which subslices have 7 EUs. */ if (eu_max - n_disabled == 7) - info->subslice_7eu[s] |= 1 << ss; + sseu->subslice_7eu[s] |= 1 << ss; - info->eu_total += eu_max - n_disabled; + sseu->eu_total += eu_max - n_disabled; } } @@ -250,16 +245,17 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) * subslices with the exception that any one EU in any one subslice may * be fused off for die recovery. */ - info->eu_per_subslice = info->subslice_total ? - DIV_ROUND_UP(info->eu_total, info->subslice_total) : 0; + sseu->eu_per_subslice = sseu_subslice_total(sseu) ? + DIV_ROUND_UP(sseu->eu_total, + sseu_subslice_total(sseu)) : 0; /* * BDW supports slice power gating on devices with more than * one slice. 
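The WaEnablePooledEuFor2x6 hunk above encodes a small decision table: which subslice survived the fusing determines the minimum number of EUs the pool may be configured with. The same mapping as a standalone helper, values copied from the hunk, names hypothetical:

/* Mirrors the min_eu_in_pool selection above for a 3-subslice part. */
static unsigned int bxt_min_eu_in_pool(unsigned int subslice_mask)
{
	if (!(subslice_mask & (1u << 2)) || !(subslice_mask & (1u << 0)))
		return 3;	/* subslice 0 or 2 fused off */
	if (!(subslice_mask & (1u << 1)))
		return 6;	/* subslice 1 fused off */
	return 9;		/* all three subslices present */
}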
*/ - info->has_slice_pg = (info->slice_total > 1); - info->has_subslice_pg = 0; - info->has_eu_pg = 0; + sseu->has_slice_pg = hweight8(sseu->slice_mask) > 1; + sseu->has_subslice_pg = 0; + sseu->has_eu_pg = 0; } /* @@ -374,15 +370,19 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) info->has_snoop = false; - DRM_DEBUG_DRIVER("slice total: %u\n", info->slice_total); - DRM_DEBUG_DRIVER("subslice total: %u\n", info->subslice_total); - DRM_DEBUG_DRIVER("subslice per slice: %u\n", info->subslice_per_slice); - DRM_DEBUG_DRIVER("EU total: %u\n", info->eu_total); - DRM_DEBUG_DRIVER("EU per subslice: %u\n", info->eu_per_subslice); + DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask); + DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask)); + DRM_DEBUG_DRIVER("subslice total: %u\n", + sseu_subslice_total(&info->sseu)); + DRM_DEBUG_DRIVER("subslice mask %04x\n", info->sseu.subslice_mask); + DRM_DEBUG_DRIVER("subslice per slice: %u\n", + hweight8(info->sseu.subslice_mask)); + DRM_DEBUG_DRIVER("EU total: %u\n", info->sseu.eu_total); + DRM_DEBUG_DRIVER("EU per subslice: %u\n", info->sseu.eu_per_subslice); DRM_DEBUG_DRIVER("has slice power gating: %s\n", - info->has_slice_pg ? "y" : "n"); + info->sseu.has_slice_pg ? "y" : "n"); DRM_DEBUG_DRIVER("has subslice power gating: %s\n", - info->has_subslice_pg ? "y" : "n"); + info->sseu.has_subslice_pg ? "y" : "n"); DRM_DEBUG_DRIVER("has EU power gating: %s\n", - info->has_eu_pg ? "y" : "n"); + info->sseu.has_eu_pg ? "y" : "n"); } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c457eed76f1f..fbcfed63a76e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -34,6 +34,7 @@ #include <drm/drm_edid.h> #include <drm/drmP.h> #include "intel_drv.h" +#include "intel_frontbuffer.h" #include <drm/i915_drm.h> #include "i915_drv.h" #include "i915_gem_dmabuf.h" @@ -1201,8 +1202,8 @@ void assert_panel_unlocked(struct drm_i915_private *dev_priv, if (HAS_PCH_SPLIT(dev)) { u32 port_sel; - pp_reg = PCH_PP_CONTROL; - port_sel = I915_READ(PCH_PP_ON_DELAYS) & PANEL_PORT_SELECT_MASK; + pp_reg = PP_CONTROL(0); + port_sel = I915_READ(PP_ON_DELAYS(0)) & PANEL_PORT_SELECT_MASK; if (port_sel == PANEL_PORT_SELECT_LVDS && I915_READ(PCH_LVDS) & LVDS_PIPEB_SELECT) @@ -1210,10 +1211,10 @@ void assert_panel_unlocked(struct drm_i915_private *dev_priv, /* XXX: else fix for eDP */ } else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) { /* presumably write lock depends on pipe, not port select */ - pp_reg = VLV_PIPE_PP_CONTROL(pipe); + pp_reg = PP_CONTROL(pipe); panel_pipe = pipe; } else { - pp_reg = PP_CONTROL; + pp_reg = PP_CONTROL(0); if (I915_READ(LVDS) & LVDS_PIPEB_SELECT) panel_pipe = PIPE_B; } @@ -1906,7 +1907,7 @@ static void ironlake_disable_pch_transcoder(struct drm_i915_private *dev_priv, } } -static void lpt_disable_pch_transcoder(struct drm_i915_private *dev_priv) +void lpt_disable_pch_transcoder(struct drm_i915_private *dev_priv) { u32 val; @@ -1958,12 +1959,12 @@ static void intel_enable_pipe(struct intel_crtc *crtc) * a plane. On ILK+ the pipe PLLs are integrated, so we don't * need the check. 
*/ - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH_DISPLAY(dev_priv)) { if (intel_crtc_has_type(crtc->config, INTEL_OUTPUT_DSI)) assert_dsi_pll_enabled(dev_priv); else assert_pll_enabled(dev_priv, pipe); - else { + } else { if (crtc->config->has_pch_encoder) { /* if driving the PCH, we need FDI enabled */ assert_fdi_rx_pll_enabled(dev_priv, pch_transcoder); @@ -2146,33 +2147,6 @@ intel_fill_fb_ggtt_view(struct i915_ggtt_view *view, } } -static void -intel_fill_fb_info(struct drm_i915_private *dev_priv, - struct drm_framebuffer *fb) -{ - struct intel_rotation_info *info = &to_intel_framebuffer(fb)->rot_info; - unsigned int tile_size, tile_width, tile_height, cpp; - - tile_size = intel_tile_size(dev_priv); - - cpp = drm_format_plane_cpp(fb->pixel_format, 0); - intel_tile_dims(dev_priv, &tile_width, &tile_height, - fb->modifier[0], cpp); - - info->plane[0].width = DIV_ROUND_UP(fb->pitches[0], tile_width * cpp); - info->plane[0].height = DIV_ROUND_UP(fb->height, tile_height); - - if (info->pixel_format == DRM_FORMAT_NV12) { - cpp = drm_format_plane_cpp(fb->pixel_format, 1); - intel_tile_dims(dev_priv, &tile_width, &tile_height, - fb->modifier[1], cpp); - - info->uv_offset = fb->offsets[1]; - info->plane[1].width = DIV_ROUND_UP(fb->pitches[1], tile_width * cpp); - info->plane[1].height = DIV_ROUND_UP(fb->height / 2, tile_height); - } -} - static unsigned int intel_linear_alignment(const struct drm_i915_private *dev_priv) { if (INTEL_INFO(dev_priv)->gen >= 9) @@ -2205,16 +2179,15 @@ static unsigned int intel_surf_alignment(const struct drm_i915_private *dev_priv } } -int -intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, - unsigned int rotation) +struct i915_vma * +intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation) { struct drm_device *dev = fb->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj = intel_fb_obj(fb); struct i915_ggtt_view view; + struct i915_vma *vma; u32 alignment; - int ret; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); @@ -2239,75 +2212,112 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, */ intel_runtime_pm_get(dev_priv); - ret = i915_gem_object_pin_to_display_plane(obj, alignment, - &view); - if (ret) - goto err_pm; - - /* Install a fence for tiled scan-out. Pre-i965 always needs a - * fence, whereas 965+ only requires a fence if using - * framebuffer compression. For simplicity, we always install - * a fence as the cost is not that onerous. - */ - if (view.type == I915_GGTT_VIEW_NORMAL) { - ret = i915_gem_object_get_fence(obj); - if (ret == -EDEADLK) { - /* - * -EDEADLK means there are no free fences - * no pending flips. - * - * This is propagated to atomic, but it uses - * -EDEADLK to force a locking recovery, so - * change the returned error to -EBUSY. - */ - ret = -EBUSY; - goto err_unpin; - } else if (ret) - goto err_unpin; + vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view); + if (IS_ERR(vma)) + goto err; - i915_gem_object_pin_fence(obj); + if (i915_vma_is_map_and_fenceable(vma)) { + /* Install a fence for tiled scan-out. Pre-i965 always needs a + * fence, whereas 965+ only requires a fence if using + * framebuffer compression. For simplicity, we always, when + * possible, install a fence as the cost is not that onerous. 
+ * + * If we fail to fence the tiled scanout, then either the + * modeset will reject the change (which is highly unlikely as + * the affected systems, all but one, do not have unmappable + * space) or we will not be able to enable full powersaving + * techniques (also likely not to apply due to various limits + * FBC and the like impose on the size of the buffer, which + * presumably we violated anyway with this unmappable buffer). + * Anyway, it is presumably better to stumble onwards with + * something and try to run the system in a "less than optimal" + * mode that matches the user configuration. + */ + if (i915_vma_get_fence(vma) == 0) + i915_vma_pin_fence(vma); } +err: intel_runtime_pm_put(dev_priv); - return 0; - -err_unpin: - i915_gem_object_unpin_from_display_plane(obj, &view); -err_pm: - intel_runtime_pm_put(dev_priv); - return ret; + return vma; } void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation) { struct drm_i915_gem_object *obj = intel_fb_obj(fb); struct i915_ggtt_view view; + struct i915_vma *vma; WARN_ON(!mutex_is_locked(&obj->base.dev->struct_mutex)); intel_fill_fb_ggtt_view(&view, fb, rotation); + vma = i915_gem_object_to_ggtt(obj, &view); - if (view.type == I915_GGTT_VIEW_NORMAL) - i915_gem_object_unpin_fence(obj); + i915_vma_unpin_fence(vma); + i915_gem_object_unpin_from_display_plane(vma); +} - i915_gem_object_unpin_from_display_plane(obj, &view); +static int intel_fb_pitch(const struct drm_framebuffer *fb, int plane, + unsigned int rotation) +{ + if (intel_rotation_90_or_270(rotation)) + return to_intel_framebuffer(fb)->rotated[plane].pitch; + else + return fb->pitches[plane]; +} + +/* + * Convert the x/y offsets into a linear offset. + * Only valid with 0/180 degree rotation, which is fine since linear + * offset is only used with linear buffers on pre-hsw and tiled buffers + * with gen2/3, and 90/270 degree rotations aren't supported on any of them. + */ +u32 intel_fb_xy_to_linear(int x, int y, + const struct intel_plane_state *state, + int plane) +{ + const struct drm_framebuffer *fb = state->base.fb; + unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane); + unsigned int pitch = fb->pitches[plane]; + + return y * pitch + x * cpp; +} + +/* + * Add the x/y offsets derived from fb->offsets[] to the user + * specified plane src x/y offsets. The resulting x/y offsets + * specify the start of scanout from the beginning of the gtt mapping. + */ +void intel_add_fb_offsets(int *x, int *y, + const struct intel_plane_state *state, + int plane) +{ + const struct intel_framebuffer *intel_fb = to_intel_framebuffer(state->base.fb); + unsigned int rotation = state->base.rotation; + + if (intel_rotation_90_or_270(rotation)) { + *x += intel_fb->rotated[plane].x; + *y += intel_fb->rotated[plane].y; + } else { + *x += intel_fb->normal[plane].x; + *y += intel_fb->normal[plane].y; + } } /* - * Adjust the tile offset by moving the difference into - * the x/y offsets. - * * Input tile dimensions and pitch must already be * rotated to match x and y, and in pixel units.
*/ -static u32 intel_adjust_tile_offset(int *x, int *y, - unsigned int tile_width, - unsigned int tile_height, - unsigned int tile_size, - unsigned int pitch_tiles, - u32 old_offset, - u32 new_offset) -{ +static u32 _intel_adjust_tile_offset(int *x, int *y, + unsigned int tile_width, + unsigned int tile_height, + unsigned int tile_size, + unsigned int pitch_tiles, + u32 old_offset, + u32 new_offset) +{ + unsigned int pitch_pixels = pitch_tiles * tile_width; unsigned int tiles; WARN_ON(old_offset & (tile_size - 1)); @@ -2319,6 +2329,54 @@ static u32 intel_adjust_tile_offset(int *x, int *y, *y += tiles / pitch_tiles * tile_height; *x += tiles % pitch_tiles * tile_width; + /* minimize x in case it got needlessly big */ + *y += *x / pitch_pixels * tile_height; + *x %= pitch_pixels; + + return new_offset; +} + +/* + * Adjust the tile offset by moving the difference into + * the x/y offsets. + */ +static u32 intel_adjust_tile_offset(int *x, int *y, + const struct intel_plane_state *state, int plane, + u32 old_offset, u32 new_offset) +{ + const struct drm_i915_private *dev_priv = to_i915(state->base.plane->dev); + const struct drm_framebuffer *fb = state->base.fb; + unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane); + unsigned int rotation = state->base.rotation; + unsigned int pitch = intel_fb_pitch(fb, plane, rotation); + + WARN_ON(new_offset > old_offset); + + if (fb->modifier[plane] != DRM_FORMAT_MOD_NONE) { + unsigned int tile_size, tile_width, tile_height; + unsigned int pitch_tiles; + + tile_size = intel_tile_size(dev_priv); + intel_tile_dims(dev_priv, &tile_width, &tile_height, + fb->modifier[plane], cpp); + + if (intel_rotation_90_or_270(rotation)) { + pitch_tiles = pitch / tile_height; + swap(tile_width, tile_height); + } else { + pitch_tiles = pitch / (tile_width * cpp); + } + + _intel_adjust_tile_offset(x, y, tile_width, tile_height, + tile_size, pitch_tiles, + old_offset, new_offset); + } else { + old_offset += *y * pitch + *x * cpp; + + *y = (old_offset - new_offset) / pitch; + *x = ((old_offset - new_offset) - *y * pitch) / cpp; + } + return new_offset; } @@ -2329,18 +2387,24 @@ static u32 intel_adjust_tile_offset(int *x, int *y, * In the 90/270 rotated case, x and y are assumed * to be already rotated to match the rotated GTT view, and * pitch is the tile_height aligned framebuffer height. + * + * This function is used when computing the derived information + * under intel_framebuffer, so using any of that information + * here is not allowed. Anything under drm_framebuffer can be + * used. This is why the user has to pass in the pitch since it + * is specified in the rotated orientation. 
*/ -u32 intel_compute_tile_offset(int *x, int *y, - const struct drm_framebuffer *fb, int plane, - unsigned int pitch, - unsigned int rotation) +static u32 _intel_compute_tile_offset(const struct drm_i915_private *dev_priv, + int *x, int *y, + const struct drm_framebuffer *fb, int plane, + unsigned int pitch, + unsigned int rotation, + u32 alignment) { - const struct drm_i915_private *dev_priv = to_i915(fb->dev); uint64_t fb_modifier = fb->modifier[plane]; unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane); - u32 offset, offset_aligned, alignment; + u32 offset, offset_aligned; - alignment = intel_surf_alignment(dev_priv, fb_modifier); if (alignment) alignment--; @@ -2368,9 +2432,9 @@ u32 intel_compute_tile_offset(int *x, int *y, offset = (tile_rows * pitch_tiles + tiles) * tile_size; offset_aligned = offset & ~alignment; - intel_adjust_tile_offset(x, y, tile_width, tile_height, - tile_size, pitch_tiles, - offset, offset_aligned); + _intel_adjust_tile_offset(x, y, tile_width, tile_height, + tile_size, pitch_tiles, + offset, offset_aligned); } else { offset = *y * pitch + *x * cpp; offset_aligned = offset & ~alignment; @@ -2382,6 +2446,177 @@ u32 intel_compute_tile_offset(int *x, int *y, return offset_aligned; } +u32 intel_compute_tile_offset(int *x, int *y, + const struct intel_plane_state *state, + int plane) +{ + const struct drm_i915_private *dev_priv = to_i915(state->base.plane->dev); + const struct drm_framebuffer *fb = state->base.fb; + unsigned int rotation = state->base.rotation; + int pitch = intel_fb_pitch(fb, plane, rotation); + u32 alignment; + + /* AUX_DIST needs only 4K alignment */ + if (fb->pixel_format == DRM_FORMAT_NV12 && plane == 1) + alignment = 4096; + else + alignment = intel_surf_alignment(dev_priv, fb->modifier[plane]); + + return _intel_compute_tile_offset(dev_priv, x, y, fb, plane, pitch, + rotation, alignment); +} + +/* Convert the fb->offset[] linear offset into x/y offsets */ +static void intel_fb_offset_to_xy(int *x, int *y, + const struct drm_framebuffer *fb, int plane) +{ + unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane); + unsigned int pitch = fb->pitches[plane]; + u32 linear_offset = fb->offsets[plane]; + + *y = linear_offset / pitch; + *x = linear_offset % pitch / cpp; +} + +static unsigned int intel_fb_modifier_to_tiling(uint64_t fb_modifier) +{ + switch (fb_modifier) { + case I915_FORMAT_MOD_X_TILED: + return I915_TILING_X; + case I915_FORMAT_MOD_Y_TILED: + return I915_TILING_Y; + default: + return I915_TILING_NONE; + } +} + +static int +intel_fill_fb_info(struct drm_i915_private *dev_priv, + struct drm_framebuffer *fb) +{ + struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); + struct intel_rotation_info *rot_info = &intel_fb->rot_info; + u32 gtt_offset_rotated = 0; + unsigned int max_size = 0; + uint32_t format = fb->pixel_format; + int i, num_planes = drm_format_num_planes(format); + unsigned int tile_size = intel_tile_size(dev_priv); + + for (i = 0; i < num_planes; i++) { + unsigned int width, height; + unsigned int cpp, size; + u32 offset; + int x, y; + + cpp = drm_format_plane_cpp(format, i); + width = drm_format_plane_width(fb->width, format, i); + height = drm_format_plane_height(fb->height, format, i); + + intel_fb_offset_to_xy(&x, &y, fb, i); + + /* + * The fence (if used) is aligned to the start of the object + * so having the framebuffer wrap around across the edge of the + * fenced region doesn't really work. 
We have no API to configure + * the fence start offset within the object (nor could we probably + * on gen2/3). So it's easier if we just require that the + * fb layout agrees with the fence layout. We already check that the + * fb stride matches the fence stride elsewhere. + */ + if (i915_gem_object_is_tiled(intel_fb->obj) && + (x + width) * cpp > fb->pitches[i]) { + DRM_DEBUG("bad fb plane %d offset: 0x%x\n", + i, fb->offsets[i]); + return -EINVAL; + } + + /* + * First pixel of the framebuffer from + * the start of the normal gtt mapping. + */ + intel_fb->normal[i].x = x; + intel_fb->normal[i].y = y; + + offset = _intel_compute_tile_offset(dev_priv, &x, &y, + fb, i, fb->pitches[i], + DRM_ROTATE_0, tile_size); + offset /= tile_size; + + if (fb->modifier[i] != DRM_FORMAT_MOD_NONE) { + unsigned int tile_width, tile_height; + unsigned int pitch_tiles; + struct drm_rect r; + + intel_tile_dims(dev_priv, &tile_width, &tile_height, + fb->modifier[i], cpp); + + rot_info->plane[i].offset = offset; + rot_info->plane[i].stride = DIV_ROUND_UP(fb->pitches[i], tile_width * cpp); + rot_info->plane[i].width = DIV_ROUND_UP(x + width, tile_width); + rot_info->plane[i].height = DIV_ROUND_UP(y + height, tile_height); + + intel_fb->rotated[i].pitch = + rot_info->plane[i].height * tile_height; + + /* how many tiles does this plane need */ + size = rot_info->plane[i].stride * rot_info->plane[i].height; + /* + * If the plane isn't horizontally tile aligned, + * we need one more tile. + */ + if (x != 0) + size++; + + /* rotate the x/y offsets to match the GTT view */ + r.x1 = x; + r.y1 = y; + r.x2 = x + width; + r.y2 = y + height; + drm_rect_rotate(&r, + rot_info->plane[i].width * tile_width, + rot_info->plane[i].height * tile_height, + DRM_ROTATE_270); + x = r.x1; + y = r.y1; + + /* rotate the tile dimensions to match the GTT view */ + pitch_tiles = intel_fb->rotated[i].pitch / tile_height; + swap(tile_width, tile_height); + + /* + * We only keep the x/y offsets, so push all of the + * gtt offset into the x/y offsets. + */ + _intel_adjust_tile_offset(&x, &y, tile_width, + tile_height, tile_size, pitch_tiles, + gtt_offset_rotated * tile_size, 0); + + gtt_offset_rotated += rot_info->plane[i].width * rot_info->plane[i].height; + + /* + * First pixel of the framebuffer from + * the start of the rotated gtt mapping.
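The per-plane tile accounting is likewise clearer with numbers (again invented): a rotated plane whose pitch is 32 tiles and which spans 10 tile rows needs stride * height tiles, plus one when its x offset is not tile aligned, and the next plane's tiles begin that far into the rotated mapping.

    /* Sketch of the bookkeeping above, invented values. */
    unsigned int stride = 32, height = 10; /* tiles, as in rot_info->plane[i] */
    unsigned int size = stride * height;   /* 320 tiles                       */
    int x = 72;                            /* pixels into the first tile      */

    if (x != 0)
        size++;                            /* 321 tiles for this plane        */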
+ */ + intel_fb->rotated[i].x = x; + intel_fb->rotated[i].y = y; + } else { + size = DIV_ROUND_UP((y + height) * fb->pitches[i] + + x * cpp, tile_size); + } + + /* how many tiles in total needed in the bo */ + max_size = max(max_size, offset + size); + } + + if (max_size * tile_size > to_intel_framebuffer(fb)->obj->base.size) { + DRM_DEBUG("fb too big for bo (need %u bytes, have %zu bytes)\n", + max_size * tile_size, to_intel_framebuffer(fb)->obj->base.size); + return -EINVAL; + } + + return 0; +} + static int i9xx_format_to_fourcc(int format) { switch (format) { @@ -2465,9 +2700,8 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, return false; } - obj->tiling_mode = plane_config->tiling; - if (obj->tiling_mode == I915_TILING_X) - obj->stride = fb->pitches[0]; + if (plane_config->tiling == I915_TILING_X) + obj->tiling_and_stride = fb->pitches[0] | I915_TILING_X; mode_cmd.pixel_format = fb->pixel_format; mode_cmd.width = fb->width; @@ -2488,7 +2722,7 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, return true; out_unref_obj: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); return false; } @@ -2552,7 +2786,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, continue; obj = intel_fb_obj(fb); - if (i915_gem_obj_ggtt_offset(obj) == plane_config->base) { + if (i915_gem_object_ggtt_offset(obj, NULL) == plane_config->base) { drm_framebuffer_reference(fb); goto valid_fb; } @@ -2565,7 +2799,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, * simplest solution is to just disable the primary plane now and * pretend the BIOS never had it enabled. */ - to_intel_plane_state(plane_state)->visible = false; + to_intel_plane_state(plane_state)->base.visible = false; crtc_state->plane_mask &= ~(1 << drm_plane_index(primary)); intel_pre_disable_primary_noatomic(&intel_crtc->base); intel_plane->disable_plane(primary, &intel_crtc->base); @@ -2583,24 +2817,188 @@ valid_fb: plane_state->crtc_w = fb->width; plane_state->crtc_h = fb->height; - intel_state->src.x1 = plane_state->src_x; - intel_state->src.y1 = plane_state->src_y; - intel_state->src.x2 = plane_state->src_x + plane_state->src_w; - intel_state->src.y2 = plane_state->src_y + plane_state->src_h; - intel_state->dst.x1 = plane_state->crtc_x; - intel_state->dst.y1 = plane_state->crtc_y; - intel_state->dst.x2 = plane_state->crtc_x + plane_state->crtc_w; - intel_state->dst.y2 = plane_state->crtc_y + plane_state->crtc_h; + intel_state->base.src.x1 = plane_state->src_x; + intel_state->base.src.y1 = plane_state->src_y; + intel_state->base.src.x2 = plane_state->src_x + plane_state->src_w; + intel_state->base.src.y2 = plane_state->src_y + plane_state->src_h; + intel_state->base.dst.x1 = plane_state->crtc_x; + intel_state->base.dst.y1 = plane_state->crtc_y; + intel_state->base.dst.x2 = plane_state->crtc_x + plane_state->crtc_w; + intel_state->base.dst.y2 = plane_state->crtc_y + plane_state->crtc_h; obj = intel_fb_obj(fb); - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) dev_priv->preserve_bios_swizzle = true; drm_framebuffer_reference(fb); primary->fb = primary->state->fb = fb; primary->crtc = primary->state->crtc = &intel_crtc->base; intel_crtc->base.state->plane_mask |= (1 << drm_plane_index(primary)); - obj->frontbuffer_bits |= to_intel_plane(primary)->frontbuffer_bit; + atomic_or(to_intel_plane(primary)->frontbuffer_bit, + &obj->frontbuffer_bits); +} + +static int skl_max_plane_width(const struct drm_framebuffer *fb, int plane, + 
unsigned int rotation) +{ + int cpp = drm_format_plane_cpp(fb->pixel_format, plane); + + switch (fb->modifier[plane]) { + case DRM_FORMAT_MOD_NONE: + case I915_FORMAT_MOD_X_TILED: + switch (cpp) { + case 8: + return 4096; + case 4: + case 2: + case 1: + return 8192; + default: + MISSING_CASE(cpp); + break; + } + break; + case I915_FORMAT_MOD_Y_TILED: + case I915_FORMAT_MOD_Yf_TILED: + switch (cpp) { + case 8: + return 2048; + case 4: + return 4096; + case 2: + case 1: + return 8192; + default: + MISSING_CASE(cpp); + break; + } + break; + default: + MISSING_CASE(fb->modifier[plane]); + } + + return 2048; +} + +static int skl_check_main_surface(struct intel_plane_state *plane_state) +{ + const struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev); + const struct drm_framebuffer *fb = plane_state->base.fb; + unsigned int rotation = plane_state->base.rotation; + int x = plane_state->base.src.x1 >> 16; + int y = plane_state->base.src.y1 >> 16; + int w = drm_rect_width(&plane_state->base.src) >> 16; + int h = drm_rect_height(&plane_state->base.src) >> 16; + int max_width = skl_max_plane_width(fb, 0, rotation); + int max_height = 4096; + u32 alignment, offset, aux_offset = plane_state->aux.offset; + + if (w > max_width || h > max_height) { + DRM_DEBUG_KMS("requested Y/RGB source size %dx%d too big (limit %dx%d)\n", + w, h, max_width, max_height); + return -EINVAL; + } + + intel_add_fb_offsets(&x, &y, plane_state, 0); + offset = intel_compute_tile_offset(&x, &y, plane_state, 0); + + alignment = intel_surf_alignment(dev_priv, fb->modifier[0]); + + /* + * AUX surface offset is specified as the distance from the + * main surface offset, and it must be non-negative. Make + * sure that is what we will get. + */ + if (offset > aux_offset) + offset = intel_adjust_tile_offset(&x, &y, plane_state, 0, + offset, aux_offset & ~(alignment - 1)); + + /* + * When using an X-tiled surface, the plane blows up + * if the x offset + width exceed the stride. 
+ * + * TODO: linear and Y-tiled seem fine, Yf untested, + */ + if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) { + int cpp = drm_format_plane_cpp(fb->pixel_format, 0); + + while ((x + w) * cpp > fb->pitches[0]) { + if (offset == 0) { + DRM_DEBUG_KMS("Unable to find suitable display surface offset\n"); + return -EINVAL; + } + + offset = intel_adjust_tile_offset(&x, &y, plane_state, 0, + offset, offset - alignment); + } + } + + plane_state->main.offset = offset; + plane_state->main.x = x; + plane_state->main.y = y; + + return 0; +} + +static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state) +{ + const struct drm_framebuffer *fb = plane_state->base.fb; + unsigned int rotation = plane_state->base.rotation; + int max_width = skl_max_plane_width(fb, 1, rotation); + int max_height = 4096; + int x = plane_state->base.src.x1 >> 17; + int y = plane_state->base.src.y1 >> 17; + int w = drm_rect_width(&plane_state->base.src) >> 17; + int h = drm_rect_height(&plane_state->base.src) >> 17; + u32 offset; + + intel_add_fb_offsets(&x, &y, plane_state, 1); + offset = intel_compute_tile_offset(&x, &y, plane_state, 1); + + /* FIXME not quite sure how/if these apply to the chroma plane */ + if (w > max_width || h > max_height) { + DRM_DEBUG_KMS("CbCr source size %dx%d too big (limit %dx%d)\n", + w, h, max_width, max_height); + return -EINVAL; + } + + plane_state->aux.offset = offset; + plane_state->aux.x = x; + plane_state->aux.y = y; + + return 0; +} + +int skl_check_plane_surface(struct intel_plane_state *plane_state) +{ + const struct drm_framebuffer *fb = plane_state->base.fb; + unsigned int rotation = plane_state->base.rotation; + int ret; + + /* Rotate src coordinates to match rotated GTT view */ + if (intel_rotation_90_or_270(rotation)) + drm_rect_rotate(&plane_state->base.src, + fb->width, fb->height, DRM_ROTATE_270); + + /* + * Handle the AUX surface first since + * the main surface setup depends on it. 
+ */ + if (fb->pixel_format == DRM_FORMAT_NV12) { + ret = skl_check_nv12_aux_surface(plane_state); + if (ret) + return ret; + } else { + plane_state->aux.offset = ~0xfff; + plane_state->aux.x = 0; + plane_state->aux.y = 0; + } + + ret = skl_check_main_surface(plane_state); + if (ret) + return ret; + + return 0; } static void i9xx_update_primary_plane(struct drm_plane *primary, @@ -2617,9 +3015,8 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, u32 dspcntr; i915_reg_t reg = DSPCNTR(plane); unsigned int rotation = plane_state->base.rotation; - int cpp = drm_format_plane_cpp(fb->pixel_format, 0); - int x = plane_state->src.x1 >> 16; - int y = plane_state->src.y1 >> 16; + int x = plane_state->base.src.x1 >> 16; + int y = plane_state->base.src.y1 >> 16; dspcntr = DISPPLANE_GAMMA_ENABLE; @@ -2670,37 +3067,31 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, BUG(); } - if (INTEL_INFO(dev)->gen >= 4 && - obj->tiling_mode != I915_TILING_NONE) + if (INTEL_GEN(dev_priv) >= 4 && + fb->modifier[0] == I915_FORMAT_MOD_X_TILED) dspcntr |= DISPPLANE_TILED; if (IS_G4X(dev)) dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; - linear_offset = y * fb->pitches[0] + x * cpp; + intel_add_fb_offsets(&x, &y, plane_state, 0); - if (INTEL_INFO(dev)->gen >= 4) { + if (INTEL_INFO(dev)->gen >= 4) intel_crtc->dspaddr_offset = - intel_compute_tile_offset(&x, &y, fb, 0, - fb->pitches[0], rotation); - linear_offset -= intel_crtc->dspaddr_offset; - } else { - intel_crtc->dspaddr_offset = linear_offset; - } + intel_compute_tile_offset(&x, &y, plane_state, 0); - if (rotation == BIT(DRM_ROTATE_180)) { + if (rotation == DRM_ROTATE_180) { dspcntr |= DISPPLANE_ROTATE_180; x += (crtc_state->pipe_src_w - 1); y += (crtc_state->pipe_src_h - 1); - - /* Finding the last pixel of the last line of the display - data and adding to linear_offset*/ - linear_offset += - (crtc_state->pipe_src_h - 1) * fb->pitches[0] + - (crtc_state->pipe_src_w - 1) * cpp; } + linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); + + if (INTEL_INFO(dev)->gen < 4) + intel_crtc->dspaddr_offset = linear_offset; + intel_crtc->adjusted_x = x; intel_crtc->adjusted_y = y; @@ -2709,11 +3100,12 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]); if (INTEL_INFO(dev)->gen >= 4) { I915_WRITE(DSPSURF(plane), - i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset); + intel_fb_gtt_offset(fb, rotation) + + intel_crtc->dspaddr_offset); I915_WRITE(DSPTILEOFF(plane), (y << 16) | x); I915_WRITE(DSPLINOFF(plane), linear_offset); } else - I915_WRITE(DSPADDR(plane), i915_gem_obj_ggtt_offset(obj) + linear_offset); + I915_WRITE(DSPADDR(plane), i915_gem_object_ggtt_offset(obj, NULL) + linear_offset); POSTING_READ(reg); } @@ -2741,15 +3133,13 @@ static void ironlake_update_primary_plane(struct drm_plane *primary, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_framebuffer *fb = plane_state->base.fb; - struct drm_i915_gem_object *obj = intel_fb_obj(fb); int plane = intel_crtc->plane; u32 linear_offset; u32 dspcntr; i915_reg_t reg = DSPCNTR(plane); unsigned int rotation = plane_state->base.rotation; - int cpp = drm_format_plane_cpp(fb->pixel_format, 0); - int x = plane_state->src.x1 >> 16; - int y = plane_state->src.y1 >> 16; + int x = plane_state->base.src.x1 >> 16; + int y = plane_state->base.src.y1 >> 16; dspcntr = DISPPLANE_GAMMA_ENABLE; dspcntr |= DISPLAY_PLANE_ENABLE; @@ -2780,32 +3170,28 @@ static 
void ironlake_update_primary_plane(struct drm_plane *primary, BUG(); } - if (obj->tiling_mode != I915_TILING_NONE) + if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) dspcntr |= DISPPLANE_TILED; if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; - linear_offset = y * fb->pitches[0] + x * cpp; + intel_add_fb_offsets(&x, &y, plane_state, 0); + intel_crtc->dspaddr_offset = - intel_compute_tile_offset(&x, &y, fb, 0, - fb->pitches[0], rotation); - linear_offset -= intel_crtc->dspaddr_offset; - if (rotation == BIT(DRM_ROTATE_180)) { + intel_compute_tile_offset(&x, &y, plane_state, 0); + + if (rotation == DRM_ROTATE_180) { dspcntr |= DISPPLANE_ROTATE_180; if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) { x += (crtc_state->pipe_src_w - 1); y += (crtc_state->pipe_src_h - 1); - - /* Finding the last pixel of the last line of the display - data and adding to linear_offset*/ - linear_offset += - (crtc_state->pipe_src_h - 1) * fb->pitches[0] + - (crtc_state->pipe_src_w - 1) * cpp; } } + linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); + intel_crtc->adjusted_x = x; intel_crtc->adjusted_y = y; @@ -2813,7 +3199,8 @@ static void ironlake_update_primary_plane(struct drm_plane *primary, I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]); I915_WRITE(DSPSURF(plane), - i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset); + intel_fb_gtt_offset(fb, rotation) + + intel_crtc->dspaddr_offset); if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { I915_WRITE(DSPOFFSET(plane), (y << 16) | x); } else { @@ -2835,32 +3222,21 @@ u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv, } } -u32 intel_plane_obj_offset(struct intel_plane *intel_plane, - struct drm_i915_gem_object *obj, - unsigned int plane) +u32 intel_fb_gtt_offset(struct drm_framebuffer *fb, + unsigned int rotation) { + struct drm_i915_gem_object *obj = intel_fb_obj(fb); struct i915_ggtt_view view; struct i915_vma *vma; - u64 offset; - intel_fill_fb_ggtt_view(&view, intel_plane->base.state->fb, - intel_plane->base.state->rotation); + intel_fill_fb_ggtt_view(&view, fb, rotation); - vma = i915_gem_obj_to_ggtt_view(obj, &view); + vma = i915_gem_object_to_ggtt(obj, &view); if (WARN(!vma, "ggtt vma for display object not found! (view=%u)\n", - view.type)) + view.type)) return -1; - offset = vma->node.start; - - if (plane == 1) { - offset += vma->ggtt_view.params.rotated.uv_start_page * - PAGE_SIZE; - } - - WARN_ON(upper_32_bits(offset)); - - return lower_32_bits(offset); + return i915_ggtt_offset(vma); } static void skl_detach_scaler(struct intel_crtc *intel_crtc, int id) @@ -2890,6 +3266,28 @@ static void skl_detach_scalers(struct intel_crtc *intel_crtc) } } +u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane, + unsigned int rotation) +{ + const struct drm_i915_private *dev_priv = to_i915(fb->dev); + u32 stride = intel_fb_pitch(fb, plane, rotation); + + /* + * The stride is either expressed as a multiple of 64 bytes chunks for + * linear buffers or in number of tiles for tiled buffers. 
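Concretely (invented pitch; the 512-byte X-tile width is what intel_fb_stride_alignment() returns for X-tiled surfaces): the same byte pitch is programmed very differently depending on the tiling.

    u32 pitch = 16384;               /* bytes                                  */
    u32 linear_stride = pitch / 64;  /* 256: linear uses 64-byte units         */
    u32 xtiled_stride = pitch / 512; /* 32: X-tiled counts 512-byte-wide tiles */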
+ */ + if (intel_rotation_90_or_270(rotation)) { + int cpp = drm_format_plane_cpp(fb->pixel_format, plane); + + stride /= intel_tile_height(dev_priv, fb->modifier[0], cpp); + } else { + stride /= intel_fb_stride_alignment(dev_priv, fb->modifier[0], + fb->pixel_format); + } + + return stride; +} + u32 skl_plane_ctl_format(uint32_t pixel_format) { switch (pixel_format) { @@ -2952,17 +3350,17 @@ u32 skl_plane_ctl_tiling(uint64_t fb_modifier) u32 skl_plane_ctl_rotation(unsigned int rotation) { switch (rotation) { - case BIT(DRM_ROTATE_0): + case DRM_ROTATE_0: break; /* * DRM_ROTATE_ is counter clockwise to stay compatible with Xrandr * while i915 HW rotation is clockwise, that's why we swap them here. */ - case BIT(DRM_ROTATE_90): + case DRM_ROTATE_90: return PLANE_CTL_ROTATE_270; - case BIT(DRM_ROTATE_180): + case DRM_ROTATE_180: return PLANE_CTL_ROTATE_180; - case BIT(DRM_ROTATE_270): + case DRM_ROTATE_270: return PLANE_CTL_ROTATE_90; default: MISSING_CASE(rotation); @@ -2979,22 +3377,21 @@ static void skylake_update_primary_plane(struct drm_plane *plane, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_framebuffer *fb = plane_state->base.fb; - struct drm_i915_gem_object *obj = intel_fb_obj(fb); + const struct skl_wm_values *wm = &dev_priv->wm.skl_results; int pipe = intel_crtc->pipe; - u32 plane_ctl, stride_div, stride; - u32 tile_height, plane_offset, plane_size; + u32 plane_ctl; unsigned int rotation = plane_state->base.rotation; - int x_offset, y_offset; - u32 surf_addr; + u32 stride = skl_plane_stride(fb, 0, rotation); + u32 surf_addr = plane_state->main.offset; int scaler_id = plane_state->scaler_id; - int src_x = plane_state->src.x1 >> 16; - int src_y = plane_state->src.y1 >> 16; - int src_w = drm_rect_width(&plane_state->src) >> 16; - int src_h = drm_rect_height(&plane_state->src) >> 16; - int dst_x = plane_state->dst.x1; - int dst_y = plane_state->dst.y1; - int dst_w = drm_rect_width(&plane_state->dst); - int dst_h = drm_rect_height(&plane_state->dst); + int src_x = plane_state->main.x; + int src_y = plane_state->main.y; + int src_w = drm_rect_width(&plane_state->base.src) >> 16; + int src_h = drm_rect_height(&plane_state->base.src) >> 16; + int dst_x = plane_state->base.dst.x1; + int dst_y = plane_state->base.dst.y1; + int dst_w = drm_rect_width(&plane_state->base.dst); + int dst_h = drm_rect_height(&plane_state->base.dst); plane_ctl = PLANE_CTL_ENABLE | PLANE_CTL_PIPE_GAMMA_ENABLE | @@ -3005,36 +3402,24 @@ static void skylake_update_primary_plane(struct drm_plane *plane, plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE; plane_ctl |= skl_plane_ctl_rotation(rotation); - stride_div = intel_fb_stride_alignment(dev_priv, fb->modifier[0], - fb->pixel_format); - surf_addr = intel_plane_obj_offset(to_intel_plane(plane), obj, 0); + /* Sizes are 0 based */ + src_w--; + src_h--; + dst_w--; + dst_h--; - WARN_ON(drm_rect_width(&plane_state->src) == 0); - - if (intel_rotation_90_or_270(rotation)) { - int cpp = drm_format_plane_cpp(fb->pixel_format, 0); + intel_crtc->dspaddr_offset = surf_addr; - /* stride = Surface height in tiles */ - tile_height = intel_tile_height(dev_priv, fb->modifier[0], cpp); - stride = DIV_ROUND_UP(fb->height, tile_height); - x_offset = stride * tile_height - src_y - src_h; - y_offset = src_x; - plane_size = (src_w - 1) << 16 | (src_h - 1); - } else { - stride = fb->pitches[0] / stride_div; - x_offset = src_x; - y_offset = src_y; - plane_size = (src_h - 1) << 16 | (src_w - 1); - } - plane_offset =
y_offset << 16 | x_offset; + intel_crtc->adjusted_x = src_x; + intel_crtc->adjusted_y = src_y; - intel_crtc->adjusted_x = x_offset; - intel_crtc->adjusted_y = y_offset; + if (wm->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) + skl_write_plane_wm(intel_crtc, wm, 0); I915_WRITE(PLANE_CTL(pipe, 0), plane_ctl); - I915_WRITE(PLANE_OFFSET(pipe, 0), plane_offset); - I915_WRITE(PLANE_SIZE(pipe, 0), plane_size); + I915_WRITE(PLANE_OFFSET(pipe, 0), (src_y << 16) | src_x); I915_WRITE(PLANE_STRIDE(pipe, 0), stride); + I915_WRITE(PLANE_SIZE(pipe, 0), (src_h << 16) | src_w); if (scaler_id >= 0) { uint32_t ps_ctrl = 0; @@ -3051,7 +3436,8 @@ static void skylake_update_primary_plane(struct drm_plane *plane, I915_WRITE(PLANE_POS(pipe, 0), (dst_y << 16) | dst_x); } - I915_WRITE(PLANE_SURF(pipe, 0), surf_addr); + I915_WRITE(PLANE_SURF(pipe, 0), + intel_fb_gtt_offset(fb, rotation) + surf_addr); POSTING_READ(PLANE_SURF(pipe, 0)); } @@ -3061,7 +3447,15 @@ static void skylake_disable_primary_plane(struct drm_plane *primary, { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = to_intel_crtc(crtc)->pipe; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int pipe = intel_crtc->pipe; + + /* + * We only populate skl_results on watermark updates, and if the + * plane's visibility isn't actually changing, neither are its watermarks. + */ + if (!crtc->primary->state->visible) + skl_write_plane_wm(intel_crtc, &dev_priv->wm.skl_results, 0); I915_WRITE(PLANE_CTL(pipe, 0), 0); I915_WRITE(PLANE_SURF(pipe, 0), 0); @@ -3093,40 +3487,113 @@ static void intel_update_primary_planes(struct drm_device *dev) for_each_crtc(dev, crtc) { struct intel_plane *plane = to_intel_plane(crtc->primary); - struct intel_plane_state *plane_state; - - drm_modeset_lock_crtc(crtc, &plane->base); - plane_state = to_intel_plane_state(plane->base.state); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); - if (plane_state->visible) + if (plane_state->base.visible) plane->update_plane(&plane->base, to_intel_crtc_state(crtc->state), plane_state); + } +} + +static int +__intel_display_resume(struct drm_device *dev, + struct drm_atomic_state *state) +{ + struct drm_crtc_state *crtc_state; + struct drm_crtc *crtc; + int i, ret; - drm_modeset_unlock_crtc(crtc); + intel_modeset_setup_hw_state(dev); + i915_redisable_vga(dev); + + if (!state) + return 0; + + for_each_crtc_in_state(state, crtc, crtc_state, i) { + /* + * Force recalculation even if we restore + * current state. With fast modeset this may not result + * in a modeset when the state is compatible. + */ + crtc_state->mode_changed = true; } + + /* ignore any reset values/BIOS leftovers in the WM registers */ + to_intel_atomic_state(state)->skip_intermediate_wm = true; + + ret = drm_atomic_commit(state); + + WARN_ON(ret == -EDEADLK); + return ret; +} + +static bool gpu_reset_clobbers_display(struct drm_i915_private *dev_priv) +{ + return intel_has_gpu_reset(dev_priv) && + INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv); } void intel_prepare_reset(struct drm_i915_private *dev_priv) { - /* no reset support for gen2 */ - if (IS_GEN2(dev_priv)) - return; + struct drm_device *dev = &dev_priv->drm; + struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx; + struct drm_atomic_state *state; + int ret; - /* reset doesn't touch the display */ - if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) + /* + * Need mode_config.mutex so that we don't + * trample ongoing ->detect() and whatnot.
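The loop that follows is the standard acquire-context dance: -EDEADLK from drm_modeset_lock_all_ctx() means another context holds one of the locks we need, and drm_modeset_backoff() drops everything we hold and waits before the retry. Reduced to its core (a sketch of the same idiom, not new logic):

    while ((ret = drm_modeset_lock_all_ctx(dev, ctx)) == -EDEADLK)
        drm_modeset_backoff(ctx); /* drop our locks, wait, try again */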
+ */ + mutex_lock(&dev->mode_config.mutex); + drm_modeset_acquire_init(ctx, 0); + while (1) { + ret = drm_modeset_lock_all_ctx(dev, ctx); + if (ret != -EDEADLK) + break; + + drm_modeset_backoff(ctx); + } + + /* reset doesn't touch the display, but flips might get nuked anyway. */ + if (!i915.force_reset_modeset_test && + !gpu_reset_clobbers_display(dev_priv)) return; - drm_modeset_lock_all(&dev_priv->drm); /* * Disabling the crtcs gracefully seems nicer. Also the * g33 docs say we should at least disable all the planes. */ - intel_display_suspend(&dev_priv->drm); + state = drm_atomic_helper_duplicate_state(dev, ctx); + if (IS_ERR(state)) { + ret = PTR_ERR(state); + state = NULL; + DRM_ERROR("Duplicating state failed with %i\n", ret); + goto err; + } + + ret = drm_atomic_helper_disable_all(dev, ctx); + if (ret) { + DRM_ERROR("Suspending crtcs failed with %i\n", ret); + goto err; + } + + dev_priv->modeset_restore_state = state; + state->acquire_ctx = ctx; + return; + +err: + drm_atomic_state_free(state); } void intel_finish_reset(struct drm_i915_private *dev_priv) { + struct drm_device *dev = &dev_priv->drm; + struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx; + struct drm_atomic_state *state = dev_priv->modeset_restore_state; + int ret; + /* * Flips in the rings will be nuked by the reset, * so complete all pending flips so that user space @@ -3134,55 +3601,76 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) */ intel_complete_page_flips(dev_priv); - /* no reset support for gen2 */ - if (IS_GEN2(dev_priv)) - return; + dev_priv->modeset_restore_state = NULL; /* reset doesn't touch the display */ - if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) { + if (!gpu_reset_clobbers_display(dev_priv)) { + if (!state) { + /* + * Flips in the rings have been nuked by the reset, + * so update the base address of all primary + * planes to the last fb to make sure we're + * showing the correct fb after a reset. + * + * FIXME: Atomic will make this obsolete since we won't schedule + * CS-based flips (which might get lost in gpu resets) any more. + */ + intel_update_primary_planes(dev); + } else { + ret = __intel_display_resume(dev, state); + if (ret) + DRM_ERROR("Restoring old state failed with %i\n", ret); + } + } else { /* - * Flips in the rings have been nuked by the reset, - * so update the base address of all primary - * planes to the the last fb to make sure we're - * showing the correct fb after a reset. - * - * FIXME: Atomic will make this obsolete since we won't schedule - * CS-based flips (which might get lost in gpu resets) any more. + * The display has been reset as well, + * so need a full re-initialization.
- */ - intel_runtime_pm_disable_interrupts(dev_priv); - intel_runtime_pm_enable_interrupts(dev_priv); + intel_pps_unlock_regs_wa(dev_priv); + intel_modeset_init_hw(dev); + + spin_lock_irq(&dev_priv->irq_lock); + if (dev_priv->display.hpd_irq_setup) + dev_priv->display.hpd_irq_setup(dev_priv); + spin_unlock_irq(&dev_priv->irq_lock); + + ret = __intel_display_resume(dev, state); + if (ret) + DRM_ERROR("Restoring old state failed with %i\n", ret); + + intel_hpd_init(dev_priv); + } - intel_modeset_init_hw(&dev_priv->drm); + drm_modeset_drop_locks(ctx); + drm_modeset_acquire_fini(ctx); + mutex_unlock(&dev->mode_config.mutex); +} - spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->display.hpd_irq_setup) - dev_priv->display.hpd_irq_setup(dev_priv); - spin_unlock_irq(&dev_priv->irq_lock); +static bool abort_flip_on_reset(struct intel_crtc *crtc) +{ + struct i915_gpu_error *error = &to_i915(crtc->base.dev)->gpu_error; - intel_display_resume(&dev_priv->drm); + if (i915_reset_in_progress(error)) + return true; - intel_hpd_init(dev_priv); + if (crtc->reset_count != i915_reset_count(error)) + return true; - drm_modeset_unlock_all(&dev_priv->drm); + return false; } static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - unsigned reset_counter; bool pending; - reset_counter = i915_reset_counter(&to_i915(dev)->gpu_error); - if (intel_crtc->reset_counter != reset_counter) + if (abort_flip_on_reset(intel_crtc)) return false; spin_lock_irq(&dev->event_lock); @@ -3825,7 +4313,7 @@ static int intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc) return 0; } -static void lpt_disable_iclkip(struct drm_i915_private *dev_priv) +void lpt_disable_iclkip(struct drm_i915_private *dev_priv) { u32 temp; @@ -4248,7 +4736,7 @@ int skl_update_scaler_crtc(struct intel_crtc_state *state) intel_crtc->pipe, SKL_CRTC_INDEX); return skl_update_scaler(state, !state->base.active, SKL_CRTC_INDEX, - &state->scaler_state.scaler_id, BIT(DRM_ROTATE_0), + &state->scaler_state.scaler_id, DRM_ROTATE_0, state->pipe_src_w, state->pipe_src_h, adjusted_mode->crtc_hdisplay, adjusted_mode->crtc_vdisplay); } @@ -4273,7 +4761,7 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, struct drm_framebuffer *fb = plane_state->base.fb; int ret; - bool force_detach = !fb || !plane_state->visible; + bool force_detach = !fb || !plane_state->base.visible; DRM_DEBUG_KMS("Updating scaler for [PLANE:%d:%s] scaler_user index %u.%u\n", intel_plane->base.base.id, intel_plane->base.name, @@ -4283,10 +4771,10 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, drm_plane_index(&intel_plane->base), &plane_state->scaler_id, plane_state->base.rotation, - drm_rect_width(&plane_state->src) >> 16, - drm_rect_height(&plane_state->src) >> 16, - drm_rect_width(&plane_state->dst), - drm_rect_height(&plane_state->dst)); + drm_rect_width(&plane_state->base.src) >> 16, + drm_rect_height(&plane_state->base.src) >> 16, + drm_rect_width(&plane_state->base.dst), + drm_rect_height(&plane_state->base.dst)); if (ret || plane_state->scaler_id < 0) return ret; @@ -4564,12 +5052,11 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) struct drm_atomic_state *old_state = old_crtc_state->base.state; struct intel_crtc_state *pipe_config = to_intel_crtc_state(crtc->base.state); - struct drm_device *dev = crtc->base.dev; struct drm_plane *primary = crtc->base.primary; struct drm_plane_state *old_pri_state = 
drm_atomic_get_existing_plane_state(old_state, primary); - intel_frontbuffer_flip(dev, pipe_config->fb_bits); + intel_frontbuffer_flip(to_i915(crtc->base.dev), pipe_config->fb_bits); crtc->wm.cxsr_allowed = true; @@ -4584,9 +5071,9 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) intel_fbc_post_update(crtc); - if (primary_state->visible && + if (primary_state->base.visible && (needs_modeset(&pipe_config->base) || - !old_primary_state->visible)) + !old_primary_state->base.visible)) intel_post_enable_primary(&crtc->base); } } @@ -4612,8 +5099,8 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state) intel_fbc_pre_update(crtc, pipe_config, primary_state); - if (old_primary_state->visible && - (modeset || !primary_state->visible)) + if (old_primary_state->base.visible && + (modeset || !primary_state->base.visible)) intel_pre_disable_primary(&crtc->base); } @@ -4692,18 +5179,140 @@ static void intel_crtc_disable_planes(struct drm_crtc *crtc, unsigned plane_mask * to compute the mask of flip planes precisely. For the time being * consider this a flip to a NULL plane. */ - intel_frontbuffer_flip(dev, INTEL_FRONTBUFFER_ALL_MASK(pipe)); + intel_frontbuffer_flip(to_i915(dev), INTEL_FRONTBUFFER_ALL_MASK(pipe)); +} + +static void intel_encoders_pre_pll_enable(struct drm_crtc *crtc, + struct intel_crtc_state *crtc_state, + struct drm_atomic_state *old_state) +{ + struct drm_connector_state *old_conn_state; + struct drm_connector *conn; + int i; + + for_each_connector_in_state(old_state, conn, old_conn_state, i) { + struct drm_connector_state *conn_state = conn->state; + struct intel_encoder *encoder = + to_intel_encoder(conn_state->best_encoder); + + if (conn_state->crtc != crtc) + continue; + + if (encoder->pre_pll_enable) + encoder->pre_pll_enable(encoder, crtc_state, conn_state); + } +} + +static void intel_encoders_pre_enable(struct drm_crtc *crtc, + struct intel_crtc_state *crtc_state, + struct drm_atomic_state *old_state) +{ + struct drm_connector_state *old_conn_state; + struct drm_connector *conn; + int i; + + for_each_connector_in_state(old_state, conn, old_conn_state, i) { + struct drm_connector_state *conn_state = conn->state; + struct intel_encoder *encoder = + to_intel_encoder(conn_state->best_encoder); + + if (conn_state->crtc != crtc) + continue; + + if (encoder->pre_enable) + encoder->pre_enable(encoder, crtc_state, conn_state); + } } -static void ironlake_crtc_enable(struct drm_crtc *crtc) +static void intel_encoders_enable(struct drm_crtc *crtc, + struct intel_crtc_state *crtc_state, + struct drm_atomic_state *old_state) { + struct drm_connector_state *old_conn_state; + struct drm_connector *conn; + int i; + + for_each_connector_in_state(old_state, conn, old_conn_state, i) { + struct drm_connector_state *conn_state = conn->state; + struct intel_encoder *encoder = + to_intel_encoder(conn_state->best_encoder); + + if (conn_state->crtc != crtc) + continue; + + encoder->enable(encoder, crtc_state, conn_state); + intel_opregion_notify_encoder(encoder, true); + } +} + +static void intel_encoders_disable(struct drm_crtc *crtc, + struct intel_crtc_state *old_crtc_state, + struct drm_atomic_state *old_state) +{ + struct drm_connector_state *old_conn_state; + struct drm_connector *conn; + int i; + + for_each_connector_in_state(old_state, conn, old_conn_state, i) { + struct intel_encoder *encoder = + to_intel_encoder(old_conn_state->best_encoder); + + if (old_conn_state->crtc != crtc) + continue; + + intel_opregion_notify_encoder(encoder, 
false); + encoder->disable(encoder, old_crtc_state, old_conn_state); + } +} + +static void intel_encoders_post_disable(struct drm_crtc *crtc, + struct intel_crtc_state *old_crtc_state, + struct drm_atomic_state *old_state) +{ + struct drm_connector_state *old_conn_state; + struct drm_connector *conn; + int i; + + for_each_connector_in_state(old_state, conn, old_conn_state, i) { + struct intel_encoder *encoder = + to_intel_encoder(old_conn_state->best_encoder); + + if (old_conn_state->crtc != crtc) + continue; + + if (encoder->post_disable) + encoder->post_disable(encoder, old_crtc_state, old_conn_state); + } +} + +static void intel_encoders_post_pll_disable(struct drm_crtc *crtc, + struct intel_crtc_state *old_crtc_state, + struct drm_atomic_state *old_state) +{ + struct drm_connector_state *old_conn_state; + struct drm_connector *conn; + int i; + + for_each_connector_in_state(old_state, conn, old_conn_state, i) { + struct intel_encoder *encoder = + to_intel_encoder(old_conn_state->best_encoder); + + if (old_conn_state->crtc != crtc) + continue; + + if (encoder->post_pll_disable) + encoder->post_pll_disable(encoder, old_crtc_state, old_conn_state); + } +} + +static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config, + struct drm_atomic_state *old_state) +{ + struct drm_crtc *crtc = pipe_config->base.crtc; struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_encoder *encoder; int pipe = intel_crtc->pipe; - struct intel_crtc_state *pipe_config = - to_intel_crtc_state(crtc->state); if (WARN_ON(intel_crtc->active)) return; @@ -4741,9 +5350,7 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc) intel_crtc->active = true; - for_each_encoder_on_crtc(dev, crtc, encoder) - if (encoder->pre_enable) - encoder->pre_enable(encoder); + intel_encoders_pre_enable(crtc, pipe_config, old_state); if (intel_crtc->config->has_pch_encoder) { /* Note: FDI PLL enabling _must_ be done before we enable the @@ -4773,8 +5380,7 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc) assert_vblank_disabled(crtc); drm_crtc_vblank_on(crtc); - for_each_encoder_on_crtc(dev, crtc, encoder) - encoder->enable(encoder); + intel_encoders_enable(crtc, pipe_config, old_state); if (HAS_PCH_CPT(dev)) cpt_verify_modeset(dev, intel_crtc->pipe); @@ -4792,16 +5398,15 @@ static bool hsw_crtc_supports_ips(struct intel_crtc *crtc) return HAS_IPS(crtc->base.dev) && crtc->pipe == PIPE_A; } -static void haswell_crtc_enable(struct drm_crtc *crtc) +static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, + struct drm_atomic_state *old_state) { + struct drm_crtc *crtc = pipe_config->base.crtc; struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_encoder *encoder; int pipe = intel_crtc->pipe, hsw_workaround_pipe; enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; - struct intel_crtc_state *pipe_config = - to_intel_crtc_state(crtc->state); if (WARN_ON(intel_crtc->active)) return; @@ -4810,9 +5415,7 @@ static void haswell_crtc_enable(struct drm_crtc *crtc) intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A, false); - for_each_encoder_on_crtc(dev, crtc, encoder) - if (encoder->pre_pll_enable) - encoder->pre_pll_enable(encoder); + intel_encoders_pre_pll_enable(crtc, pipe_config, old_state); if (intel_crtc->config->shared_dpll) intel_enable_shared_dpll(intel_crtc); @@ -4850,10 +5453,7 @@ 
static void haswell_crtc_enable(struct drm_crtc *crtc) else intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); - for_each_encoder_on_crtc(dev, crtc, encoder) { - if (encoder->pre_enable) - encoder->pre_enable(encoder); - } + intel_encoders_pre_enable(crtc, pipe_config, old_state); if (intel_crtc->config->has_pch_encoder) dev_priv->display.fdi_link_train(crtc); @@ -4894,10 +5494,7 @@ static void haswell_crtc_enable(struct drm_crtc *crtc) assert_vblank_disabled(crtc); drm_crtc_vblank_on(crtc); - for_each_encoder_on_crtc(dev, crtc, encoder) { - encoder->enable(encoder); - intel_opregion_notify_encoder(encoder, true); - } + intel_encoders_enable(crtc, pipe_config, old_state); if (intel_crtc->config->has_pch_encoder) { intel_wait_for_vblank(dev, pipe); @@ -4931,12 +5528,13 @@ static void ironlake_pfit_disable(struct intel_crtc *crtc, bool force) } } -static void ironlake_crtc_disable(struct drm_crtc *crtc) +static void ironlake_crtc_disable(struct intel_crtc_state *old_crtc_state, + struct drm_atomic_state *old_state) { + struct drm_crtc *crtc = old_crtc_state->base.crtc; struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_encoder *encoder; int pipe = intel_crtc->pipe; /* @@ -4949,8 +5547,7 @@ static void ironlake_crtc_disable(struct drm_crtc *crtc) intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, false); } - for_each_encoder_on_crtc(dev, crtc, encoder) - encoder->disable(encoder); + intel_encoders_disable(crtc, old_crtc_state, old_state); drm_crtc_vblank_off(crtc); assert_vblank_disabled(crtc); @@ -4962,9 +5559,7 @@ static void ironlake_crtc_disable(struct drm_crtc *crtc) if (intel_crtc->config->has_pch_encoder) ironlake_fdi_disable(crtc); - for_each_encoder_on_crtc(dev, crtc, encoder) - if (encoder->post_disable) - encoder->post_disable(encoder); + intel_encoders_post_disable(crtc, old_crtc_state, old_state); if (intel_crtc->config->has_pch_encoder) { ironlake_disable_pch_transcoder(dev_priv, pipe); @@ -4994,22 +5589,20 @@ static void ironlake_crtc_disable(struct drm_crtc *crtc) intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, true); } -static void haswell_crtc_disable(struct drm_crtc *crtc) +static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, + struct drm_atomic_state *old_state) { + struct drm_crtc *crtc = old_crtc_state->base.crtc; struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_encoder *encoder; enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; if (intel_crtc->config->has_pch_encoder) intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A, false); - for_each_encoder_on_crtc(dev, crtc, encoder) { - intel_opregion_notify_encoder(encoder, false); - encoder->disable(encoder); - } + intel_encoders_disable(crtc, old_crtc_state, old_state); drm_crtc_vblank_off(crtc); assert_vblank_disabled(crtc); @@ -5032,18 +5625,11 @@ static void haswell_crtc_disable(struct drm_crtc *crtc) if (!transcoder_is_dsi(cpu_transcoder)) intel_ddi_disable_pipe_clock(intel_crtc); - for_each_encoder_on_crtc(dev, crtc, encoder) - if (encoder->post_disable) - encoder->post_disable(encoder); - - if (intel_crtc->config->has_pch_encoder) { - lpt_disable_pch_transcoder(dev_priv); - lpt_disable_iclkip(dev_priv); - intel_ddi_fdi_disable(crtc); + intel_encoders_post_disable(crtc, old_crtc_state, old_state); + if (old_crtc_state->has_pch_encoder) 
intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A, true); - } } static void i9xx_pfit_enable(struct intel_crtc *crtc) @@ -5691,15 +6277,7 @@ static bool skl_cdclk_pcu_ready(struct drm_i915_private *dev_priv) static bool skl_cdclk_wait_for_pcu_ready(struct drm_i915_private *dev_priv) { - unsigned int i; - - for (i = 0; i < 15; i++) { - if (skl_cdclk_pcu_ready(dev_priv)) - return true; - udelay(10); - } - - return false; + return _wait_for(skl_cdclk_pcu_ready(dev_priv), 3000, 10) == 0; } static void skl_set_cdclk(struct drm_i915_private *dev_priv, int cdclk, int vco) @@ -6107,14 +6685,13 @@ static void valleyview_modeset_commit_cdclk(struct drm_atomic_state *old_state) intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A); } -static void valleyview_crtc_enable(struct drm_crtc *crtc) +static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, + struct drm_atomic_state *old_state) { + struct drm_crtc *crtc = pipe_config->base.crtc; struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_encoder *encoder; - struct intel_crtc_state *pipe_config = - to_intel_crtc_state(crtc->state); int pipe = intel_crtc->pipe; if (WARN_ON(intel_crtc->active)) @@ -6139,9 +6716,7 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc) intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); - for_each_encoder_on_crtc(dev, crtc, encoder) - if (encoder->pre_pll_enable) - encoder->pre_pll_enable(encoder); + intel_encoders_pre_pll_enable(crtc, pipe_config, old_state); if (IS_CHERRYVIEW(dev)) { chv_prepare_pll(intel_crtc, intel_crtc->config); @@ -6151,9 +6726,7 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc) vlv_enable_pll(intel_crtc, intel_crtc->config); } - for_each_encoder_on_crtc(dev, crtc, encoder) - if (encoder->pre_enable) - encoder->pre_enable(encoder); + intel_encoders_pre_enable(crtc, pipe_config, old_state); i9xx_pfit_enable(intel_crtc); @@ -6165,8 +6738,7 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc) assert_vblank_disabled(crtc); drm_crtc_vblank_on(crtc); - for_each_encoder_on_crtc(dev, crtc, encoder) - encoder->enable(encoder); + intel_encoders_enable(crtc, pipe_config, old_state); } static void i9xx_set_pll_dividers(struct intel_crtc *crtc) @@ -6178,14 +6750,13 @@ static void i9xx_set_pll_dividers(struct intel_crtc *crtc) I915_WRITE(FP1(crtc->pipe), crtc->config->dpll_hw_state.fp1); } -static void i9xx_crtc_enable(struct drm_crtc *crtc) +static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config, + struct drm_atomic_state *old_state) { + struct drm_crtc *crtc = pipe_config->base.crtc; struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_encoder *encoder; - struct intel_crtc_state *pipe_config = - to_intel_crtc_state(crtc->state); enum pipe pipe = intel_crtc->pipe; if (WARN_ON(intel_crtc->active)) @@ -6206,9 +6777,7 @@ static void i9xx_crtc_enable(struct drm_crtc *crtc) if (!IS_GEN2(dev)) intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); - for_each_encoder_on_crtc(dev, crtc, encoder) - if (encoder->pre_enable) - encoder->pre_enable(encoder); + intel_encoders_pre_enable(crtc, pipe_config, old_state); i9xx_enable_pll(intel_crtc); @@ -6222,8 +6791,7 @@ static void i9xx_crtc_enable(struct drm_crtc *crtc) assert_vblank_disabled(crtc); drm_crtc_vblank_on(crtc); - for_each_encoder_on_crtc(dev, crtc, encoder) - encoder->enable(encoder); + 
intel_encoders_enable(crtc, pipe_config, old_state); } static void i9xx_pfit_disable(struct intel_crtc *crtc) @@ -6241,12 +6809,13 @@ static void i9xx_pfit_disable(struct intel_crtc *crtc) I915_WRITE(PFIT_CONTROL, 0); } -static void i9xx_crtc_disable(struct drm_crtc *crtc) +static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state, + struct drm_atomic_state *old_state) { + struct drm_crtc *crtc = old_crtc_state->base.crtc; struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_encoder *encoder; int pipe = intel_crtc->pipe; /* @@ -6256,8 +6825,7 @@ static void i9xx_crtc_disable(struct drm_crtc *crtc) if (IS_GEN2(dev)) intel_wait_for_vblank(dev, pipe); - for_each_encoder_on_crtc(dev, crtc, encoder) - encoder->disable(encoder); + intel_encoders_disable(crtc, old_crtc_state, old_state); drm_crtc_vblank_off(crtc); assert_vblank_disabled(crtc); @@ -6266,9 +6834,7 @@ static void i9xx_crtc_disable(struct drm_crtc *crtc) i9xx_pfit_disable(intel_crtc); - for_each_encoder_on_crtc(dev, crtc, encoder) - if (encoder->post_disable) - encoder->post_disable(encoder); + intel_encoders_post_disable(crtc, old_crtc_state, old_state); if (!intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DSI)) { if (IS_CHERRYVIEW(dev)) @@ -6279,9 +6845,7 @@ static void i9xx_crtc_disable(struct drm_crtc *crtc) i9xx_disable_pll(intel_crtc); } - for_each_encoder_on_crtc(dev, crtc, encoder) - if (encoder->post_pll_disable) - encoder->post_pll_disable(encoder); + intel_encoders_post_pll_disable(crtc, old_crtc_state, old_state); if (!IS_GEN2(dev)) intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); @@ -6294,20 +6858,34 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) struct drm_i915_private *dev_priv = to_i915(crtc->dev); enum intel_display_power_domain domain; unsigned long domains; + struct drm_atomic_state *state; + struct intel_crtc_state *crtc_state; + int ret; if (!intel_crtc->active) return; - if (to_intel_plane_state(crtc->primary->state)->visible) { + if (to_intel_plane_state(crtc->primary->state)->base.visible) { WARN_ON(intel_crtc->flip_work); intel_pre_disable_primary_noatomic(crtc); intel_crtc_disable_planes(crtc, 1 << drm_plane_index(crtc->primary)); - to_intel_plane_state(crtc->primary->state)->visible = false; + to_intel_plane_state(crtc->primary->state)->base.visible = false; } - dev_priv->display.crtc_disable(crtc); + state = drm_atomic_state_alloc(crtc->dev); + state->acquire_ctx = crtc->dev->mode_config.acquire_ctx; + + /* Everything's already locked, -EDEADLK can't happen. 
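In the general, not-yet-locked case the same call reports lock contention through the returned pointer, which is why the comment above matters; the usual pattern looks roughly like this (a sketch, not from the patch):

    crtc_state = intel_atomic_get_crtc_state(state, intel_crtc);
    if (IS_ERR(crtc_state))
        return PTR_ERR(crtc_state); /* typically -EDEADLK: back off and retry */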
*/ + crtc_state = intel_atomic_get_crtc_state(state, intel_crtc); + ret = drm_atomic_add_affected_connectors(state, crtc); + + WARN_ON(IS_ERR(crtc_state) || ret); + + dev_priv->display.crtc_disable(crtc_state, state); + + drm_atomic_state_free(state); DRM_DEBUG_KMS("[CRTC:%d:%s] hw state adjusted, was enabled, now disabled\n", crtc->base.id, crtc->name); @@ -6822,9 +7400,10 @@ static int i9xx_misc_get_display_clock_speed(struct drm_device *dev) static int pnv_get_display_clock_speed(struct drm_device *dev) { + struct pci_dev *pdev = dev->pdev; u16 gcfgc = 0; - pci_read_config_word(dev->pdev, GCFGC, &gcfgc); + pci_read_config_word(pdev, GCFGC, &gcfgc); switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { case GC_DISPLAY_CLOCK_267_MHZ_PNV: @@ -6846,9 +7425,10 @@ static int pnv_get_display_clock_speed(struct drm_device *dev) static int i915gm_get_display_clock_speed(struct drm_device *dev) { + struct pci_dev *pdev = dev->pdev; u16 gcfgc = 0; - pci_read_config_word(dev->pdev, GCFGC, &gcfgc); + pci_read_config_word(pdev, GCFGC, &gcfgc); if (gcfgc & GC_LOW_FREQUENCY_ENABLE) return 133333; @@ -6870,6 +7450,7 @@ static int i865_get_display_clock_speed(struct drm_device *dev) static int i85x_get_display_clock_speed(struct drm_device *dev) { + struct pci_dev *pdev = dev->pdev; u16 hpllcc = 0; /* @@ -6877,10 +7458,10 @@ static int i85x_get_display_clock_speed(struct drm_device *dev) * encoding is different :( * FIXME is this the right way to detect 852GM/852GMV? */ - if (dev->pdev->revision == 0x1) + if (pdev->revision == 0x1) return 133333; - pci_bus_read_config_word(dev->pdev->bus, + pci_bus_read_config_word(pdev->bus, PCI_DEVFN(0, 3), HPLLCC, &hpllcc); /* Assume that the hardware is in the high speed state. This @@ -6981,10 +7562,11 @@ static unsigned int intel_hpll_vco(struct drm_device *dev) static int gm45_get_display_clock_speed(struct drm_device *dev) { + struct pci_dev *pdev = dev->pdev; unsigned int cdclk_sel, vco = intel_hpll_vco(dev); uint16_t tmp = 0; - pci_read_config_word(dev->pdev, GCFGC, &tmp); + pci_read_config_word(pdev, GCFGC, &tmp); cdclk_sel = (tmp >> 12) & 0x1; @@ -7003,6 +7585,7 @@ static int gm45_get_display_clock_speed(struct drm_device *dev) static int i965gm_get_display_clock_speed(struct drm_device *dev) { + struct pci_dev *pdev = dev->pdev; static const uint8_t div_3200[] = { 16, 10, 8 }; static const uint8_t div_4000[] = { 20, 12, 10 }; static const uint8_t div_5333[] = { 24, 16, 14 }; @@ -7010,7 +7593,7 @@ static int i965gm_get_display_clock_speed(struct drm_device *dev) unsigned int cdclk_sel, vco = intel_hpll_vco(dev); uint16_t tmp = 0; - pci_read_config_word(dev->pdev, GCFGC, &tmp); + pci_read_config_word(pdev, GCFGC, &tmp); cdclk_sel = ((tmp >> 8) & 0x1f) - 1; @@ -7040,6 +7623,7 @@ fail: static int g33_get_display_clock_speed(struct drm_device *dev) { + struct pci_dev *pdev = dev->pdev; static const uint8_t div_3200[] = { 12, 10, 8, 7, 5, 16 }; static const uint8_t div_4000[] = { 14, 12, 10, 8, 6, 20 }; static const uint8_t div_4800[] = { 20, 14, 12, 10, 8, 24 }; @@ -7048,7 +7632,7 @@ static int g33_get_display_clock_speed(struct drm_device *dev) unsigned int cdclk_sel, vco = intel_hpll_vco(dev); uint16_t tmp = 0; - pci_read_config_word(dev->pdev, GCFGC, &tmp); + pci_read_config_word(pdev, GCFGC, &tmp); cdclk_sel = (tmp >> 4) & 0x7; @@ -8928,6 +9512,24 @@ static void ironlake_compute_dpll(struct intel_crtc *intel_crtc, if (intel_crtc_has_dp_encoder(crtc_state)) dpll |= DPLL_SDVO_HIGH_SPEED; + /* + * The high speed IO clock is only really required for + * SDVO/HDMI/DP, but we 
also enable it for CRT to make it + possible to share the DPLL between CRT and HDMI. Enabling + the clock needlessly does no real harm, except use up a + bit of power potentially. + * + * We'll limit this to IVB with 3 pipes, since it has only two + * DPLLs and so DPLL sharing is the only way to get three pipes + * driving PCH ports at the same time. On SNB we could do this, + * and potentially avoid enabling the second DPLL, but it's not + * clear if it's a win or loss power wise. No point in doing + * this on ILK at all since it has a fixed DPLL<->pipe mapping. + */ + if (INTEL_INFO(dev_priv)->num_pipes == 3 && + intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG)) + dpll |= DPLL_SDVO_HIGH_SPEED; + /* compute bitmask from p1 value */ dpll |= (1 << (crtc_state->dpll.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT; /* also FPA1 */ @@ -9214,7 +9816,7 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, return; error: - kfree(fb); + kfree(intel_fb); } static void ironlake_get_pfit_config(struct intel_crtc *crtc, @@ -9420,7 +10022,7 @@ static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv) I915_STATE_WARN(I915_READ(SPLL_CTL) & SPLL_PLL_ENABLE, "SPLL enabled\n"); I915_STATE_WARN(I915_READ(WRPLL_CTL(0)) & WRPLL_PLL_ENABLE, "WRPLL1 enabled\n"); I915_STATE_WARN(I915_READ(WRPLL_CTL(1)) & WRPLL_PLL_ENABLE, "WRPLL2 enabled\n"); - I915_STATE_WARN(I915_READ(PCH_PP_STATUS) & PP_ON, "Panel power on\n"); + I915_STATE_WARN(I915_READ(PP_STATUS(0)) & PP_ON, "Panel power on\n"); I915_STATE_WARN(I915_READ(BLC_PWM_CPU_CTL2) & BLM_PWM_ENABLE, "CPU PWM1 enabled\n"); if (IS_HASWELL(dev)) @@ -9459,7 +10061,7 @@ static void hsw_write_dcomp(struct drm_i915_private *dev_priv, uint32_t val) mutex_lock(&dev_priv->rps.hw_lock); if (sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_D_COMP, val)) - DRM_ERROR("Failed to write to D_COMP\n"); + DRM_DEBUG_KMS("Failed to write to D_COMP\n"); mutex_unlock(&dev_priv->rps.hw_lock); } else { I915_WRITE(D_COMP_BDW, val); } @@ -9867,15 +10469,12 @@ static void bxt_get_ddi_pll(struct drm_i915_private *dev_priv, switch (port) { case PORT_A: - pipe_config->ddi_pll_sel = SKL_DPLL0; id = DPLL_ID_SKL_DPLL0; break; case PORT_B: - pipe_config->ddi_pll_sel = SKL_DPLL1; id = DPLL_ID_SKL_DPLL1; break; case PORT_C: - pipe_config->ddi_pll_sel = SKL_DPLL2; id = DPLL_ID_SKL_DPLL2; break; default: @@ -9894,25 +10493,10 @@ static void skylake_get_ddi_pll(struct drm_i915_private *dev_priv, u32 temp; temp = I915_READ(DPLL_CTRL2) & DPLL_CTRL2_DDI_CLK_SEL_MASK(port); - pipe_config->ddi_pll_sel = temp >> (port * 3 + 1); + id = temp >> (port * 3 + 1); - switch (pipe_config->ddi_pll_sel) { - case SKL_DPLL0: - id = DPLL_ID_SKL_DPLL0; - break; - case SKL_DPLL1: - id = DPLL_ID_SKL_DPLL1; - break; - case SKL_DPLL2: - id = DPLL_ID_SKL_DPLL2; - break; - case SKL_DPLL3: - id = DPLL_ID_SKL_DPLL3; - break; - default: - MISSING_CASE(pipe_config->ddi_pll_sel); + if (WARN_ON(id < SKL_DPLL0 || id > SKL_DPLL3)) return; - } pipe_config->shared_dpll = intel_get_shared_dpll_by_id(dev_priv, id); } @@ -9922,10 +10506,9 @@ static void haswell_get_ddi_pll(struct drm_i915_private *dev_priv, struct intel_crtc_state *pipe_config) { enum intel_dpll_id id; + uint32_t ddi_pll_sel = I915_READ(PORT_CLK_SEL(port)); - pipe_config->ddi_pll_sel = I915_READ(PORT_CLK_SEL(port)); - - switch (pipe_config->ddi_pll_sel) { + switch (ddi_pll_sel) { case PORT_CLK_SEL_WRPLL1: id = DPLL_ID_WRPLL1; break; @@ -9945,7 +10528,7 @@ static void haswell_get_ddi_pll(struct drm_i915_private *dev_priv, id = DPLL_ID_LCPLL_2700; break; 
default: - MISSING_CASE(pipe_config->ddi_pll_sel); + MISSING_CASE(ddi_pll_sel); /* fall through */ case PORT_CLK_SEL_NONE: return; @@ -10178,7 +10761,7 @@ static void i845_update_cursor(struct drm_crtc *crtc, u32 base, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t cntl = 0, size = 0; - if (plane_state && plane_state->visible) { + if (plane_state && plane_state->base.visible) { unsigned int width = plane_state->base.crtc_w; unsigned int height = plane_state->base.crtc_h; unsigned int stride = roundup_pow_of_two(width) * 4; @@ -10239,10 +10822,14 @@ static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base, struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + const struct skl_wm_values *wm = &dev_priv->wm.skl_results; int pipe = intel_crtc->pipe; uint32_t cntl = 0; - if (plane_state && plane_state->visible) { + if (INTEL_GEN(dev_priv) >= 9 && wm->dirty_pipes & drm_crtc_mask(crtc)) + skl_write_cursor_wm(intel_crtc, wm); + + if (plane_state && plane_state->base.visible) { cntl = MCURSOR_GAMMA_ENABLE; switch (plane_state->base.crtc_w) { case 64: @@ -10263,7 +10850,7 @@ static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base, if (HAS_DDI(dev)) cntl |= CURSOR_PIPE_CSC_ENABLE; - if (plane_state->base.rotation == BIT(DRM_ROTATE_180)) + if (plane_state->base.rotation == DRM_ROTATE_180) cntl |= CURSOR_ROTATE_180; } @@ -10309,7 +10896,7 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc, /* ILK+ do this automagically */ if (HAS_GMCH_DISPLAY(dev) && - plane_state->base.rotation == BIT(DRM_ROTATE_180)) { + plane_state->base.rotation == DRM_ROTATE_180) { base += (plane_state->base.crtc_h * plane_state->base.crtc_w - 1) * 4; } @@ -10442,7 +11029,7 @@ intel_framebuffer_create_for_mode(struct drm_device *dev, fb = intel_framebuffer_create(dev, &mode_cmd, obj); if (IS_ERR(fb)) - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); return fb; } @@ -10953,13 +11540,13 @@ static void intel_unpin_work_fn(struct work_struct *__work) mutex_lock(&dev->struct_mutex); intel_unpin_fb_obj(work->old_fb, primary->state->rotation); - drm_gem_object_unreference(&work->pending_flip_obj->base); - - if (work->flip_queued_req) - i915_gem_request_assign(&work->flip_queued_req, NULL); + i915_gem_object_put(work->pending_flip_obj); mutex_unlock(&dev->struct_mutex); - intel_frontbuffer_flip_complete(dev, to_intel_plane(primary)->frontbuffer_bit); + i915_gem_request_put(work->flip_queued_req); + + intel_frontbuffer_flip_complete(to_i915(dev), + to_intel_plane(primary)->frontbuffer_bit); intel_fbc_post_update(crtc); drm_framebuffer_unreference(work->old_fb); @@ -10980,10 +11567,8 @@ static bool __pageflip_finished_cs(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - unsigned reset_counter; - reset_counter = i915_reset_counter(&dev_priv->gpu_error); - if (crtc->reset_counter != reset_counter) + if (abort_flip_on_reset(crtc)) return true; /* @@ -11124,7 +11709,7 @@ static int intel_gen2_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); u32 flip_mask; int ret; @@ -11140,13 +11725,13 @@ static int intel_gen2_queue_flip(struct drm_device *dev, flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; else flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; - 
intel_ring_emit(engine, MI_WAIT_FOR_EVENT | flip_mask); - intel_ring_emit(engine, MI_NOOP); - intel_ring_emit(engine, MI_DISPLAY_FLIP | + intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(engine, fb->pitches[0]); - intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(engine, 0); /* aux display base address, unused */ + intel_ring_emit(ring, fb->pitches[0]); + intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); + intel_ring_emit(ring, 0); /* aux display base address, unused */ return 0; } @@ -11158,7 +11743,7 @@ static int intel_gen3_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); u32 flip_mask; int ret; @@ -11171,13 +11756,13 @@ static int intel_gen3_queue_flip(struct drm_device *dev, flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; else flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; - intel_ring_emit(engine, MI_WAIT_FOR_EVENT | flip_mask); - intel_ring_emit(engine, MI_NOOP); - intel_ring_emit(engine, MI_DISPLAY_FLIP_I915 | + intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(engine, fb->pitches[0]); - intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, fb->pitches[0]); + intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); + intel_ring_emit(ring, MI_NOOP); return 0; } @@ -11189,7 +11774,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t pf, pipesrc; @@ -11203,11 +11788,11 @@ static int intel_gen4_queue_flip(struct drm_device *dev, * Display Registers (which do not change across a page-flip) * so we need only reprogram the base address. */ - intel_ring_emit(engine, MI_DISPLAY_FLIP | + intel_ring_emit(ring, MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(engine, fb->pitches[0]); - intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset | - obj->tiling_mode); + intel_ring_emit(ring, fb->pitches[0]); + intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset | + intel_fb_modifier_to_tiling(fb->modifier[0])); /* XXX Enabling the panel-fitter across page-flip is so far * untested on non-native modes, so ignore it for now. 
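
With requests now carrying their ring directly, the gen2 and gen3 paths above differ only in the flip opcode and the trailing dword. A condensed sketch of the shared emit pattern; emit_flip is a made-up name, and the snippet assumes intel_ring_begin(req, 6) already reserved the space:

    static void emit_flip(struct drm_i915_gem_request *req,
                          struct intel_crtc *intel_crtc,
                          struct drm_framebuffer *fb,
                          u32 flip_mask, u32 opcode, u32 tail)
    {
            struct intel_ring *ring = req->ring;

            intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
            intel_ring_emit(ring, MI_NOOP);
            intel_ring_emit(ring, opcode |
                            MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
            intel_ring_emit(ring, fb->pitches[0]);
            intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
            intel_ring_emit(ring, tail); /* 0 on gen2, MI_NOOP on gen3 */
    }
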
@@ -11215,7 +11800,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev, */ pf = 0; pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; - intel_ring_emit(engine, pf | pipesrc); + intel_ring_emit(ring, pf | pipesrc); return 0; } @@ -11227,7 +11812,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t pf, pipesrc; @@ -11237,10 +11822,11 @@ static int intel_gen6_queue_flip(struct drm_device *dev, if (ret) return ret; - intel_ring_emit(engine, MI_DISPLAY_FLIP | + intel_ring_emit(ring, MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(engine, fb->pitches[0] | obj->tiling_mode); - intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset); + intel_ring_emit(ring, fb->pitches[0] | + intel_fb_modifier_to_tiling(fb->modifier[0])); + intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); /* Contrary to the suggestions in the documentation, * "Enable Panel Fitter" does not seem to be required when page @@ -11250,7 +11836,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev, */ pf = 0; pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; - intel_ring_emit(engine, pf | pipesrc); + intel_ring_emit(ring, pf | pipesrc); return 0; } @@ -11262,7 +11848,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t plane_bit = 0; int len, ret; @@ -11283,7 +11869,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev, } len = 4; - if (engine->id == RCS) { + if (req->engine->id == RCS) { len += 6; /* * On Gen 8, SRM is now taking an extra dword to accommodate @@ -11321,30 +11907,32 @@ static int intel_gen7_queue_flip(struct drm_device *dev, * for the RCS also doesn't appear to drop events. Setting the DERRMR * to zero does lead to lockups within MI_DISPLAY_FLIP. 
*/ - if (engine->id == RCS) { - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(engine, DERRMR); - intel_ring_emit(engine, ~(DERRMR_PIPEA_PRI_FLIP_DONE | + if (req->engine->id == RCS) { + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit_reg(ring, DERRMR); + intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE | DERRMR_PIPEB_PRI_FLIP_DONE | DERRMR_PIPEC_PRI_FLIP_DONE)); if (IS_GEN8(dev)) - intel_ring_emit(engine, MI_STORE_REGISTER_MEM_GEN8 | + intel_ring_emit(ring, MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT); else - intel_ring_emit(engine, MI_STORE_REGISTER_MEM | + intel_ring_emit(ring, MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT); - intel_ring_emit_reg(engine, DERRMR); - intel_ring_emit(engine, engine->scratch.gtt_offset + 256); + intel_ring_emit_reg(ring, DERRMR); + intel_ring_emit(ring, + i915_ggtt_offset(req->engine->scratch) + 256); if (IS_GEN8(dev)) { - intel_ring_emit(engine, 0); - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_NOOP); } } - intel_ring_emit(engine, MI_DISPLAY_FLIP_I915 | plane_bit); - intel_ring_emit(engine, (fb->pitches[0] | obj->tiling_mode)); - intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(engine, (MI_NOOP)); + intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit); + intel_ring_emit(ring, fb->pitches[0] | + intel_fb_modifier_to_tiling(fb->modifier[0])); + intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); + intel_ring_emit(ring, (MI_NOOP)); return 0; } @@ -11379,7 +11967,8 @@ static bool use_mmio_flip(struct intel_engine_cs *engine, if (resv && !reservation_object_test_signaled_rcu(resv, false)) return true; - return engine != i915_gem_request_get_engine(obj->last_write_req); + return engine != i915_gem_active_get_engine(&obj->last_write, + &obj->base.dev->struct_mutex); } static void skl_do_mmio_flip(struct intel_crtc *intel_crtc, @@ -11390,7 +11979,7 @@ static void skl_do_mmio_flip(struct intel_crtc *intel_crtc, struct drm_i915_private *dev_priv = to_i915(dev); struct drm_framebuffer *fb = intel_crtc->base.primary->fb; const enum pipe pipe = intel_crtc->pipe; - u32 ctl, stride, tile_height; + u32 ctl, stride = skl_plane_stride(fb, 0, rotation); ctl = I915_READ(PLANE_CTL(pipe, 0)); ctl &= ~PLANE_CTL_TILED_MASK; @@ -11411,20 +12000,6 @@ static void skl_do_mmio_flip(struct intel_crtc *intel_crtc, } /* - * The stride is either expressed as a multiple of 64 bytes chunks for - * linear buffers or in number of tiles for tiled buffers. - */ - if (intel_rotation_90_or_270(rotation)) { - /* stride = Surface height in tiles */ - tile_height = intel_tile_height(dev_priv, fb->modifier[0], 0); - stride = DIV_ROUND_UP(fb->height, tile_height); - } else { - stride = fb->pitches[0] / - intel_fb_stride_alignment(dev_priv, fb->modifier[0], - fb->pixel_format); - } - - /* * Both PLANE_CTL and PLANE_STRIDE are not updated on vblank but on * PLANE_SURF updates, the update is then guaranteed to be atomic. 
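
The comment above is the whole point of the MMIO flip path: PLANE_CTL and PLANE_STRIDE are only staged, and take effect when PLANE_SURF is written, so the surface address must be the last write. A minimal sketch with the driver's register macros (the ctl/stride/offset computations are omitted):

    I915_WRITE(PLANE_CTL(pipe, 0), ctl);
    I915_WRITE(PLANE_STRIDE(pipe, 0), stride);
    /* this write latches both staged values atomically */
    I915_WRITE(PLANE_SURF(pipe, 0), intel_crtc->flip_work->gtt_offset);
    POSTING_READ(PLANE_SURF(pipe, 0));
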
*/ @@ -11440,15 +12015,13 @@ static void ilk_do_mmio_flip(struct intel_crtc *intel_crtc, { struct drm_device *dev = intel_crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_framebuffer *intel_fb = - to_intel_framebuffer(intel_crtc->base.primary->fb); - struct drm_i915_gem_object *obj = intel_fb->obj; + struct drm_framebuffer *fb = intel_crtc->base.primary->fb; i915_reg_t reg = DSPCNTR(intel_crtc->plane); u32 dspcntr; dspcntr = I915_READ(reg); - if (obj->tiling_mode != I915_TILING_NONE) + if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) dspcntr |= DISPPLANE_TILED; else dspcntr &= ~DISPPLANE_TILED; @@ -11471,9 +12044,8 @@ static void intel_mmio_flip_work_func(struct work_struct *w) struct reservation_object *resv; if (work->flip_queued_req) - WARN_ON(__i915_wait_request(work->flip_queued_req, - false, NULL, - &dev_priv->rps.mmioflips)); + WARN_ON(i915_wait_request(work->flip_queued_req, + 0, NULL, NO_WAITBOOST)); /* For framebuffer backed by dmabuf, wait for fence */ resv = i915_gem_object_get_dmabuf_resv(obj); @@ -11584,7 +12156,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, struct intel_flip_work *work; struct intel_engine_cs *engine; bool mmio_flip; - struct drm_i915_gem_request *request = NULL; + struct drm_i915_gem_request *request; + struct i915_vma *vma; int ret; /* @@ -11650,22 +12223,18 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, /* Reference the objects for the scheduled work. */ drm_framebuffer_reference(work->old_fb); - drm_gem_object_reference(&obj->base); crtc->primary->fb = fb; update_state_fb(crtc->primary); - intel_fbc_pre_update(intel_crtc, intel_crtc->config, - to_intel_plane_state(primary->state)); - - work->pending_flip_obj = obj; + work->pending_flip_obj = i915_gem_object_get(obj); ret = i915_mutex_lock_interruptible(dev); if (ret) goto cleanup; - intel_crtc->reset_counter = i915_reset_counter(&dev_priv->gpu_error); - if (__i915_reset_in_progress_or_wedged(intel_crtc->reset_counter)) { + intel_crtc->reset_count = i915_reset_count(&dev_priv->gpu_error); + if (i915_reset_in_progress_or_wedged(&dev_priv->gpu_error)) { ret = -EIO; goto cleanup; } @@ -11677,13 +12246,14 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) { engine = &dev_priv->engine[BCS]; - if (obj->tiling_mode != intel_fb_obj(work->old_fb)->tiling_mode) + if (fb->modifier[0] != old_fb->modifier[0]) /* vlv: DISPLAY_FLIP fails to change tiling */ engine = NULL; } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) { engine = &dev_priv->engine[BCS]; } else if (INTEL_INFO(dev)->gen >= 7) { - engine = i915_gem_request_get_engine(obj->last_write_req); + engine = i915_gem_active_get_engine(&obj->last_write, + &obj->base.dev->struct_mutex); if (engine == NULL || engine->id != RCS) engine = &dev_priv->engine[BCS]; } else { @@ -11692,47 +12262,52 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, mmio_flip = use_mmio_flip(engine, obj); - /* When using CS flips, we want to emit semaphores between rings. - * However, when using mmio flips we will create a task to do the - * synchronisation, so all we want here is to pin the framebuffer - * into the display plane and skip any waits. 
- */ - if (!mmio_flip) { - ret = i915_gem_object_sync(obj, engine, &request); - if (!ret && !request) { - request = i915_gem_request_alloc(engine, NULL); - ret = PTR_ERR_OR_ZERO(request); - } - - if (ret) - goto cleanup_pending; - } - - ret = intel_pin_and_fence_fb_obj(fb, primary->state->rotation); - if (ret) + vma = intel_pin_and_fence_fb_obj(fb, primary->state->rotation); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); goto cleanup_pending; + } - work->gtt_offset = intel_plane_obj_offset(to_intel_plane(primary), - obj, 0); + work->gtt_offset = intel_fb_gtt_offset(fb, primary->state->rotation); work->gtt_offset += intel_crtc->dspaddr_offset; work->rotation = crtc->primary->state->rotation; + /* + * There's the potential that the next frame will not be compatible with + * FBC, so we want to call pre_update() before the actual page flip. + * The problem is that pre_update() caches some information about the fb + * object, so we want to do this only after the object is pinned. Let's + * be on the safe side and do this immediately before scheduling the + * flip. + */ + intel_fbc_pre_update(intel_crtc, intel_crtc->config, + to_intel_plane_state(primary->state)); + if (mmio_flip) { INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func); - i915_gem_request_assign(&work->flip_queued_req, - obj->last_write_req); - + work->flip_queued_req = i915_gem_active_get(&obj->last_write, + &obj->base.dev->struct_mutex); schedule_work(&work->mmio_work); } else { - i915_gem_request_assign(&work->flip_queued_req, request); + request = i915_gem_request_alloc(engine, engine->last_context); + if (IS_ERR(request)) { + ret = PTR_ERR(request); + goto cleanup_unpin; + } + + ret = i915_gem_request_await_object(request, obj, false); + if (ret) + goto cleanup_request; + ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, request, page_flip_flags); if (ret) - goto cleanup_unpin; + goto cleanup_request; intel_mark_page_flip_active(intel_crtc, work); + work->flip_queued_req = i915_gem_request_get(request); i915_add_request_no_flush(request); } @@ -11740,25 +12315,25 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, to_intel_plane(primary)->frontbuffer_bit); mutex_unlock(&dev->struct_mutex); - intel_frontbuffer_flip_prepare(dev, + intel_frontbuffer_flip_prepare(to_i915(dev), to_intel_plane(primary)->frontbuffer_bit); trace_i915_flip_request(intel_crtc->plane, obj); return 0; +cleanup_request: + i915_add_request_no_flush(request); cleanup_unpin: intel_unpin_fb_obj(fb, crtc->primary->state->rotation); cleanup_pending: - if (!IS_ERR_OR_NULL(request)) - i915_add_request_no_flush(request); atomic_dec(&intel_crtc->unpin_work_count); mutex_unlock(&dev->struct_mutex); cleanup: crtc->primary->fb = old_fb; update_state_fb(crtc->primary); - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); drm_framebuffer_unreference(work->old_fb); spin_lock_irq(&dev->event_lock); @@ -11826,7 +12401,7 @@ static bool intel_wm_need_update(struct drm_plane *plane, struct intel_plane_state *cur = to_intel_plane_state(plane->state); /* Update watermarks on tiling or size changes. 
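
One subtlety in the reworked error paths of intel_crtc_page_flip() above: once i915_gem_request_alloc() has succeeded, the request must still be committed with i915_add_request_no_flush() on failure rather than freed, which is why the unwind ladder gains a cleanup_request rung ahead of the unpin. Condensed shape of those paths (driver calls, elisions marked):

            ret = i915_gem_request_await_object(request, obj, false);
            if (ret)
                    goto cleanup_request;
            ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, request,
                                               page_flip_flags);
            if (ret)
                    goto cleanup_request;
            /* ... success path continues ... */
    cleanup_request:
            i915_add_request_no_flush(request); /* commit, never kfree */
    cleanup_unpin:
            intel_unpin_fb_obj(fb, crtc->primary->state->rotation);
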
*/ - if (new->visible != cur->visible) + if (new->base.visible != cur->base.visible) return true; if (!cur->base.fb || !new->base.fb) @@ -11834,10 +12409,10 @@ static bool intel_wm_need_update(struct drm_plane *plane, if (cur->base.fb->modifier[0] != new->base.fb->modifier[0] || cur->base.rotation != new->base.rotation || - drm_rect_width(&new->src) != drm_rect_width(&cur->src) || - drm_rect_height(&new->src) != drm_rect_height(&cur->src) || - drm_rect_width(&new->dst) != drm_rect_width(&cur->dst) || - drm_rect_height(&new->dst) != drm_rect_height(&cur->dst)) + drm_rect_width(&new->base.src) != drm_rect_width(&cur->base.src) || + drm_rect_height(&new->base.src) != drm_rect_height(&cur->base.src) || + drm_rect_width(&new->base.dst) != drm_rect_width(&cur->base.dst) || + drm_rect_height(&new->base.dst) != drm_rect_height(&cur->base.dst)) return true; return false; @@ -11845,10 +12420,10 @@ static bool intel_wm_need_update(struct drm_plane *plane, static bool needs_scaling(struct intel_plane_state *state) { - int src_w = drm_rect_width(&state->src) >> 16; - int src_h = drm_rect_height(&state->src) >> 16; - int dst_w = drm_rect_width(&state->dst); - int dst_h = drm_rect_height(&state->dst); + int src_w = drm_rect_width(&state->base.src) >> 16; + int src_h = drm_rect_height(&state->base.src) >> 16; + int dst_w = drm_rect_width(&state->base.dst); + int dst_h = drm_rect_height(&state->base.dst); return (src_w != dst_w || src_h != dst_h); } @@ -11879,8 +12454,8 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, return ret; } - was_visible = old_plane_state->visible; - visible = to_intel_plane_state(plane_state)->visible; + was_visible = old_plane_state->base.visible; + visible = to_intel_plane_state(plane_state)->base.visible; if (!was_crtc_enabled && WARN_ON(was_visible)) was_visible = false; @@ -11896,7 +12471,7 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, * only combine the results from all planes in the current place? */ if (!is_crtc_enabled) - to_intel_plane_state(plane_state)->visible = visible = false; + to_intel_plane_state(plane_state)->base.visible = visible = false; if (!was_visible && !visible) return 0; @@ -12100,35 +12675,25 @@ static void connected_sink_compute_bpp(struct intel_connector *connector, struct intel_crtc_state *pipe_config) { + const struct drm_display_info *info = &connector->base.display_info; int bpp = pipe_config->pipe_bpp; DRM_DEBUG_KMS("[CONNECTOR:%d:%s] checking for sink bpp constrains\n", - connector->base.base.id, - connector->base.name); + connector->base.base.id, + connector->base.name); /* Don't use an invalid EDID bpc value */ - if (connector->base.display_info.bpc && - connector->base.display_info.bpc * 3 < bpp) { + if (info->bpc != 0 && info->bpc * 3 < bpp) { DRM_DEBUG_KMS("clamping display bpp (was %d) to EDID reported max of %d\n", - bpp, connector->base.display_info.bpc*3); - pipe_config->pipe_bpp = connector->base.display_info.bpc*3; + bpp, info->bpc * 3); + pipe_config->pipe_bpp = info->bpc * 3; } - /* Clamp bpp to default limit on screens without EDID 1.4 */ - if (connector->base.display_info.bpc == 0) { - int type = connector->base.connector_type; - int clamp_bpp = 24; - - /* Fall back to 18 bpp when DP sink capability is unknown. 
*/ - if (type == DRM_MODE_CONNECTOR_DisplayPort || - type == DRM_MODE_CONNECTOR_eDP) - clamp_bpp = 18; - - if (bpp > clamp_bpp) { - DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of %d\n", - bpp, clamp_bpp); - pipe_config->pipe_bpp = clamp_bpp; - } + /* Clamp bpp to 8 on screens without EDID 1.4 */ + if (info->bpc == 0 && bpp > 24) { + DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of 24\n", + bpp); + pipe_config->pipe_bpp = 24; } } @@ -12244,10 +12809,9 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc, DRM_DEBUG_KMS("double wide: %i\n", pipe_config->double_wide); if (IS_BROXTON(dev)) { - DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: ebb0: 0x%x, ebb4: 0x%x," + DRM_DEBUG_KMS("dpll_hw_state: ebb0: 0x%x, ebb4: 0x%x," "pll0: 0x%x, pll1: 0x%x, pll2: 0x%x, pll3: 0x%x, " "pll6: 0x%x, pll8: 0x%x, pll9: 0x%x, pll10: 0x%x, pcsdw12: 0x%x\n", - pipe_config->ddi_pll_sel, pipe_config->dpll_hw_state.ebb0, pipe_config->dpll_hw_state.ebb4, pipe_config->dpll_hw_state.pll0, @@ -12260,15 +12824,13 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc, pipe_config->dpll_hw_state.pll10, pipe_config->dpll_hw_state.pcsdw12); } else if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) { - DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: " + DRM_DEBUG_KMS("dpll_hw_state: " "ctrl1: 0x%x, cfgcr1: 0x%x, cfgcr2: 0x%x\n", - pipe_config->ddi_pll_sel, pipe_config->dpll_hw_state.ctrl1, pipe_config->dpll_hw_state.cfgcr1, pipe_config->dpll_hw_state.cfgcr2); } else if (HAS_DDI(dev)) { - DRM_DEBUG_KMS("ddi_pll_sel: 0x%x; dpll_hw_state: wrpll: 0x%x spll: 0x%x\n", - pipe_config->ddi_pll_sel, + DRM_DEBUG_KMS("dpll_hw_state: wrpll: 0x%x spll: 0x%x\n", pipe_config->dpll_hw_state.wrpll, pipe_config->dpll_hw_state.spll); } else { @@ -12282,6 +12844,7 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc, DRM_DEBUG_KMS("planes on this crtc\n"); list_for_each_entry(plane, &dev->mode_config.plane_list, head) { + char *format_name; intel_plane = to_intel_plane(plane); if (intel_plane->pipe != crtc->pipe) continue; @@ -12294,19 +12857,23 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc, continue; } + format_name = drm_get_format_name(fb->pixel_format); + DRM_DEBUG_KMS("[PLANE:%d:%s] enabled", plane->base.id, plane->name); DRM_DEBUG_KMS("\tFB:%d, fb = %ux%u format = %s", - fb->base.id, fb->width, fb->height, - drm_get_format_name(fb->pixel_format)); + fb->base.id, fb->width, fb->height, format_name); DRM_DEBUG_KMS("\tscaler:%d src %dx%d+%d+%d dst %dx%d+%d+%d\n", state->scaler_id, - state->src.x1 >> 16, state->src.y1 >> 16, - drm_rect_width(&state->src) >> 16, - drm_rect_height(&state->src) >> 16, - state->dst.x1, state->dst.y1, - drm_rect_width(&state->dst), - drm_rect_height(&state->dst)); + state->base.src.x1 >> 16, + state->base.src.y1 >> 16, + drm_rect_width(&state->base.src) >> 16, + drm_rect_height(&state->base.src) >> 16, + state->base.dst.x1, state->base.dst.y1, + drm_rect_width(&state->base.dst), + drm_rect_height(&state->base.dst)); + + kfree(format_name); } } @@ -12315,6 +12882,7 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state) struct drm_device *dev = state->dev; struct drm_connector *connector; unsigned int used_ports = 0; + unsigned int used_mst_ports = 0; /* * Walk the connector list instead of the encoder @@ -12351,11 +12919,20 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state) return false; used_ports |= port_mask; + break; + case INTEL_OUTPUT_DP_MST: + used_mst_ports |= + 1 << 
enc_to_mst(&encoder->base)->primary->port; + break; default: break; } } + /* can't mix MST and SST/HDMI on the same port */ + if (used_ports & used_mst_ports) + return false; + return true; } @@ -12366,7 +12943,6 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state) struct intel_crtc_scaler_state scaler_state; struct intel_dpll_hw_state dpll_hw_state; struct intel_shared_dpll *shared_dpll; - uint32_t ddi_pll_sel; bool force_thru; /* FIXME: before the switch to atomic started, a new pipe_config was @@ -12378,7 +12954,6 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state) scaler_state = crtc_state->scaler_state; shared_dpll = crtc_state->shared_dpll; dpll_hw_state = crtc_state->dpll_hw_state; - ddi_pll_sel = crtc_state->ddi_pll_sel; force_thru = crtc_state->pch_pfit.force_thru; memset(crtc_state, 0, sizeof *crtc_state); @@ -12387,7 +12962,6 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state) crtc_state->scaler_state = scaler_state; crtc_state->shared_dpll = shared_dpll; crtc_state->dpll_hw_state = dpll_hw_state; - crtc_state->ddi_pll_sel = ddi_pll_sel; crtc_state->pch_pfit.force_thru = force_thru; } @@ -12475,7 +13049,7 @@ encoder_retry: encoder = to_intel_encoder(connector_state->best_encoder); - if (!(encoder->compute_config(encoder, pipe_config))) { + if (!(encoder->compute_config(encoder, pipe_config, connector_state))) { DRM_DEBUG_KMS("Encoder config failure\n"); goto fail; } @@ -12563,12 +13137,6 @@ static bool intel_fuzzy_clock_check(int clock1, int clock2) return false; } -#define for_each_intel_crtc_masked(dev, mask, intel_crtc) \ - list_for_each_entry((intel_crtc), \ - &(dev)->mode_config.crtc_list, \ - base.head) \ - for_each_if (mask & (1 <<(intel_crtc)->pipe)) - static bool intel_compare_m_n(unsigned int m, unsigned int n, unsigned int m2, unsigned int n2, @@ -12816,8 +13384,6 @@ intel_pipe_config_compare(struct drm_device *dev, PIPE_CONF_CHECK_I(double_wide); - PIPE_CONF_CHECK_X(ddi_pll_sel); - PIPE_CONF_CHECK_P(shared_dpll); PIPE_CONF_CHECK_X(dpll_hw_state.dpll); PIPE_CONF_CHECK_X(dpll_hw_state.dpll_md); @@ -12899,16 +13465,23 @@ static void verify_wm_state(struct drm_crtc *crtc, hw_entry->start, hw_entry->end); } - /* cursor */ - hw_entry = &hw_ddb.plane[pipe][PLANE_CURSOR]; - sw_entry = &sw_ddb->plane[pipe][PLANE_CURSOR]; - - if (!skl_ddb_entry_equal(hw_entry, sw_entry)) { - DRM_ERROR("mismatch in DDB state pipe %c cursor " - "(expected (%u,%u), found (%u,%u))\n", - pipe_name(pipe), - sw_entry->start, sw_entry->end, - hw_entry->start, hw_entry->end); + /* + * cursor + * If the cursor plane isn't active, we may not have updated it's ddb + * allocation. 
In that case since the ddb allocation will be updated + * once the plane becomes visible, we can skip this check + */ + if (intel_crtc->cursor_addr) { + hw_entry = &hw_ddb.plane[pipe][PLANE_CURSOR]; + sw_entry = &sw_ddb->plane[pipe][PLANE_CURSOR]; + + if (!skl_ddb_entry_equal(hw_entry, sw_entry)) { + DRM_ERROR("mismatch in DDB state pipe %c cursor " + "(expected (%u,%u), found (%u,%u))\n", + pipe_name(pipe), + sw_entry->start, sw_entry->end, + hw_entry->start, hw_entry->end); + } } } @@ -13523,8 +14096,9 @@ static int intel_atomic_prepare_commit(struct drm_device *dev, if (!intel_plane_state->wait_req) continue; - ret = __i915_wait_request(intel_plane_state->wait_req, - true, NULL, NULL); + ret = i915_wait_request(intel_plane_state->wait_req, + I915_WAIT_INTERRUPTIBLE, + NULL, NULL); if (ret) { /* Any hang should be swallowed by the wait */ WARN_ON(ret == -EIO); @@ -13614,6 +14188,111 @@ static bool needs_vblank_wait(struct intel_crtc_state *crtc_state) return false; } +static void intel_update_crtc(struct drm_crtc *crtc, + struct drm_atomic_state *state, + struct drm_crtc_state *old_crtc_state, + unsigned int *crtc_vblank_mask) +{ + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_crtc_state *pipe_config = to_intel_crtc_state(crtc->state); + bool modeset = needs_modeset(crtc->state); + + if (modeset) { + update_scanline_offset(intel_crtc); + dev_priv->display.crtc_enable(pipe_config, state); + } else { + intel_pre_plane_update(to_intel_crtc_state(old_crtc_state)); + } + + if (drm_atomic_get_existing_plane_state(state, crtc->primary)) { + intel_fbc_enable( + intel_crtc, pipe_config, + to_intel_plane_state(crtc->primary->state)); + } + + drm_atomic_helper_commit_planes_on_crtc(old_crtc_state); + + if (needs_vblank_wait(pipe_config)) + *crtc_vblank_mask |= drm_crtc_mask(crtc); +} + +static void intel_update_crtcs(struct drm_atomic_state *state, + unsigned int *crtc_vblank_mask) +{ + struct drm_crtc *crtc; + struct drm_crtc_state *old_crtc_state; + int i; + + for_each_crtc_in_state(state, crtc, old_crtc_state, i) { + if (!crtc->state->active) + continue; + + intel_update_crtc(crtc, state, old_crtc_state, + crtc_vblank_mask); + } +} + +static void skl_update_crtcs(struct drm_atomic_state *state, + unsigned int *crtc_vblank_mask) +{ + struct drm_device *dev = state->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct drm_crtc *crtc; + struct drm_crtc_state *old_crtc_state; + struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb; + struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb; + unsigned int updated = 0; + bool progress; + enum pipe pipe; + + /* + * Whenever the number of active pipes changes, we need to make sure we + * update the pipes in the right order so that their ddb allocations + * never overlap with eachother inbetween CRTC updates. Otherwise we'll + * cause pipe underruns and other bad stuff. 
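
The loop that follows in skl_update_crtcs() solves the ordering problem this comment describes by repeated passes: a pipe is only committed once its new DDB range no longer overlaps any other pipe's current one. A standalone toy model of that scheme (toy types and names, not the driver's; the driver additionally waits a vblank when a shrinking allocation must land first):

    #include <stdbool.h>

    struct ddb { int start, end; };     /* [start, end) */

    static bool overlaps(const struct ddb *a, const struct ddb *b)
    {
            return a->start < b->end && b->start < a->end;
    }

    static void update_in_safe_order(struct ddb *cur,
                                     const struct ddb *target, int n)
    {
            bool progress;

            do {
                    progress = false;
                    for (int i = 0; i < n; i++) {
                            bool blocked = false;

                            if (cur[i].start == target[i].start &&
                                cur[i].end == target[i].end)
                                    continue; /* already where it should be */
                            for (int j = 0; j < n; j++)
                                    if (j != i && overlaps(&target[i], &cur[j]))
                                            blocked = true;
                            if (blocked)
                                    continue; /* retry on a later pass */
                            cur[i] = target[i]; /* driver waits a vblank here */
                            progress = true;
                    }
            } while (progress);
    }
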
+ */ + do { + int i; + progress = false; + + for_each_crtc_in_state(state, crtc, old_crtc_state, i) { + bool vbl_wait = false; + unsigned int cmask = drm_crtc_mask(crtc); + pipe = to_intel_crtc(crtc)->pipe; + + if (updated & cmask || !crtc->state->active) + continue; + if (skl_ddb_allocation_overlaps(state, cur_ddb, new_ddb, + pipe)) + continue; + + updated |= cmask; + + /* + * If this is an already active pipe, it's DDB changed, + * and this isn't the last pipe that needs updating + * then we need to wait for a vblank to pass for the + * new ddb allocation to take effect. + */ + if (!skl_ddb_allocation_equals(cur_ddb, new_ddb, pipe) && + !crtc->state->active_changed && + intel_state->wm_results.dirty_pipes != updated) + vbl_wait = true; + + intel_update_crtc(crtc, state, old_crtc_state, + crtc_vblank_mask); + + if (vbl_wait) + intel_wait_for_vblank(dev, pipe); + + progress = true; + } + } while (progress); +} + static void intel_atomic_commit_tail(struct drm_atomic_state *state) { struct drm_device *dev = state->dev; @@ -13636,8 +14315,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) if (!intel_plane_state->wait_req) continue; - ret = __i915_wait_request(intel_plane_state->wait_req, - true, NULL, NULL); + ret = i915_wait_request(intel_plane_state->wait_req, + 0, NULL, NULL); /* EIO should be eaten, and we can't get interrupted in the * worker, and blocking commits have waited already. */ WARN_ON(ret); @@ -13673,7 +14352,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) if (old_crtc_state->active) { intel_crtc_disable_planes(crtc, old_crtc_state->plane_mask); - dev_priv->display.crtc_disable(crtc); + dev_priv->display.crtc_disable(to_intel_crtc_state(old_crtc_state), state); intel_crtc->active = false; intel_fbc_disable(intel_crtc); intel_disable_shared_dpll(intel_crtc); @@ -13702,20 +14381,19 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) intel_state->cdclk_pll_vco != dev_priv->cdclk_pll.vco)) dev_priv->display.modeset_commit_cdclk(state); + /* + * SKL workaround: bspec recommends we disable the SAGV when we + * have more then one pipe enabled + */ + if (!intel_can_enable_sagv(state)) + intel_disable_sagv(dev_priv); + intel_modeset_verify_disabled(dev); } - /* Now enable the clocks, plane, pipe, and connectors that we set up. */ + /* Complete the events for pipes that have now been disabled */ for_each_crtc_in_state(state, crtc, old_crtc_state, i) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); bool modeset = needs_modeset(crtc->state); - struct intel_crtc_state *pipe_config = - to_intel_crtc_state(crtc->state); - - if (modeset && crtc->state->active) { - update_scanline_offset(to_intel_crtc(crtc)); - dev_priv->display.crtc_enable(crtc); - } /* Complete events for now disable pipes here. */ if (modeset && !crtc->state->active && crtc->state->event) { @@ -13725,21 +14403,11 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) crtc->state->event = NULL; } - - if (!modeset) - intel_pre_plane_update(to_intel_crtc_state(old_crtc_state)); - - if (crtc->state->active && - drm_atomic_get_existing_plane_state(state, crtc->primary)) - intel_fbc_enable(intel_crtc, pipe_config, to_intel_plane_state(crtc->primary->state)); - - if (crtc->state->active) - drm_atomic_helper_commit_planes_on_crtc(old_crtc_state); - - if (pipe_config->base.active && needs_vblank_wait(pipe_config)) - crtc_vblank_mask |= 1 << i; } + /* Now enable the clocks, plane, pipe, and connectors that we set up. 
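
Also worth noting in this function: the commit is bracketed by the SAGV hooks, condensed here from the hunks above and below (same driver calls, elisions marked):

    if (intel_state->modeset) {
            /* ... cdclk handling ... */
            if (!intel_can_enable_sagv(state))
                    intel_disable_sagv(dev_priv); /* off before pipes light up */
    }
    /* ... planes and pipes updated via display.update_crtcs() ... */
    if (intel_state->modeset && intel_can_enable_sagv(state))
            intel_enable_sagv(dev_priv);          /* back on once settled */
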
*/ + dev_priv->display.update_crtcs(state, &crtc_vblank_mask); + /* FIXME: We should call drm_atomic_helper_commit_hw_done() here * already, but still need the state for the delayed optimization. To * fix this: @@ -13775,6 +14443,9 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) intel_modeset_verify_crtc(crtc, old_crtc_state, crtc->state); } + if (intel_state->modeset && intel_can_enable_sagv(state)) + intel_enable_sagv(dev_priv); + drm_atomic_helper_commit_hw_done(state); if (intel_state->modeset) @@ -13814,19 +14485,12 @@ static void intel_atomic_track_fbs(struct drm_atomic_state *state) { struct drm_plane_state *old_plane_state; struct drm_plane *plane; - struct drm_i915_gem_object *obj, *old_obj; - struct intel_plane *intel_plane; int i; - mutex_lock(&state->dev->struct_mutex); - for_each_plane_in_state(state, plane, old_plane_state, i) { - obj = intel_fb_obj(plane->state->fb); - old_obj = intel_fb_obj(old_plane_state->fb); - intel_plane = to_intel_plane(plane); - - i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit); - } - mutex_unlock(&state->dev->struct_mutex); + for_each_plane_in_state(state, plane, old_plane_state, i) + i915_gem_track_fb(intel_fb_obj(old_plane_state->fb), + intel_fb_obj(plane->state->fb), + to_intel_plane(plane)->frontbuffer_bit); } /** @@ -13922,8 +14586,6 @@ out: drm_atomic_state_free(state); } -#undef for_each_intel_crtc_masked - /* * FIXME: Remove this once i915 is fully DRIVER_ATOMIC by calling * drm_atomic_helper_legacy_gamma_set() directly. @@ -13992,7 +14654,7 @@ static const struct drm_crtc_funcs intel_crtc_funcs = { */ int intel_prepare_plane_fb(struct drm_plane *plane, - const struct drm_plane_state *new_state) + struct drm_plane_state *new_state) { struct drm_device *dev = plane->dev; struct drm_framebuffer *fb = new_state->fb; @@ -14051,15 +14713,17 @@ intel_prepare_plane_fb(struct drm_plane *plane, if (ret) DRM_DEBUG_KMS("failed to attach phys object\n"); } else { - ret = intel_pin_and_fence_fb_obj(fb, new_state->rotation); + struct i915_vma *vma; + + vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation); + if (IS_ERR(vma)) + ret = PTR_ERR(vma); } if (ret == 0) { - struct intel_plane_state *plane_state = - to_intel_plane_state(new_state); - - i915_gem_request_assign(&plane_state->wait_req, - obj->last_write_req); + to_intel_plane_state(new_state)->wait_req = + i915_gem_active_get(&obj->last_write, + &obj->base.dev->struct_mutex); } return ret; @@ -14076,10 +14740,11 @@ intel_prepare_plane_fb(struct drm_plane *plane, */ void intel_cleanup_plane_fb(struct drm_plane *plane, - const struct drm_plane_state *old_state) + struct drm_plane_state *old_state) { struct drm_device *dev = plane->dev; struct intel_plane_state *old_intel_state; + struct intel_plane_state *intel_state = to_intel_plane_state(plane->state); struct drm_i915_gem_object *old_obj = intel_fb_obj(old_state->fb); struct drm_i915_gem_object *obj = intel_fb_obj(plane->state->fb); @@ -14092,6 +14757,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane, !INTEL_INFO(dev)->cursor_needs_physical)) intel_unpin_fb_obj(old_state->fb, old_state->rotation); + i915_gem_request_assign(&intel_state->wait_req, NULL); i915_gem_request_assign(&old_intel_state->wait_req, NULL); } @@ -14126,13 +14792,14 @@ intel_check_primary_plane(struct drm_plane *plane, struct intel_crtc_state *crtc_state, struct intel_plane_state *state) { + struct drm_i915_private *dev_priv = to_i915(plane->dev); struct drm_crtc *crtc = state->base.crtc; - struct drm_framebuffer *fb = state->base.fb; 
int min_scale = DRM_PLANE_HELPER_NO_SCALING; int max_scale = DRM_PLANE_HELPER_NO_SCALING; bool can_position = false; + int ret; - if (INTEL_INFO(plane->dev)->gen >= 9) { + if (INTEL_GEN(dev_priv) >= 9) { /* use scaler when colorkey is not required */ if (state->ckey.flags == I915_SET_COLORKEY_NONE) { min_scale = 1; @@ -14141,22 +14808,35 @@ intel_check_primary_plane(struct drm_plane *plane, can_position = true; } - return drm_plane_helper_check_update(plane, crtc, fb, &state->src, - &state->dst, &state->clip, - state->base.rotation, - min_scale, max_scale, - can_position, true, - &state->visible); + ret = drm_plane_helper_check_state(&state->base, + &state->clip, + min_scale, max_scale, + can_position, true); + if (ret) + return ret; + + if (!state->base.fb) + return 0; + + if (INTEL_GEN(dev_priv) >= 9) { + ret = skl_check_plane_surface(state); + if (ret) + return ret; + } + + return 0; } static void intel_begin_crtc_commit(struct drm_crtc *crtc, struct drm_crtc_state *old_crtc_state) { struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_crtc_state *old_intel_state = to_intel_crtc_state(old_crtc_state); bool modeset = needs_modeset(crtc->state); + enum pipe pipe = intel_crtc->pipe; /* Perform vblank evasion around commit operation */ intel_pipe_update_start(intel_crtc); @@ -14171,8 +14851,12 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc, if (to_intel_crtc_state(crtc->state)->update_pipe) intel_update_pipe_config(intel_crtc, old_intel_state); - else if (INTEL_INFO(dev)->gen >= 9) + else if (INTEL_GEN(dev_priv) >= 9) { skl_detach_scalers(intel_crtc); + + I915_WRITE(PIPE_WM_LINETIME(pipe), + dev_priv->wm.skl_hw.wm_linetime[pipe]); + } } static void intel_finish_crtc_commit(struct drm_crtc *crtc, @@ -14306,11 +14990,11 @@ fail: void intel_create_rotation_property(struct drm_device *dev, struct intel_plane *plane) { if (!dev->mode_config.rotation_property) { - unsigned long flags = BIT(DRM_ROTATE_0) | - BIT(DRM_ROTATE_180); + unsigned long flags = DRM_ROTATE_0 | + DRM_ROTATE_180; if (INTEL_INFO(dev)->gen >= 9) - flags |= BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270); + flags |= DRM_ROTATE_90 | DRM_ROTATE_270; dev->mode_config.rotation_property = drm_mode_create_rotation_property(dev, flags); @@ -14326,19 +15010,17 @@ intel_check_cursor_plane(struct drm_plane *plane, struct intel_crtc_state *crtc_state, struct intel_plane_state *state) { - struct drm_crtc *crtc = crtc_state->base.crtc; struct drm_framebuffer *fb = state->base.fb; struct drm_i915_gem_object *obj = intel_fb_obj(fb); enum pipe pipe = to_intel_plane(plane)->pipe; unsigned stride; int ret; - ret = drm_plane_helper_check_update(plane, crtc, fb, &state->src, - &state->dst, &state->clip, - state->base.rotation, - DRM_PLANE_HELPER_NO_SCALING, - DRM_PLANE_HELPER_NO_SCALING, - true, true, &state->visible); + ret = drm_plane_helper_check_state(&state->base, + &state->clip, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + true, true); if (ret) return ret; @@ -14375,7 +15057,7 @@ intel_check_cursor_plane(struct drm_plane *plane, * Refuse the put the cursor into that compromised position. 
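
The DRM_ROTATE_* values compared and OR'ed throughout these hunks are now bitmasks in their own right, which is why the BIT() wrappers disappear. Illustration of the change only, not a new API:

    unsigned long flags;

    flags = DRM_ROTATE_0 | DRM_ROTATE_180;               /* new: masks   */
    /* flags = BIT(DRM_ROTATE_0) | BIT(DRM_ROTATE_180);     old: indices */
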
*/ if (IS_CHERRYVIEW(plane->dev) && pipe == PIPE_C && - state->visible && state->base.crtc_x < 0) { + state->base.visible && state->base.crtc_x < 0) { DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n"); return -EINVAL; } @@ -14407,7 +15089,7 @@ intel_update_cursor_plane(struct drm_plane *plane, if (!obj) addr = 0; else if (!INTEL_INFO(dev)->cursor_needs_physical) - addr = i915_gem_obj_ggtt_offset(obj); + addr = i915_gem_object_ggtt_offset(obj, NULL); else addr = obj->phys_handle->busaddr; @@ -14453,8 +15135,8 @@ static struct drm_plane *intel_cursor_plane_create(struct drm_device *dev, if (!dev->mode_config.rotation_property) dev->mode_config.rotation_property = drm_mode_create_rotation_property(dev, - BIT(DRM_ROTATE_0) | - BIT(DRM_ROTATE_180)); + DRM_ROTATE_0 | + DRM_ROTATE_180); if (dev->mode_config.rotation_property) drm_object_attach_property(&cursor->base.base, dev->mode_config.rotation_property, @@ -14660,12 +15342,50 @@ static bool intel_crt_present(struct drm_device *dev) return true; } +void intel_pps_unlock_regs_wa(struct drm_i915_private *dev_priv) +{ + int pps_num; + int pps_idx; + + if (HAS_DDI(dev_priv)) + return; + /* + * This w/a is needed at least on CPT/PPT, but to be sure apply it + * everywhere where registers can be write protected. + */ + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + pps_num = 2; + else + pps_num = 1; + + for (pps_idx = 0; pps_idx < pps_num; pps_idx++) { + u32 val = I915_READ(PP_CONTROL(pps_idx)); + + val = (val & ~PANEL_UNLOCK_MASK) | PANEL_UNLOCK_REGS; + I915_WRITE(PP_CONTROL(pps_idx), val); + } +} + +static void intel_pps_init(struct drm_i915_private *dev_priv) +{ + if (HAS_PCH_SPLIT(dev_priv) || IS_BROXTON(dev_priv)) + dev_priv->pps_mmio_base = PCH_PPS_BASE; + else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + dev_priv->pps_mmio_base = VLV_PPS_BASE; + else + dev_priv->pps_mmio_base = PPS_BASE; + + intel_pps_unlock_regs_wa(dev_priv); +} + static void intel_setup_outputs(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); struct intel_encoder *encoder; bool dpd_is_edp = false; + intel_pps_init(dev_priv); + /* * intel_edp_init_connector() depends on this completing first, to * prevent the registeration of both eDP and LVDS and the incorrect @@ -14853,7 +15573,7 @@ static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb) drm_framebuffer_cleanup(fb); mutex_lock(&dev->struct_mutex); WARN_ON(!intel_fb->obj->framebuffer_references--); - drm_gem_object_unreference(&intel_fb->obj->base); + i915_gem_object_put(intel_fb->obj); mutex_unlock(&dev->struct_mutex); kfree(intel_fb); } @@ -14933,24 +15653,27 @@ static int intel_framebuffer_init(struct drm_device *dev, struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = to_i915(dev); - unsigned int aligned_height; + unsigned int tiling = i915_gem_object_get_tiling(obj); int ret; u32 pitch_limit, stride_alignment; + char *format_name; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) { - /* Enforce that fb modifier and tiling mode match, but only for - * X-tiled. This is needed for FBC. */ - if (!!(obj->tiling_mode == I915_TILING_X) != - !!(mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED)) { + /* + * If there's a fence, enforce that + * the fb modifier and tiling mode match. 
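
The fence checks added in intel_framebuffer_init() reduce to: a tiled (fenced) object pins the fb modifier (and, further down, the pitch), while an untiled object leaves the modifier free. A standalone toy version of the modifier rule, with toy constants standing in for the i915/DRM ones:

    #include <stdbool.h>

    enum tiling   { TILING_NONE, TILING_X, TILING_Y };
    enum modifier { MOD_LINEAR, MOD_X_TILED, MOD_Y_TILED };

    static enum tiling modifier_to_tiling(enum modifier m)
    {
            switch (m) {
            case MOD_X_TILED: return TILING_X;
            case MOD_Y_TILED: return TILING_Y;
            default:          return TILING_NONE;
            }
    }

    static bool fb_modifier_ok(enum tiling obj_tiling, enum modifier fb_mod)
    {
            return obj_tiling == TILING_NONE ||
                   obj_tiling == modifier_to_tiling(fb_mod);
    }
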
+ */ + if (tiling != I915_TILING_NONE && + tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) { DRM_DEBUG("tiling_mode doesn't match fb modifier\n"); return -EINVAL; } } else { - if (obj->tiling_mode == I915_TILING_X) + if (tiling == I915_TILING_X) { mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED; - else if (obj->tiling_mode == I915_TILING_Y) { + } else if (tiling == I915_TILING_Y) { DRM_DEBUG("No Y tiling for legacy addfb\n"); return -EINVAL; } @@ -14974,6 +15697,16 @@ static int intel_framebuffer_init(struct drm_device *dev, return -EINVAL; } + /* + * gen2/3 display engine uses the fence if present, + * so the tiling mode must match the fb modifier exactly. + */ + if (INTEL_INFO(dev_priv)->gen < 4 && + tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) { + DRM_DEBUG("tiling_mode must match fb modifier exactly on gen2/3\n"); + return -EINVAL; + } + stride_alignment = intel_fb_stride_alignment(dev_priv, mode_cmd->modifier[0], mode_cmd->pixel_format); @@ -14993,10 +15726,15 @@ static int intel_framebuffer_init(struct drm_device *dev, return -EINVAL; } - if (mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED && - mode_cmd->pitches[0] != obj->stride) { + /* + * If there's a fence, enforce that + * the fb pitch and fence stride match. + */ + if (tiling != I915_TILING_NONE && + mode_cmd->pitches[0] != i915_gem_object_get_stride(obj)) { DRM_DEBUG("pitch (%d) must match tiling stride (%d)\n", - mode_cmd->pitches[0], obj->stride); + mode_cmd->pitches[0], + i915_gem_object_get_stride(obj)); return -EINVAL; } @@ -15009,16 +15747,18 @@ static int intel_framebuffer_init(struct drm_device *dev, break; case DRM_FORMAT_XRGB1555: if (INTEL_INFO(dev)->gen > 3) { - DRM_DEBUG("unsupported pixel format: %s\n", - drm_get_format_name(mode_cmd->pixel_format)); + format_name = drm_get_format_name(mode_cmd->pixel_format); + DRM_DEBUG("unsupported pixel format: %s\n", format_name); + kfree(format_name); return -EINVAL; } break; case DRM_FORMAT_ABGR8888: if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) && INTEL_INFO(dev)->gen < 9) { - DRM_DEBUG("unsupported pixel format: %s\n", - drm_get_format_name(mode_cmd->pixel_format)); + format_name = drm_get_format_name(mode_cmd->pixel_format); + DRM_DEBUG("unsupported pixel format: %s\n", format_name); + kfree(format_name); return -EINVAL; } break; @@ -15026,15 +15766,17 @@ static int intel_framebuffer_init(struct drm_device *dev, case DRM_FORMAT_XRGB2101010: case DRM_FORMAT_XBGR2101010: if (INTEL_INFO(dev)->gen < 4) { - DRM_DEBUG("unsupported pixel format: %s\n", - drm_get_format_name(mode_cmd->pixel_format)); + format_name = drm_get_format_name(mode_cmd->pixel_format); + DRM_DEBUG("unsupported pixel format: %s\n", format_name); + kfree(format_name); return -EINVAL; } break; case DRM_FORMAT_ABGR2101010: if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev)) { - DRM_DEBUG("unsupported pixel format: %s\n", - drm_get_format_name(mode_cmd->pixel_format)); + format_name = drm_get_format_name(mode_cmd->pixel_format); + DRM_DEBUG("unsupported pixel format: %s\n", format_name); + kfree(format_name); return -EINVAL; } break; @@ -15043,14 +15785,16 @@ static int intel_framebuffer_init(struct drm_device *dev, case DRM_FORMAT_YVYU: case DRM_FORMAT_VYUY: if (INTEL_INFO(dev)->gen < 5) { - DRM_DEBUG("unsupported pixel format: %s\n", - drm_get_format_name(mode_cmd->pixel_format)); + format_name = drm_get_format_name(mode_cmd->pixel_format); + DRM_DEBUG("unsupported pixel format: %s\n", format_name); + kfree(format_name); return -EINVAL; } break; default: - 
DRM_DEBUG("unsupported pixel format: %s\n", - drm_get_format_name(mode_cmd->pixel_format)); + format_name = drm_get_format_name(mode_cmd->pixel_format); + DRM_DEBUG("unsupported pixel format: %s\n", format_name); + kfree(format_name); return -EINVAL; } @@ -15058,17 +15802,12 @@ static int intel_framebuffer_init(struct drm_device *dev, if (mode_cmd->offsets[0] != 0) return -EINVAL; - aligned_height = intel_fb_align_height(dev, mode_cmd->height, - mode_cmd->pixel_format, - mode_cmd->modifier[0]); - /* FIXME drm helper for size checks (especially planar formats)? */ - if (obj->base.size < aligned_height * mode_cmd->pitches[0]) - return -EINVAL; - drm_helper_mode_fill_fb_struct(&intel_fb->base, mode_cmd); intel_fb->obj = obj; - intel_fill_fb_info(dev_priv, &intel_fb->base); + ret = intel_fill_fb_info(dev_priv, &intel_fb->base); + if (ret) + return ret; ret = drm_framebuffer_init(dev, &intel_fb->base, &intel_fb_funcs); if (ret) { @@ -15090,13 +15829,13 @@ intel_user_framebuffer_create(struct drm_device *dev, struct drm_i915_gem_object *obj; struct drm_mode_fb_cmd2 mode_cmd = *user_mode_cmd; - obj = to_intel_bo(drm_gem_object_lookup(filp, mode_cmd.handles[0])); - if (&obj->base == NULL) + obj = i915_gem_object_lookup(filp, mode_cmd.handles[0]); + if (!obj) return ERR_PTR(-ENOENT); fb = intel_framebuffer_create(dev, &mode_cmd, obj); if (IS_ERR(fb)) - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); return fb; } @@ -15279,6 +16018,11 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) skl_modeset_calc_cdclk; } + if (dev_priv->info.gen >= 9) + dev_priv->display.update_crtcs = skl_update_crtcs; + else + dev_priv->display.update_crtcs = intel_update_crtcs; + switch (INTEL_INFO(dev_priv)->gen) { case 2: dev_priv->display.queue_flip = intel_gen2_queue_flip; @@ -15480,15 +16224,16 @@ static void intel_init_quirks(struct drm_device *dev) static void i915_disable_vga(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + struct pci_dev *pdev = dev_priv->drm.pdev; u8 sr1; i915_reg_t vga_reg = i915_vgacntrl_reg(dev); /* WaEnableVGAAccessThroughIOPort:ctg,elk,ilk,snb,ivb,vlv,hsw */ - vga_get_uninterruptible(dev->pdev, VGA_RSRC_LEGACY_IO); + vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO); outb(SR01, VGA_SR_INDEX); sr1 = inb(VGA_SR_DATA); outb(sr1 | 1<<5, VGA_SR_DATA); - vga_put(dev->pdev, VGA_RSRC_LEGACY_IO); + vga_put(pdev, VGA_RSRC_LEGACY_IO); udelay(300); I915_WRITE(vga_reg, VGA_DISP_DISABLE); @@ -15504,7 +16249,6 @@ void intel_modeset_init_hw(struct drm_device *dev) dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq; intel_init_clock_gating(dev); - intel_enable_gt_powersave(dev_priv); } /* @@ -15771,15 +16515,22 @@ static bool intel_crtc_has_encoders(struct intel_crtc *crtc) return false; } -static bool intel_encoder_has_connectors(struct intel_encoder *encoder) +static struct intel_connector *intel_encoder_find_connector(struct intel_encoder *encoder) { struct drm_device *dev = encoder->base.dev; struct intel_connector *connector; for_each_connector_on_encoder(dev, &encoder->base, connector) - return true; + return connector; - return false; + return NULL; +} + +static bool has_pch_trancoder(struct drm_i915_private *dev_priv, + enum transcoder pch_transcoder) +{ + return HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) || + (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A); } static void intel_sanitize_crtc(struct intel_crtc *crtc) @@ -15825,7 +16576,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) * 
Temporarily change the plane mapping and disable everything * ... */ plane = crtc->plane; - to_intel_plane_state(crtc->base.primary->state)->visible = true; + to_intel_plane_state(crtc->base.primary->state)->base.visible = true; crtc->plane = !plane; intel_crtc_disable_noatomic(&crtc->base); crtc->plane = plane; @@ -15860,14 +16611,23 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) * worst a fifo underrun happens which also sets this to false. */ crtc->cpu_fifo_underrun_disabled = true; - crtc->pch_fifo_underrun_disabled = true; + /* + * We track the PCH trancoder underrun reporting state + * within the crtc. With crtc for pipe A housing the underrun + * reporting state for PCH transcoder A, crtc for pipe B housing + * it for PCH transcoder B, etc. LPT-H has only PCH transcoder A, + * and marking underrun reporting as disabled for the non-existing + * PCH transcoders B and C would prevent enabling the south + * error interrupt (see cpt_can_enable_serr_int()). + */ + if (has_pch_trancoder(dev_priv, (enum transcoder)crtc->pipe)) + crtc->pch_fifo_underrun_disabled = true; } } static void intel_sanitize_encoder(struct intel_encoder *encoder) { struct intel_connector *connector; - struct drm_device *dev = encoder->base.dev; /* We need to check both for a crtc link (meaning that the * encoder is active and trying to read from a pipe) and the @@ -15875,7 +16635,8 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) bool has_active_crtc = encoder->base.crtc && to_intel_crtc(encoder->base.crtc)->active; - if (intel_encoder_has_connectors(encoder) && !has_active_crtc) { + connector = intel_encoder_find_connector(encoder); + if (connector && !has_active_crtc) { DRM_DEBUG_KMS("[ENCODER:%d:%s] has active connectors but no active pipe!\n", encoder->base.base.id, encoder->base.name); @@ -15884,12 +16645,14 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) * fallout from our resume register restoring. Disable * the encoder manually again. */ if (encoder->base.crtc) { + struct drm_crtc_state *crtc_state = encoder->base.crtc->state; + DRM_DEBUG_KMS("[ENCODER:%d:%s] manually disabled\n", encoder->base.base.id, encoder->base.name); - encoder->disable(encoder); + encoder->disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state); if (encoder->post_disable) - encoder->post_disable(encoder); + encoder->post_disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state); } encoder->base.crtc = NULL; @@ -15897,12 +16660,9 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) * a bug in one of the get_hw_state functions. Or someplace else * in our code, like the register restore mess on resume. Clamp * things to off as a safer default. */ - for_each_intel_connector(dev, connector) { - if (connector->encoder != encoder) - continue; - connector->base.dpms = DRM_MODE_DPMS_OFF; - connector->base.encoder = NULL; - } + + connector->base.dpms = DRM_MODE_DPMS_OFF; + connector->base.encoder = NULL; } /* Enabled encoders without active connectors will be fixed in * the crtc fixup. 
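
The underrun bookkeeping comment earlier in this hunk relies on the crtc for pipe N housing the state for PCH transcoder N; restated with that pipe == transcoder identification spelled out (illustration only, pipe_tracks_pch_transcoder is an invented name for the same predicate as the new has_pch_trancoder() helper):

    static bool pipe_tracks_pch_transcoder(struct drm_i915_private *dev_priv,
                                           enum pipe pipe)
    {
            /* IBX/CPT: one PCH transcoder per pipe; LPT-H: transcoder A only */
            return HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) ||
                   (HAS_PCH_LPT_H(dev_priv) && pipe == PIPE_A);
    }
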
*/ @@ -15952,10 +16712,10 @@ static void readout_plane_state(struct intel_crtc *crtc) struct intel_plane_state *plane_state = to_intel_plane_state(primary->state); - plane_state->visible = crtc->active && + plane_state->base.visible = crtc->active && primary_get_hw_state(to_intel_plane(primary)); - if (plane_state->visible) + if (plane_state->base.visible) crtc->base.state->plane_mask |= 1 << drm_plane_index(primary); } @@ -16174,9 +16934,10 @@ void intel_display_resume(struct drm_device *dev) struct drm_atomic_state *state = dev_priv->modeset_restore_state; struct drm_modeset_acquire_ctx ctx; int ret; - bool setup = false; dev_priv->modeset_restore_state = NULL; + if (state) + state->acquire_ctx = &ctx; /* * This is a cludge because with real atomic modeset mode_config.mutex @@ -16187,43 +16948,17 @@ void intel_display_resume(struct drm_device *dev) mutex_lock(&dev->mode_config.mutex); drm_modeset_acquire_init(&ctx, 0); -retry: - ret = drm_modeset_lock_all_ctx(dev, &ctx); - - if (ret == 0 && !setup) { - setup = true; - - intel_modeset_setup_hw_state(dev); - i915_redisable_vga(dev); - } - - if (ret == 0 && state) { - struct drm_crtc_state *crtc_state; - struct drm_crtc *crtc; - int i; - - state->acquire_ctx = &ctx; - - /* ignore any reset values/BIOS leftovers in the WM registers */ - to_intel_atomic_state(state)->skip_intermediate_wm = true; - - for_each_crtc_in_state(state, crtc, crtc_state, i) { - /* - * Force recalculation even if we restore - * current state. With fast modeset this may not result - * in a modeset when the state is compatible. - */ - crtc_state->mode_changed = true; - } - - ret = drm_atomic_commit(state); - } + while (1) { + ret = drm_modeset_lock_all_ctx(dev, &ctx); + if (ret != -EDEADLK) + break; - if (ret == -EDEADLK) { drm_modeset_backoff(&ctx); - goto retry; } + if (!ret) + ret = __intel_display_resume(dev, state); + drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); mutex_unlock(&dev->mode_config.mutex); @@ -16239,7 +16974,6 @@ void intel_modeset_gem_init(struct drm_device *dev) struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *c; struct drm_i915_gem_object *obj; - int ret; intel_init_gt_powersave(dev_priv); @@ -16253,15 +16987,17 @@ void intel_modeset_gem_init(struct drm_device *dev) * for this. 
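/*
 * The resume path above uses DRM's standard deadlock-avoidance loop:
 * try to take every modeset lock, and on -EDEADLK drop them all and
 * retry. A distilled sketch, with do_work() standing in for
 * __intel_display_resume():
 */
#include <drm/drm_modeset_lock.h>

static int with_all_modeset_locks(struct drm_device *dev,
                                  int (*do_work)(struct drm_device *dev))
{
        struct drm_modeset_acquire_ctx ctx;
        int ret;

        drm_modeset_acquire_init(&ctx, 0);
        while (1) {
                ret = drm_modeset_lock_all_ctx(dev, &ctx);
                if (ret != -EDEADLK)
                        break;                  /* success or a real error */
                drm_modeset_backoff(&ctx);      /* drop locks, wait, retry */
        }
        if (!ret)
                ret = do_work(dev);
        drm_modeset_drop_locks(&ctx);
        drm_modeset_acquire_fini(&ctx);
        return ret;
}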
*/ for_each_crtc(dev, c) { + struct i915_vma *vma; + obj = intel_fb_obj(c->primary->fb); if (obj == NULL) continue; mutex_lock(&dev->struct_mutex); - ret = intel_pin_and_fence_fb_obj(c->primary->fb, + vma = intel_pin_and_fence_fb_obj(c->primary->fb, c->primary->state->rotation); mutex_unlock(&dev->struct_mutex); - if (ret) { + if (IS_ERR(vma)) { DRM_ERROR("failed to pin boot fb on pipe %d\n", to_intel_crtc(c)->pipe); drm_framebuffer_unreference(c->primary->fb); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 21b04c3eda41..14a3cf0b7213 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -190,6 +190,29 @@ intel_dp_max_data_rate(int max_link_clock, int max_lanes) return (max_link_clock * max_lanes * 8) / 10; } +static int +intel_dp_downstream_max_dotclock(struct intel_dp *intel_dp) +{ + struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); + struct intel_encoder *encoder = &intel_dig_port->base; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + int max_dotclk = dev_priv->max_dotclk_freq; + int ds_max_dotclk; + + int type = intel_dp->downstream_ports[0] & DP_DS_PORT_TYPE_MASK; + + if (type != DP_DS_PORT_TYPE_VGA) + return max_dotclk; + + ds_max_dotclk = drm_dp_downstream_max_clock(intel_dp->dpcd, + intel_dp->downstream_ports); + + if (ds_max_dotclk != 0) + max_dotclk = min(max_dotclk, ds_max_dotclk); + + return max_dotclk; +} + static enum drm_mode_status intel_dp_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) @@ -199,7 +222,9 @@ intel_dp_mode_valid(struct drm_connector *connector, struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode; int target_clock = mode->clock; int max_rate, mode_rate, max_lanes, max_link_clock; - int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; + int max_dotclk; + + max_dotclk = intel_dp_downstream_max_dotclock(intel_dp); if (is_edp(intel_dp) && fixed_mode) { if (mode->hdisplay > fixed_mode->hdisplay) @@ -256,6 +281,8 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev, static void intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, struct intel_dp *intel_dp); +static void +intel_dp_pps_init(struct drm_device *dev, struct intel_dp *intel_dp); static void pps_lock(struct intel_dp *intel_dp) { @@ -463,13 +490,13 @@ typedef bool (*vlv_pipe_check)(struct drm_i915_private *dev_priv, static bool vlv_pipe_has_pp_on(struct drm_i915_private *dev_priv, enum pipe pipe) { - return I915_READ(VLV_PIPE_PP_STATUS(pipe)) & PP_ON; + return I915_READ(PP_STATUS(pipe)) & PP_ON; } static bool vlv_pipe_has_vdd_on(struct drm_i915_private *dev_priv, enum pipe pipe) { - return I915_READ(VLV_PIPE_PP_CONTROL(pipe)) & EDP_FORCE_VDD; + return I915_READ(PP_CONTROL(pipe)) & EDP_FORCE_VDD; } static bool vlv_pipe_any(struct drm_i915_private *dev_priv, @@ -486,7 +513,7 @@ vlv_initial_pps_pipe(struct drm_i915_private *dev_priv, enum pipe pipe; for (pipe = PIPE_A; pipe <= PIPE_B; pipe++) { - u32 port_sel = I915_READ(VLV_PIPE_PP_ON_DELAYS(pipe)) & + u32 port_sel = I915_READ(PP_ON_DELAYS(pipe)) & PANEL_PORT_SELECT_MASK; if (port_sel != PANEL_PORT_SELECT_VLV(port)) @@ -583,30 +610,21 @@ static void intel_pps_get_registers(struct drm_i915_private *dev_priv, struct intel_dp *intel_dp, struct pps_registers *regs) { + int pps_idx = 0; + memset(regs, 0, sizeof(*regs)); - if (IS_BROXTON(dev_priv)) { - int idx = bxt_power_sequencer_idx(intel_dp); - - regs->pp_ctrl = BXT_PP_CONTROL(idx); - regs->pp_stat = BXT_PP_STATUS(idx); - 
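/*
 * intel_pin_and_fence_fb_obj() now hands back the VMA itself and
 * reports failure through the kernel's encoded-error-pointer
 * convention, unpacked with IS_ERR()/PTR_ERR(). Minimal sketch with a
 * hypothetical make_foo() helper:
 */
#include <linux/err.h>
#include <linux/slab.h>

struct foo { int x; };

static struct foo *make_foo(int x)
{
        struct foo *f = kmalloc(sizeof(*f), GFP_KERNEL);

        if (!f)
                return ERR_PTR(-ENOMEM);        /* errno rides in the pointer */
        f->x = x;
        return f;
}

static int use_foo(void)
{
        struct foo *f = make_foo(1);

        if (IS_ERR(f))
                return PTR_ERR(f);              /* recover the errno */
        kfree(f);
        return 0;
}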
regs->pp_on = BXT_PP_ON_DELAYS(idx); - regs->pp_off = BXT_PP_OFF_DELAYS(idx); - } else if (HAS_PCH_SPLIT(dev_priv)) { - regs->pp_ctrl = PCH_PP_CONTROL; - regs->pp_stat = PCH_PP_STATUS; - regs->pp_on = PCH_PP_ON_DELAYS; - regs->pp_off = PCH_PP_OFF_DELAYS; - regs->pp_div = PCH_PP_DIVISOR; - } else { - enum pipe pipe = vlv_power_sequencer_pipe(intel_dp); + if (IS_BROXTON(dev_priv)) + pps_idx = bxt_power_sequencer_idx(intel_dp); + else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + pps_idx = vlv_power_sequencer_pipe(intel_dp); - regs->pp_ctrl = VLV_PIPE_PP_CONTROL(pipe); - regs->pp_stat = VLV_PIPE_PP_STATUS(pipe); - regs->pp_on = VLV_PIPE_PP_ON_DELAYS(pipe); - regs->pp_off = VLV_PIPE_PP_OFF_DELAYS(pipe); - regs->pp_div = VLV_PIPE_PP_DIVISOR(pipe); - } + regs->pp_ctrl = PP_CONTROL(pps_idx); + regs->pp_stat = PP_STATUS(pps_idx); + regs->pp_on = PP_ON_DELAYS(pps_idx); + regs->pp_off = PP_OFF_DELAYS(pps_idx); + if (!IS_BROXTON(dev_priv)) + regs->pp_div = PP_DIVISOR(pps_idx); } static i915_reg_t @@ -651,8 +669,8 @@ static int edp_notify_handler(struct notifier_block *this, unsigned long code, i915_reg_t pp_ctrl_reg, pp_div_reg; u32 pp_div; - pp_ctrl_reg = VLV_PIPE_PP_CONTROL(pipe); - pp_div_reg = VLV_PIPE_PP_DIVISOR(pipe); + pp_ctrl_reg = PP_CONTROL(pipe); + pp_div_reg = PP_DIVISOR(pipe); pp_div = I915_READ(pp_div_reg); pp_div &= PP_REFERENCE_DIVIDER_MASK; @@ -1041,10 +1059,10 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) if (WARN_ON(txsize > 20)) return -E2BIG; + WARN_ON(!msg->buffer != !msg->size); + if (msg->buffer) memcpy(txbuf + HEADER_SIZE, msg->buffer, msg->size); - else - WARN_ON(msg->size); ret = intel_dp_aux_ch(intel_dp, txbuf, txsize, rxbuf, rxsize); if (ret > 0) { @@ -1250,7 +1268,7 @@ intel_dp_aux_fini(struct intel_dp *intel_dp) } static void -intel_dp_aux_init(struct intel_dp *intel_dp, struct intel_connector *connector) +intel_dp_aux_init(struct intel_dp *intel_dp) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); enum port port = intel_dig_port->port; @@ -1426,6 +1444,44 @@ static void intel_dp_print_rates(struct intel_dp *intel_dp) DRM_DEBUG_KMS("common rates: %s\n", str); } +static void intel_dp_print_hw_revision(struct intel_dp *intel_dp) +{ + uint8_t rev; + int len; + + if ((drm_debug & DRM_UT_KMS) == 0) + return; + + if (!(intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] & + DP_DWN_STRM_PORT_PRESENT)) + return; + + len = drm_dp_dpcd_read(&intel_dp->aux, DP_BRANCH_HW_REV, &rev, 1); + if (len < 0) + return; + + DRM_DEBUG_KMS("sink hw revision: %d.%d\n", (rev & 0xf0) >> 4, rev & 0xf); +} + +static void intel_dp_print_sw_revision(struct intel_dp *intel_dp) +{ + uint8_t rev[2]; + int len; + + if ((drm_debug & DRM_UT_KMS) == 0) + return; + + if (!(intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] & + DP_DWN_STRM_PORT_PRESENT)) + return; + + len = drm_dp_dpcd_read(&intel_dp->aux, DP_BRANCH_SW_REV, &rev, 2); + if (len < 0) + return; + + DRM_DEBUG_KMS("sink sw revision: %d.%d\n", rev[0], rev[1]); +} + static int rate_to_index(int find, const int *rates) { int i = 0; @@ -1447,7 +1503,7 @@ intel_dp_max_link_rate(struct intel_dp *intel_dp) if (WARN_ON(len <= 0)) return 162000; - return rates[rate_to_index(0, rates) - 1]; + return rates[len - 1]; } int intel_dp_rate_select(struct intel_dp *intel_dp, int rate) @@ -1468,9 +1524,24 @@ void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock, } } +static int intel_dp_compute_bpp(struct intel_dp *intel_dp, + struct intel_crtc_state *pipe_config) +{ + int bpp, bpc; + + bpp = 
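/*
 * Shape of the panel-power-sequencer cleanup above: three per-platform
 * register banks collapse into one indexed macro family, leaving only
 * the index computation platform specific. The offsets here are
 * invented for illustration, not the real register map.
 */
#define PPS_BASE_               0x61200
#define PPS_STRIDE_             0x100
#define PPS_REG_(idx, off)      (PPS_BASE_ + (idx) * PPS_STRIDE_ + (off))

#define PP_STATUS_(idx)         PPS_REG_(idx, 0x0)
#define PP_CONTROL_(idx)        PPS_REG_(idx, 0x4)

static int pps_index(bool is_bxt, bool is_vlv_chv, int bxt_idx, int pipe)
{
        if (is_bxt)
                return bxt_idx;         /* one block per power sequencer */
        if (is_vlv_chv)
                return pipe;            /* one block per pipe */
        return 0;                       /* single block everywhere else */
}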
pipe_config->pipe_bpp; + bpc = drm_dp_downstream_max_bpc(intel_dp->dpcd, intel_dp->downstream_ports); + + if (bpc > 0) + bpp = min(bpp, 3*bpc); + + return bpp; +} + bool intel_dp_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config) + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -1533,7 +1604,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, /* Walk through all bpp values. Luckily they're all nicely spaced with 2 * bpc in between. */ - bpp = pipe_config->pipe_bpp; + bpp = intel_dp_compute_bpp(intel_dp, pipe_config); if (is_edp(intel_dp)) { /* Get bpp from vbt only for panels that dont have bpp in edid */ @@ -1647,22 +1718,28 @@ found: } void intel_dp_set_link_params(struct intel_dp *intel_dp, - const struct intel_crtc_state *pipe_config) + int link_rate, uint8_t lane_count, + bool link_mst) { - intel_dp->link_rate = pipe_config->port_clock; - intel_dp->lane_count = pipe_config->lane_count; + intel_dp->link_rate = link_rate; + intel_dp->lane_count = lane_count; + intel_dp->link_mst = link_mst; } -static void intel_dp_prepare(struct intel_encoder *encoder) +static void intel_dp_prepare(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config) { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); enum port port = dp_to_dig_port(intel_dp)->port; struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); - const struct drm_display_mode *adjusted_mode = &crtc->config->base.adjusted_mode; + const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; - intel_dp_set_link_params(intel_dp, crtc->config); + intel_dp_set_link_params(intel_dp, pipe_config->port_clock, + pipe_config->lane_count, + intel_crtc_has_type(pipe_config, + INTEL_OUTPUT_DP_MST)); /* * There are four kinds of DP registers: @@ -1688,7 +1765,7 @@ static void intel_dp_prepare(struct intel_encoder *encoder) /* Handle DP bits in common between all three register formats */ intel_dp->DP |= DP_VOLTAGE_0_4 | DP_PRE_EMPHASIS_0; - intel_dp->DP |= DP_PORT_WIDTH(crtc->config->lane_count); + intel_dp->DP |= DP_PORT_WIDTH(pipe_config->lane_count); /* Split out the IBX/CPU vs CPT settings */ @@ -1716,7 +1793,7 @@ static void intel_dp_prepare(struct intel_encoder *encoder) I915_WRITE(TRANS_DP_CTL(crtc->pipe), trans_dp); } else { if (!HAS_PCH_SPLIT(dev) && !IS_VALLEYVIEW(dev) && - !IS_CHERRYVIEW(dev) && crtc->config->limited_color_range) + !IS_CHERRYVIEW(dev) && pipe_config->limited_color_range) intel_dp->DP |= DP_COLOR_RANGE_16_235; if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC) @@ -1835,7 +1912,8 @@ static u32 ironlake_get_pp_control(struct intel_dp *intel_dp) lockdep_assert_held(&dev_priv->pps_mutex); control = I915_READ(_pp_ctrl_reg(intel_dp)); - if (!IS_BROXTON(dev)) { + if (WARN_ON(!HAS_DDI(dev_priv) && + (control & PANEL_UNLOCK_MASK) != PANEL_UNLOCK_REGS)) { control &= ~PANEL_UNLOCK_MASK; control |= PANEL_UNLOCK_REGS; } @@ -1956,7 +2034,7 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) DRM_DEBUG_KMS("PP_STATUS: 0x%08x PP_CONTROL: 0x%08x\n", I915_READ(pp_stat_reg), I915_READ(pp_ctrl_reg)); - if ((pp & POWER_TARGET_ON) == 0) + if ((pp & PANEL_POWER_ON) == 0) intel_dp->panel_power_off_time = ktime_get_boottime(); power_domain = intel_display_port_aux_power_domain(intel_encoder); @@ -2043,7 +2121,7 @@ static void 
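/*
 * Worked example for intel_dp_compute_bpp() above: pipe bpp counts all
 * three colour components, so a branch device reporting a max of 8
 * bits per component caps a proposed 30 bpp (10 bpc) mode at
 * 3 * 8 = 24 bpp. Sketch:
 */
static int clamp_pipe_bpp(int pipe_bpp, int downstream_max_bpc)
{
        if (downstream_max_bpc > 0 && pipe_bpp > 3 * downstream_max_bpc)
                return 3 * downstream_max_bpc;
        return pipe_bpp;                /* no downstream limit known */
}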
edp_panel_on(struct intel_dp *intel_dp) POSTING_READ(pp_ctrl_reg); } - pp |= POWER_TARGET_ON; + pp |= PANEL_POWER_ON; if (!IS_GEN5(dev)) pp |= PANEL_POWER_RESET; @@ -2095,7 +2173,7 @@ static void edp_panel_off(struct intel_dp *intel_dp) pp = ironlake_get_pp_control(intel_dp); /* We need to switch off panel power _and_ force vdd, for otherwise some * panels get very unhappy and cease to work. */ - pp &= ~(POWER_TARGET_ON | PANEL_POWER_RESET | EDP_FORCE_VDD | + pp &= ~(PANEL_POWER_ON | PANEL_POWER_RESET | EDP_FORCE_VDD | EDP_BLC_ENABLE); pp_ctrl_reg = _pp_ctrl_reg(intel_dp); @@ -2254,10 +2332,10 @@ static void assert_edp_pll(struct drm_i915_private *dev_priv, bool state) #define assert_edp_pll_enabled(d) assert_edp_pll((d), true) #define assert_edp_pll_disabled(d) assert_edp_pll((d), false) -static void ironlake_edp_pll_on(struct intel_dp *intel_dp) +static void ironlake_edp_pll_on(struct intel_dp *intel_dp, + struct intel_crtc_state *pipe_config) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_crtc *crtc = to_intel_crtc(intel_dig_port->base.base.crtc); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); assert_pipe_disabled(dev_priv, crtc->pipe); @@ -2265,11 +2343,11 @@ static void ironlake_edp_pll_on(struct intel_dp *intel_dp) assert_edp_pll_disabled(dev_priv); DRM_DEBUG_KMS("enabling eDP PLL for clock %d\n", - crtc->config->port_clock); + pipe_config->port_clock); intel_dp->DP &= ~DP_PLL_FREQ_MASK; - if (crtc->config->port_clock == 162000) + if (pipe_config->port_clock == 162000) intel_dp->DP |= DP_PLL_FREQ_162MHZ; else intel_dp->DP |= DP_PLL_FREQ_270MHZ; @@ -2478,16 +2556,17 @@ static void intel_dp_get_config(struct intel_encoder *encoder, } } -static void intel_disable_dp(struct intel_encoder *encoder) +static void intel_disable_dp(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - struct drm_device *dev = encoder->base.dev; - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - if (crtc->config->has_audio) + if (old_crtc_state->has_audio) intel_audio_codec_disable(encoder); - if (HAS_PSR(dev) && !HAS_DDI(dev)) + if (HAS_PSR(dev_priv) && !HAS_DDI(dev_priv)) intel_psr_disable(intel_dp); /* Make sure the panel is off before trying to change the mode. 
But also @@ -2498,11 +2577,13 @@ static void intel_disable_dp(struct intel_encoder *encoder) intel_edp_panel_off(intel_dp); /* disable the port before the pipe on g4x */ - if (INTEL_INFO(dev)->gen < 5) + if (INTEL_GEN(dev_priv) < 5) intel_dp_link_down(intel_dp); } -static void ilk_post_disable_dp(struct intel_encoder *encoder) +static void ilk_post_disable_dp(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); enum port port = dp_to_dig_port(intel_dp)->port; @@ -2514,14 +2595,18 @@ static void ilk_post_disable_dp(struct intel_encoder *encoder) ironlake_edp_pll_off(intel_dp); } -static void vlv_post_disable_dp(struct intel_encoder *encoder) +static void vlv_post_disable_dp(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); intel_dp_link_down(intel_dp); } -static void chv_post_disable_dp(struct intel_encoder *encoder) +static void chv_post_disable_dp(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct drm_device *dev = encoder->base.dev; @@ -2547,6 +2632,10 @@ _intel_dp_set_link_train(struct intel_dp *intel_dp, struct drm_i915_private *dev_priv = to_i915(dev); enum port port = intel_dig_port->port; + if (dp_train_pat & DP_TRAINING_PATTERN_MASK) + DRM_DEBUG_KMS("Using DP training pattern TPS%d\n", + dp_train_pat & DP_TRAINING_PATTERN_MASK); + if (HAS_DDI(dev)) { uint32_t temp = I915_READ(DP_TP_CTL(port)); @@ -2588,7 +2677,7 @@ _intel_dp_set_link_train(struct intel_dp *intel_dp, *DP |= DP_LINK_TRAIN_PAT_2_CPT; break; case DP_TRAINING_PATTERN_3: - DRM_ERROR("DP training pattern 3 not supported\n"); + DRM_DEBUG_KMS("TPS3 not supported, using TPS2 instead\n"); *DP |= DP_LINK_TRAIN_PAT_2_CPT; break; } @@ -2613,7 +2702,7 @@ _intel_dp_set_link_train(struct intel_dp *intel_dp, if (IS_CHERRYVIEW(dev)) { *DP |= DP_LINK_TRAIN_PAT_3_CHV; } else { - DRM_ERROR("DP training pattern 3 not supported\n"); + DRM_DEBUG_KMS("TPS3 not supported, using TPS2 instead\n"); *DP |= DP_LINK_TRAIN_PAT_2; } break; @@ -2621,19 +2710,15 @@ _intel_dp_set_link_train(struct intel_dp *intel_dp, } } -static void intel_dp_enable_port(struct intel_dp *intel_dp) +static void intel_dp_enable_port(struct intel_dp *intel_dp, + struct intel_crtc_state *old_crtc_state) { struct drm_device *dev = intel_dp_to_dev(intel_dp); struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc = - to_intel_crtc(dp_to_dig_port(intel_dp)->base.base.crtc); /* enable with pattern 1 (as per spec) */ - _intel_dp_set_link_train(intel_dp, &intel_dp->DP, - DP_TRAINING_PATTERN_1); - I915_WRITE(intel_dp->output_reg, intel_dp->DP); - POSTING_READ(intel_dp->output_reg); + intel_dp_program_link_training_pattern(intel_dp, DP_TRAINING_PATTERN_1); /* * Magic for VLV/CHV. We _must_ first set up the register @@ -2642,14 +2727,15 @@ static void intel_dp_enable_port(struct intel_dp *intel_dp) * fail when the power sequencer is freshly used for this port. 
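/*
 * The two hunks above downgrade the "TPS3 not supported" case from an
 * error to an expected fallback. The selection rule they implement,
 * sketched (TPS3 is required for HBR2 and offers better de-emphasis,
 * but both source and sink must support it):
 */
static int pick_eq_training_pattern(bool source_tps3, bool sink_tps3)
{
        if (source_tps3 && sink_tps3)
                return 3;       /* TPS3 */
        return 2;               /* TPS2 fallback, logged at debug level */
}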
*/ intel_dp->DP |= DP_PORT_EN; - if (crtc->config->has_audio) + if (old_crtc_state->has_audio) intel_dp->DP |= DP_AUDIO_OUTPUT_ENABLE; I915_WRITE(intel_dp->output_reg, intel_dp->DP); POSTING_READ(intel_dp->output_reg); } -static void intel_enable_dp(struct intel_encoder *encoder) +static void intel_enable_dp(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct drm_device *dev = encoder->base.dev; @@ -2666,7 +2752,7 @@ static void intel_enable_dp(struct intel_encoder *encoder) if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) vlv_init_panel_power_sequencer(intel_dp); - intel_dp_enable_port(intel_dp); + intel_dp_enable_port(intel_dp, pipe_config); edp_panel_vdd_on(intel_dp); edp_panel_on(intel_dp); @@ -2678,7 +2764,7 @@ static void intel_enable_dp(struct intel_encoder *encoder) unsigned int lane_mask = 0x0; if (IS_CHERRYVIEW(dev)) - lane_mask = intel_dp_unused_lane_mask(crtc->config->lane_count); + lane_mask = intel_dp_unused_lane_mask(pipe_config->lane_count); vlv_wait_port_ready(dev_priv, dp_to_dig_port(intel_dp), lane_mask); @@ -2688,22 +2774,26 @@ static void intel_enable_dp(struct intel_encoder *encoder) intel_dp_start_link_train(intel_dp); intel_dp_stop_link_train(intel_dp); - if (crtc->config->has_audio) { + if (pipe_config->has_audio) { DRM_DEBUG_DRIVER("Enabling DP audio on pipe %c\n", pipe_name(pipe)); intel_audio_codec_enable(encoder); } } -static void g4x_enable_dp(struct intel_encoder *encoder) +static void g4x_enable_dp(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - intel_enable_dp(encoder); + intel_enable_dp(encoder, pipe_config); intel_edp_backlight_on(intel_dp); } -static void vlv_enable_dp(struct intel_encoder *encoder) +static void vlv_enable_dp(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); @@ -2711,16 +2801,18 @@ static void vlv_enable_dp(struct intel_encoder *encoder) intel_psr_enable(intel_dp); } -static void g4x_pre_enable_dp(struct intel_encoder *encoder) +static void g4x_pre_enable_dp(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); enum port port = dp_to_dig_port(intel_dp)->port; - intel_dp_prepare(encoder); + intel_dp_prepare(encoder, pipe_config); /* Only ilk+ has port A */ if (port == PORT_A) - ironlake_edp_pll_on(intel_dp); + ironlake_edp_pll_on(intel_dp, pipe_config); } static void vlv_detach_power_sequencer(struct intel_dp *intel_dp) @@ -2728,7 +2820,7 @@ static void vlv_detach_power_sequencer(struct intel_dp *intel_dp) struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); enum pipe pipe = intel_dp->pps_pipe; - i915_reg_t pp_on_reg = VLV_PIPE_PP_ON_DELAYS(pipe); + i915_reg_t pp_on_reg = PP_ON_DELAYS(pipe); edp_panel_vdd_off_sync(intel_dp); @@ -2826,38 +2918,48 @@ static void vlv_init_panel_power_sequencer(struct intel_dp *intel_dp) intel_dp_init_panel_power_sequencer_registers(dev, intel_dp); } -static void vlv_pre_enable_dp(struct intel_encoder *encoder) +static void vlv_pre_enable_dp(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { 
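/*
 * The signature churn through the rest of this file follows one rule:
 * encoder hooks stop fishing state out of encoder->base.crtc->config
 * and instead receive the atomic states explicitly from the caller.
 * Schematic of the before/after shape (stand-in types):
 */
struct enc;
struct crtc_state_;
struct conn_state_;

/* before: state dug out of the encoder's crtc behind the hook's back */
typedef void (*enable_old_t)(struct enc *encoder);

/* after: the states for this commit are handed in by the caller */
typedef void (*enable_new_t)(struct enc *encoder,
                             struct crtc_state_ *crtc_state,
                             struct conn_state_ *conn_state);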
vlv_phy_pre_encoder_enable(encoder); - intel_enable_dp(encoder); + intel_enable_dp(encoder, pipe_config); } -static void vlv_dp_pre_pll_enable(struct intel_encoder *encoder) +static void vlv_dp_pre_pll_enable(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { - intel_dp_prepare(encoder); + intel_dp_prepare(encoder, pipe_config); vlv_phy_pre_pll_enable(encoder); } -static void chv_pre_enable_dp(struct intel_encoder *encoder) +static void chv_pre_enable_dp(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { chv_phy_pre_encoder_enable(encoder); - intel_enable_dp(encoder); + intel_enable_dp(encoder, pipe_config); /* Second common lane will stay alive on its own now */ chv_phy_release_cl2_override(encoder); } -static void chv_dp_pre_pll_enable(struct intel_encoder *encoder) +static void chv_dp_pre_pll_enable(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { - intel_dp_prepare(encoder); + intel_dp_prepare(encoder, pipe_config); chv_phy_pre_pll_enable(encoder); } -static void chv_dp_post_pll_disable(struct intel_encoder *encoder) +static void chv_dp_post_pll_disable(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { chv_phy_post_pll_disable(encoder); } @@ -3395,84 +3497,67 @@ intel_dp_link_down(struct intel_dp *intel_dp) } static bool -intel_dp_get_dpcd(struct intel_dp *intel_dp) +intel_dp_read_dpcd(struct intel_dp *intel_dp) { - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - if (drm_dp_dpcd_read(&intel_dp->aux, 0x000, intel_dp->dpcd, sizeof(intel_dp->dpcd)) < 0) return false; /* aux transfer failed */ DRM_DEBUG_KMS("DPCD: %*ph\n", (int) sizeof(intel_dp->dpcd), intel_dp->dpcd); - if (intel_dp->dpcd[DP_DPCD_REV] == 0) - return false; /* DPCD not present */ + return intel_dp->dpcd[DP_DPCD_REV] != 0; +} - if (drm_dp_dpcd_read(&intel_dp->aux, DP_SINK_COUNT, - &intel_dp->sink_count, 1) < 0) - return false; +static bool +intel_edp_init_dpcd(struct intel_dp *intel_dp) +{ + struct drm_i915_private *dev_priv = + to_i915(dp_to_dig_port(intel_dp)->base.base.dev); - /* - * Sink count can change between short pulse hpd hence - * a member variable in intel_dp will track any changes - * between short pulse interrupts. - */ - intel_dp->sink_count = DP_GET_SINK_COUNT(intel_dp->sink_count); + /* this function is meant to be called only once */ + WARN_ON(intel_dp->dpcd[DP_DPCD_REV] != 0); - /* - * SINK_COUNT == 0 and DOWNSTREAM_PORT_PRESENT == 1 implies that - * a dongle is present but no display. Unless we require to know - * if a dongle is present or not, we don't need to update - * downstream port information. So, an early return here saves - * time from performing other operations which are not required. 
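/*
 * Condensed shape of the DPCD refactor above: one raw read helper is
 * shared by an eDP-only init that runs exactly once (a fixed panel's
 * caps cannot change) and a hotplug-time path that re-reads volatile
 * state such as SINK_COUNT. Stand-in types:
 */
struct dp_caps { unsigned char dpcd_rev; unsigned char sink_count; };

static bool read_dpcd(struct dp_caps *dp)
{
        /* AUX read of the receiver capability block would go here */
        return dp->dpcd_rev != 0;       /* rev 0 == nothing answered */
}

static bool edp_init_dpcd(struct dp_caps *dp)   /* once, at boot */
{
        return read_dpcd(dp);   /* then PSR caps, sink rates, ... */
}

static bool get_dpcd(struct dp_caps *dp)        /* every long pulse */
{
        if (!read_dpcd(dp))
                return false;
        /* SINK_COUNT is the part that changes at runtime: re-read it;
         * 0 with a downstream port present means dongle, no display */
        return dp->sink_count != 0;
}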
- */ - if (!is_edp(intel_dp) && !intel_dp->sink_count) + if (!intel_dp_read_dpcd(intel_dp)) return false; - /* Check if the panel supports PSR */ - memset(intel_dp->psr_dpcd, 0, sizeof(intel_dp->psr_dpcd)); - if (is_edp(intel_dp)) { - drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, - intel_dp->psr_dpcd, - sizeof(intel_dp->psr_dpcd)); - if (intel_dp->psr_dpcd[0] & DP_PSR_IS_SUPPORTED) { - dev_priv->psr.sink_support = true; - DRM_DEBUG_KMS("Detected EDP PSR Panel.\n"); - } + if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11) + dev_priv->no_aux_handshake = intel_dp->dpcd[DP_MAX_DOWNSPREAD] & + DP_NO_AUX_HANDSHAKE_LINK_TRAINING; - if (INTEL_INFO(dev)->gen >= 9 && - (intel_dp->psr_dpcd[0] & DP_PSR2_IS_SUPPORTED)) { - uint8_t frame_sync_cap; - - dev_priv->psr.sink_support = true; - drm_dp_dpcd_read(&intel_dp->aux, - DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP, - &frame_sync_cap, 1); - dev_priv->psr.aux_frame_sync = frame_sync_cap ? true : false; - /* PSR2 needs frame sync as well */ - dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync; - DRM_DEBUG_KMS("PSR2 %s on sink", - dev_priv->psr.psr2_support ? "supported" : "not supported"); - } - - /* Read the eDP Display control capabilities registers */ - memset(intel_dp->edp_dpcd, 0, sizeof(intel_dp->edp_dpcd)); - if ((intel_dp->dpcd[DP_EDP_CONFIGURATION_CAP] & DP_DPCD_DISPLAY_CONTROL_CAPABLE) && - (drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_DPCD_REV, - intel_dp->edp_dpcd, sizeof(intel_dp->edp_dpcd)) == - sizeof(intel_dp->edp_dpcd))) - DRM_DEBUG_KMS("EDP DPCD : %*ph\n", (int) sizeof(intel_dp->edp_dpcd), - intel_dp->edp_dpcd); - } - - DRM_DEBUG_KMS("Display Port TPS3 support: source %s, sink %s\n", - yesno(intel_dp_source_supports_hbr2(intel_dp)), - yesno(drm_dp_tps3_supported(intel_dp->dpcd))); + /* Check if the panel supports PSR */ + drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, + intel_dp->psr_dpcd, + sizeof(intel_dp->psr_dpcd)); + if (intel_dp->psr_dpcd[0] & DP_PSR_IS_SUPPORTED) { + dev_priv->psr.sink_support = true; + DRM_DEBUG_KMS("Detected EDP PSR Panel.\n"); + } + + if (INTEL_GEN(dev_priv) >= 9 && + (intel_dp->psr_dpcd[0] & DP_PSR2_IS_SUPPORTED)) { + uint8_t frame_sync_cap; + + dev_priv->psr.sink_support = true; + drm_dp_dpcd_read(&intel_dp->aux, + DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP, + &frame_sync_cap, 1); + dev_priv->psr.aux_frame_sync = frame_sync_cap ? true : false; + /* PSR2 needs frame sync as well */ + dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync; + DRM_DEBUG_KMS("PSR2 %s on sink", + dev_priv->psr.psr2_support ? 
"supported" : "not supported"); + } + + /* Read the eDP Display control capabilities registers */ + if ((intel_dp->dpcd[DP_EDP_CONFIGURATION_CAP] & DP_DPCD_DISPLAY_CONTROL_CAPABLE) && + drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_DPCD_REV, + intel_dp->edp_dpcd, sizeof(intel_dp->edp_dpcd) == + sizeof(intel_dp->edp_dpcd))) + DRM_DEBUG_KMS("EDP DPCD : %*ph\n", (int) sizeof(intel_dp->edp_dpcd), + intel_dp->edp_dpcd); /* Intermediate frequency support */ - if (is_edp(intel_dp) && (intel_dp->edp_dpcd[0] >= 0x03)) { /* eDp v1.4 or higher */ + if (intel_dp->edp_dpcd[0] >= 0x03) { /* eDp v1.4 or higher */ __le16 sink_rates[DP_MAX_SUPPORTED_RATES]; int i; @@ -3491,7 +3576,36 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) intel_dp->num_sink_rates = i; } - intel_dp_print_rates(intel_dp); + return true; +} + + +static bool +intel_dp_get_dpcd(struct intel_dp *intel_dp) +{ + if (!intel_dp_read_dpcd(intel_dp)) + return false; + + if (drm_dp_dpcd_read(&intel_dp->aux, DP_SINK_COUNT, + &intel_dp->sink_count, 1) < 0) + return false; + + /* + * Sink count can change between short pulse hpd hence + * a member variable in intel_dp will track any changes + * between short pulse interrupts. + */ + intel_dp->sink_count = DP_GET_SINK_COUNT(intel_dp->sink_count); + + /* + * SINK_COUNT == 0 and DOWNSTREAM_PORT_PRESENT == 1 implies that + * a dongle is present but no display. Unless we require to know + * if a dongle is present or not, we don't need to update + * downstream port information. So, an early return here saves + * time from performing other operations which are not required. + */ + if (!is_edp(intel_dp) && !intel_dp->sink_count) + return false; if (!(intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] & DP_DWN_STRM_PORT_PRESENT)) @@ -3526,7 +3640,7 @@ intel_dp_probe_oui(struct intel_dp *intel_dp) } static bool -intel_dp_probe_mst(struct intel_dp *intel_dp) +intel_dp_can_mst(struct intel_dp *intel_dp) { u8 buf[1]; @@ -3539,18 +3653,30 @@ intel_dp_probe_mst(struct intel_dp *intel_dp) if (intel_dp->dpcd[DP_DPCD_REV] < 0x12) return false; - if (drm_dp_dpcd_read(&intel_dp->aux, DP_MSTM_CAP, buf, 1)) { - if (buf[0] & DP_MST_CAP) { - DRM_DEBUG_KMS("Sink is MST capable\n"); - intel_dp->is_mst = true; - } else { - DRM_DEBUG_KMS("Sink is not MST capable\n"); - intel_dp->is_mst = false; - } - } + if (drm_dp_dpcd_read(&intel_dp->aux, DP_MSTM_CAP, buf, 1) != 1) + return false; + + return buf[0] & DP_MST_CAP; +} + +static void +intel_dp_configure_mst(struct intel_dp *intel_dp) +{ + if (!i915.enable_dp_mst) + return; + + if (!intel_dp->can_mst) + return; - drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, intel_dp->is_mst); - return intel_dp->is_mst; + intel_dp->is_mst = intel_dp_can_mst(intel_dp); + + if (intel_dp->is_mst) + DRM_DEBUG_KMS("Sink is MST capable\n"); + else + DRM_DEBUG_KMS("Sink is not MST capable\n"); + + drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, + intel_dp->is_mst); } static int intel_dp_sink_crc_stop(struct intel_dp *intel_dp) @@ -3909,7 +4035,7 @@ static bool intel_dp_short_pulse(struct intel_dp *intel_dp) { struct drm_device *dev = intel_dp_to_dev(intel_dp); - u8 sink_irq_vector; + u8 sink_irq_vector = 0; u8 old_sink_count = intel_dp->sink_count; bool ret; @@ -3936,7 +4062,8 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) /* Try to read the source of the interrupt */ if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11 && - intel_dp_get_sink_irq(intel_dp, &sink_irq_vector)) { + intel_dp_get_sink_irq(intel_dp, &sink_irq_vector) && + sink_irq_vector != 0) { /* Clear interrupt source */ 
drm_dp_dpcd_writeb(&intel_dp->aux, DP_DEVICE_SERVICE_IRQ_VECTOR, @@ -3980,6 +4107,9 @@ intel_dp_detect_dpcd(struct intel_dp *intel_dp) connector_status_connected : connector_status_disconnected; } + if (intel_dp_can_mst(intel_dp)) + return connector_status_connected; + /* If no HPD, poke DDC gently */ if (drm_probe_ddc(&intel_dp->aux.ddc)) return connector_status_connected; @@ -4148,7 +4278,7 @@ static bool bxt_digital_port_connected(struct drm_i915_private *dev_priv, * * Return %true if @port is connected, %false otherwise. */ -bool intel_digital_port_connected(struct drm_i915_private *dev_priv, +static bool intel_digital_port_connected(struct drm_i915_private *dev_priv, struct intel_digital_port *port) { if (HAS_PCH_IBX(dev_priv)) @@ -4207,7 +4337,7 @@ intel_dp_unset_edid(struct intel_dp *intel_dp) intel_dp->has_audio = false; } -static void +static enum drm_connector_status intel_dp_long_pulse(struct intel_connector *intel_connector) { struct drm_connector *connector = &intel_connector->base; @@ -4217,8 +4347,7 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) struct drm_device *dev = connector->dev; enum drm_connector_status status; enum intel_display_power_domain power_domain; - bool ret; - u8 sink_irq_vector; + u8 sink_irq_vector = 0; power_domain = intel_display_port_aux_power_domain(intel_encoder); intel_display_power_get(to_i915(dev), power_domain); @@ -4232,7 +4361,7 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) else status = connector_status_disconnected; - if (status != connector_status_connected) { + if (status == connector_status_disconnected) { intel_dp->compliance_test_active = 0; intel_dp->compliance_test_type = 0; intel_dp->compliance_test_data = 0; @@ -4252,10 +4381,20 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) if (intel_encoder->type != INTEL_OUTPUT_EDP) intel_encoder->type = INTEL_OUTPUT_DP; + DRM_DEBUG_KMS("Display Port TPS3 support: source %s, sink %s\n", + yesno(intel_dp_source_supports_hbr2(intel_dp)), + yesno(drm_dp_tps3_supported(intel_dp->dpcd))); + + intel_dp_print_rates(intel_dp); + intel_dp_probe_oui(intel_dp); - ret = intel_dp_probe_mst(intel_dp); - if (ret) { + intel_dp_print_hw_revision(intel_dp); + intel_dp_print_sw_revision(intel_dp); + + intel_dp_configure_mst(intel_dp); + + if (intel_dp->is_mst) { /* * If we are in MST mode then this connector * won't appear connected or have anything @@ -4284,13 +4423,14 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) intel_dp->aux.i2c_defer_count = 0; intel_dp_set_edid(intel_dp); - - status = connector_status_connected; + if (is_edp(intel_dp) || intel_connector->detect_edid) + status = connector_status_connected; intel_dp->detect_done = true; /* Try to read the source of the interrupt */ if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11 && - intel_dp_get_sink_irq(intel_dp, &sink_irq_vector)) { + intel_dp_get_sink_irq(intel_dp, &sink_irq_vector) && + sink_irq_vector != 0) { /* Clear interrupt source */ drm_dp_dpcd_writeb(&intel_dp->aux, DP_DEVICE_SERVICE_IRQ_VECTOR, @@ -4303,12 +4443,11 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) } out: - if ((status != connector_status_connected) && - (intel_dp->is_mst == false)) + if (status != connector_status_connected && !intel_dp->is_mst) intel_dp_unset_edid(intel_dp); intel_display_power_put(to_i915(dev), power_domain); - return; + return status; } static enum drm_connector_status @@ -4317,7 +4456,7 @@ intel_dp_detect(struct drm_connector *connector, bool force) struct intel_dp *intel_dp = 
intel_attached_dp(connector); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct intel_encoder *intel_encoder = &intel_dig_port->base; - struct intel_connector *intel_connector = to_intel_connector(connector); + enum drm_connector_status status = connector->status; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); @@ -4332,14 +4471,11 @@ intel_dp_detect(struct drm_connector *connector, bool force) /* If full detect is not performed yet, do a full detect */ if (!intel_dp->detect_done) - intel_dp_long_pulse(intel_dp->attached_connector); + status = intel_dp_long_pulse(intel_dp->attached_connector); intel_dp->detect_done = false; - if (is_edp(intel_dp) || intel_connector->detect_edid) - return connector_status_connected; - else - return connector_status_disconnected; + return status; } static void @@ -4630,13 +4766,8 @@ void intel_dp_encoder_reset(struct drm_encoder *encoder) pps_lock(intel_dp); - /* - * Read out the current power sequencer assignment, - * in case the BIOS did something with it. - */ - if (IS_VALLEYVIEW(encoder->dev) || IS_CHERRYVIEW(encoder->dev)) - vlv_initial_power_sequencer_setup(intel_dp); - + /* Reinit the power sequencer, in case BIOS did something with it. */ + intel_dp_pps_init(encoder->dev, intel_dp); intel_edp_panel_vdd_sanitize(intel_dp); pps_unlock(intel_dp); @@ -4696,36 +4827,34 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) port_name(intel_dig_port->port), long_hpd ? "long" : "short"); + if (long_hpd) { + intel_dp->detect_done = false; + return IRQ_NONE; + } + power_domain = intel_display_port_aux_power_domain(intel_encoder); intel_display_power_get(dev_priv, power_domain); - if (long_hpd) { - intel_dp_long_pulse(intel_dp->attached_connector); - if (intel_dp->is_mst) - ret = IRQ_HANDLED; - goto put_power; - - } else { - if (intel_dp->is_mst) { - if (intel_dp_check_mst_status(intel_dp) == -EINVAL) { - /* - * If we were in MST mode, and device is not - * there, get out of MST mode - */ - DRM_DEBUG_KMS("MST device may have disappeared %d vs %d\n", - intel_dp->is_mst, intel_dp->mst_mgr.mst_state); - intel_dp->is_mst = false; - drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, - intel_dp->is_mst); - goto put_power; - } + if (intel_dp->is_mst) { + if (intel_dp_check_mst_status(intel_dp) == -EINVAL) { + /* + * If we were in MST mode, and device is not + * there, get out of MST mode + */ + DRM_DEBUG_KMS("MST device may have disappeared %d vs %d\n", + intel_dp->is_mst, intel_dp->mst_mgr.mst_state); + intel_dp->is_mst = false; + drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, + intel_dp->is_mst); + intel_dp->detect_done = false; + goto put_power; } + } - if (!intel_dp->is_mst) { - if (!intel_dp_short_pulse(intel_dp)) { - intel_dp_long_pulse(intel_dp->attached_connector); - goto put_power; - } + if (!intel_dp->is_mst) { + if (!intel_dp_short_pulse(intel_dp)) { + intel_dp->detect_done = false; + goto put_power; } } @@ -4984,9 +5113,21 @@ intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, I915_READ(regs.pp_div)); } +static void intel_dp_pps_init(struct drm_device *dev, + struct intel_dp *intel_dp) +{ + if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) { + vlv_initial_power_sequencer_setup(intel_dp); + } else { + intel_dp_init_panel_power_sequencer(dev, intel_dp); + intel_dp_init_panel_power_sequencer_registers(dev, intel_dp); + } +} + /** * intel_dp_set_drrs_state - program registers for RR switch to take effect - * @dev: DRM device + * @dev_priv: i915 device + * 
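/*
 * Gist of the hotplug rework above: a long HPD pulse no longer runs
 * the whole detection from the interrupt path; it only invalidates a
 * cached result and lets the next ->detect() call redo the probe and
 * cache the status it returns. Sketch:
 */
struct dp_port { bool detect_done; int status; };

static void on_long_hpd(struct dp_port *p)
{
        p->detect_done = false;         /* force a fresh probe later */
}

static int detect(struct dp_port *p)
{
        if (!p->detect_done) {
                /* full probe: DPCD, sink count, EDID, MST ... */
                p->detect_done = true;
        }
        return p->status;               /* cached between probes */
}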
@crtc_state: a pointer to the active intel_crtc_state * @refresh_rate: RR to be programmed * * This function gets called when refresh rate (RR) has to be changed from @@ -4996,14 +5137,14 @@ intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, * * The caller of this function needs to take a lock on dev_priv->drrs. */ -static void intel_dp_set_drrs_state(struct drm_device *dev, int refresh_rate) +static void intel_dp_set_drrs_state(struct drm_i915_private *dev_priv, + struct intel_crtc_state *crtc_state, + int refresh_rate) { - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_encoder *encoder; struct intel_digital_port *dig_port = NULL; struct intel_dp *intel_dp = dev_priv->drrs.dp; - struct intel_crtc_state *config = NULL; - struct intel_crtc *intel_crtc = NULL; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); enum drrs_refresh_rate_type index = DRRS_HIGH_RR; if (refresh_rate <= 0) { @@ -5030,8 +5171,6 @@ static void intel_dp_set_drrs_state(struct drm_device *dev, int refresh_rate) return; } - config = intel_crtc->config; - if (dev_priv->drrs.type < SEAMLESS_DRRS_SUPPORT) { DRM_DEBUG_KMS("Only Seamless DRRS supported.\n"); return; @@ -5047,12 +5186,12 @@ static void intel_dp_set_drrs_state(struct drm_device *dev, int refresh_rate) return; } - if (!intel_crtc->active) { + if (!crtc_state->base.active) { DRM_DEBUG_KMS("eDP encoder disabled. CRTC not Active\n"); return; } - if (INTEL_INFO(dev)->gen >= 8 && !IS_CHERRYVIEW(dev)) { + if (INTEL_GEN(dev_priv) >= 8 && !IS_CHERRYVIEW(dev_priv)) { switch (index) { case DRRS_HIGH_RR: intel_dp_set_m_n(intel_crtc, M1_N1); @@ -5064,18 +5203,18 @@ static void intel_dp_set_drrs_state(struct drm_device *dev, int refresh_rate) default: DRM_ERROR("Unsupported refreshrate type\n"); } - } else if (INTEL_INFO(dev)->gen > 6) { - i915_reg_t reg = PIPECONF(intel_crtc->config->cpu_transcoder); + } else if (INTEL_GEN(dev_priv) > 6) { + i915_reg_t reg = PIPECONF(crtc_state->cpu_transcoder); u32 val; val = I915_READ(reg); if (index > DRRS_HIGH_RR) { - if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) val |= PIPECONF_EDP_RR_MODE_SWITCH_VLV; else val |= PIPECONF_EDP_RR_MODE_SWITCH; } else { - if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) val &= ~PIPECONF_EDP_RR_MODE_SWITCH_VLV; else val &= ~PIPECONF_EDP_RR_MODE_SWITCH; @@ -5091,18 +5230,17 @@ static void intel_dp_set_drrs_state(struct drm_device *dev, int refresh_rate) /** * intel_edp_drrs_enable - init drrs struct if supported * @intel_dp: DP struct + * @crtc_state: A pointer to the active crtc state. * * Initializes frontbuffer_bits and drrs.dp */ -void intel_edp_drrs_enable(struct intel_dp *intel_dp) +void intel_edp_drrs_enable(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state) { struct drm_device *dev = intel_dp_to_dev(intel_dp); struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_crtc *crtc = dig_port->base.base.crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - if (!intel_crtc->config->has_drrs) { + if (!crtc_state->has_drrs) { DRM_DEBUG_KMS("Panel doesn't support DRRS\n"); return; } @@ -5124,17 +5262,16 @@ unlock: /** * intel_edp_drrs_disable - Disable DRRS * @intel_dp: DP struct + * @old_crtc_state: Pointer to old crtc_state. 
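/*
 * Register-level summary of intel_dp_set_drrs_state() above: gen8+
 * switches refresh rate by picking one of two pre-programmed link M/N
 * sets, gen7 by toggling a PIPECONF mode-switch bit (the bit positions
 * below are placeholders, not the real layout):
 */
enum drrs_rr_ { HIGH_RR_, LOW_RR_ };

static void set_drrs_rr(int gen, bool vlv_chv, enum drrs_rr_ rr,
                        unsigned int *pipeconf)
{
        unsigned int rr_switch = vlv_chv ? (1u << 14) : (1u << 15);

        if (gen >= 8) {
                /* select M1/N1 (high RR) or M2/N2 (low RR) timings */
        } else if (gen > 6) {
                if (rr == LOW_RR_)
                        *pipeconf |= rr_switch;
                else
                        *pipeconf &= ~rr_switch;
        }
}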
* */ -void intel_edp_drrs_disable(struct intel_dp *intel_dp) +void intel_edp_drrs_disable(struct intel_dp *intel_dp, + struct intel_crtc_state *old_crtc_state) { struct drm_device *dev = intel_dp_to_dev(intel_dp); struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_crtc *crtc = dig_port->base.base.crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - if (!intel_crtc->config->has_drrs) + if (!old_crtc_state->has_drrs) return; mutex_lock(&dev_priv->drrs.mutex); @@ -5144,9 +5281,8 @@ void intel_edp_drrs_disable(struct intel_dp *intel_dp) } if (dev_priv->drrs.refresh_rate_type == DRRS_LOW_RR) - intel_dp_set_drrs_state(&dev_priv->drm, - intel_dp->attached_connector->panel. - fixed_mode->vrefresh); + intel_dp_set_drrs_state(dev_priv, old_crtc_state, + intel_dp->attached_connector->panel.fixed_mode->vrefresh); dev_priv->drrs.dp = NULL; mutex_unlock(&dev_priv->drrs.mutex); @@ -5175,10 +5311,12 @@ static void intel_edp_drrs_downclock_work(struct work_struct *work) if (dev_priv->drrs.busy_frontbuffer_bits) goto unlock; - if (dev_priv->drrs.refresh_rate_type != DRRS_LOW_RR) - intel_dp_set_drrs_state(&dev_priv->drm, - intel_dp->attached_connector->panel. - downclock_mode->vrefresh); + if (dev_priv->drrs.refresh_rate_type != DRRS_LOW_RR) { + struct drm_crtc *crtc = dp_to_dig_port(intel_dp)->base.base.crtc; + + intel_dp_set_drrs_state(dev_priv, to_intel_crtc(crtc)->config, + intel_dp->attached_connector->panel.downclock_mode->vrefresh); + } unlock: mutex_unlock(&dev_priv->drrs.mutex); @@ -5186,7 +5324,7 @@ unlock: /** * intel_edp_drrs_invalidate - Disable Idleness DRRS - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called every time rendering on the given planes starts. * Hence DRRS needs to be upclocked, i.e. (LOW_RR -> HIGH_RR). * * Dirty frontbuffers relevant to DRRS are tracked in busy_frontbuffer_bits. */ -void intel_edp_drrs_invalidate(struct drm_device *dev, - unsigned frontbuffer_bits) +void intel_edp_drrs_invalidate(struct drm_i915_private *dev_priv, + unsigned int frontbuffer_bits) { - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; enum pipe pipe; @@ -5220,16 +5357,15 @@ void intel_edp_drrs_invalidate(struct drm_device *dev, /* invalidate means busy screen hence upclock */ if (frontbuffer_bits && dev_priv->drrs.refresh_rate_type == DRRS_LOW_RR) - intel_dp_set_drrs_state(&dev_priv->drm, - dev_priv->drrs.dp->attached_connector->panel. - fixed_mode->vrefresh); + intel_dp_set_drrs_state(dev_priv, to_intel_crtc(crtc)->config, + dev_priv->drrs.dp->attached_connector->panel.fixed_mode->vrefresh); mutex_unlock(&dev_priv->drrs.mutex); } /** * intel_edp_drrs_flush - Restart Idleness DRRS - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called every time rendering on the given planes has @@ -5239,10 +5375,9 @@ void intel_edp_drrs_invalidate(struct drm_device *dev, * * Dirty frontbuffers relevant to DRRS are tracked in busy_frontbuffer_bits.
*/ -void intel_edp_drrs_flush(struct drm_device *dev, - unsigned frontbuffer_bits) +void intel_edp_drrs_flush(struct drm_i915_private *dev_priv, + unsigned int frontbuffer_bits) { - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; enum pipe pipe; @@ -5265,9 +5400,8 @@ void intel_edp_drrs_flush(struct drm_device *dev, /* flush means busy screen hence upclock */ if (frontbuffer_bits && dev_priv->drrs.refresh_rate_type == DRRS_LOW_RR) - intel_dp_set_drrs_state(&dev_priv->drm, - dev_priv->drrs.dp->attached_connector->panel. - fixed_mode->vrefresh); + intel_dp_set_drrs_state(dev_priv, to_intel_crtc(crtc)->config, + dev_priv->drrs.dp->attached_connector->panel.fixed_mode->vrefresh); /* * flush also means no more activity hence schedule downclock, if all @@ -5400,27 +5534,15 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, pps_lock(intel_dp); intel_dp_init_panel_power_timestamps(intel_dp); - - if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) { - vlv_initial_power_sequencer_setup(intel_dp); - } else { - intel_dp_init_panel_power_sequencer(dev, intel_dp); - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp); - } - + intel_dp_pps_init(dev, intel_dp); intel_edp_panel_vdd_sanitize(intel_dp); pps_unlock(intel_dp); /* Cache DPCD and EDID for edp. */ - has_dpcd = intel_dp_get_dpcd(intel_dp); + has_dpcd = intel_edp_init_dpcd(intel_dp); - if (has_dpcd) { - if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11) - dev_priv->no_aux_handshake = - intel_dp->dpcd[DP_MAX_DOWNSPREAD] & - DP_NO_AUX_HANDSHAKE_LINK_TRAINING; - } else { + if (!has_dpcd) { /* if this fails, presume the device is a ghost */ DRM_INFO("failed to retrieve link info, disabling eDP\n"); goto out_vdd_off; @@ -5576,7 +5698,7 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, connector->interlace_allowed = true; connector->doublescan_allowed = 0; - intel_dp_aux_init(intel_dp, intel_connector); + intel_dp_aux_init(intel_dp); INIT_DELAYED_WORK(&intel_dp->panel_vdd_work, edp_panel_vdd_work); diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c index 60fb39cd220b..c438b02184cb 100644 --- a/drivers/gpu/drm/i915/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/intel_dp_link_training.c @@ -24,6 +24,15 @@ #include "intel_drv.h" static void +intel_dp_dump_link_status(const uint8_t link_status[DP_LINK_STATUS_SIZE]) +{ + + DRM_DEBUG_KMS("ln0_1:0x%x ln2_3:0x%x align:0x%x sink:0x%x adj_req0_1:0x%x adj_req2_3:0x%x", + link_status[0], link_status[1], link_status[2], + link_status[3], link_status[4], link_status[5]); +} + +static void intel_get_adjust_train(struct intel_dp *intel_dp, const uint8_t link_status[DP_LINK_STATUS_SIZE]) { @@ -103,13 +112,24 @@ intel_dp_update_link_train(struct intel_dp *intel_dp) return ret == intel_dp->lane_count; } +static bool intel_dp_link_max_vswing_reached(struct intel_dp *intel_dp) +{ + int lane; + + for (lane = 0; lane < intel_dp->lane_count; lane++) + if ((intel_dp->train_set[lane] & + DP_TRAIN_MAX_SWING_REACHED) == 0) + return false; + + return true; +} + /* Enable corresponding port and start training pattern 1 */ -static void +static bool intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp) { - int i; uint8_t voltage; - int voltage_tries, loop_tries; + int voltage_tries, max_vswing_tries; uint8_t link_config[2]; uint8_t link_bw, rate_select; @@ -125,6 +145,7 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp) if (drm_dp_enhanced_frame_cap(intel_dp->dpcd)) link_config[1] |= 
DP_LANE_COUNT_ENHANCED_FRAME_EN; drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, 2); + if (intel_dp->num_sink_rates) drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_RATE_SET, &rate_select, 1); @@ -140,60 +161,54 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp) DP_TRAINING_PATTERN_1 | DP_LINK_SCRAMBLING_DISABLE)) { DRM_ERROR("failed to enable link training\n"); - return; + return false; } - voltage = 0xff; - voltage_tries = 0; - loop_tries = 0; + voltage_tries = 1; + max_vswing_tries = 0; for (;;) { uint8_t link_status[DP_LINK_STATUS_SIZE]; drm_dp_link_train_clock_recovery_delay(intel_dp->dpcd); + if (!intel_dp_get_link_status(intel_dp, link_status)) { DRM_ERROR("failed to get link status\n"); - break; + return false; } if (drm_dp_clock_recovery_ok(link_status, intel_dp->lane_count)) { DRM_DEBUG_KMS("clock recovery OK\n"); - break; + return true; } - /* Check to see if we've tried the max voltage */ - for (i = 0; i < intel_dp->lane_count; i++) - if ((intel_dp->train_set[i] & DP_TRAIN_MAX_SWING_REACHED) == 0) - break; - if (i == intel_dp->lane_count) { - ++loop_tries; - if (loop_tries == 5) { - DRM_ERROR("too many full retries, give up\n"); - break; - } - intel_dp_reset_link_train(intel_dp, - DP_TRAINING_PATTERN_1 | - DP_LINK_SCRAMBLING_DISABLE); - voltage_tries = 0; - continue; + if (voltage_tries == 5) { + DRM_DEBUG_KMS("Same voltage tried 5 times\n"); + return false; + } + + if (max_vswing_tries == 1) { + DRM_DEBUG_KMS("Max Voltage Swing reached\n"); + return false; } - /* Check to see if we've tried the same voltage 5 times */ - if ((intel_dp->train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK) == voltage) { - ++voltage_tries; - if (voltage_tries == 5) { - DRM_ERROR("too many voltage retries, give up\n"); - break; - } - } else - voltage_tries = 0; voltage = intel_dp->train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK; /* Update training set as requested by target */ intel_get_adjust_train(intel_dp, link_status); if (!intel_dp_update_link_train(intel_dp)) { DRM_ERROR("failed to update link training\n"); - break; + return false; } + + if ((intel_dp->train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK) == + voltage) + ++voltage_tries; + else + voltage_tries = 1; + + if (intel_dp_link_max_vswing_reached(intel_dp)) + ++max_vswing_tries; + } } @@ -229,12 +244,12 @@ static u32 intel_dp_training_pattern(struct intel_dp *intel_dp) return training_pattern; } -static void +static bool intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp) { - bool channel_eq = false; - int tries, cr_tries; + int tries; u32 training_pattern; + uint8_t link_status[DP_LINK_STATUS_SIZE]; training_pattern = intel_dp_training_pattern(intel_dp); @@ -243,19 +258,11 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp) training_pattern | DP_LINK_SCRAMBLING_DISABLE)) { DRM_ERROR("failed to start channel equalization\n"); - return; + return false; } - tries = 0; - cr_tries = 0; - channel_eq = false; - for (;;) { - uint8_t link_status[DP_LINK_STATUS_SIZE]; - - if (cr_tries > 5) { - DRM_ERROR("failed to train DP, aborting\n"); - break; - } + intel_dp->channel_eq_status = false; + for (tries = 0; tries < 5; tries++) { drm_dp_link_train_channel_eq_delay(intel_dp->dpcd); if (!intel_dp_get_link_status(intel_dp, link_status)) { @@ -266,44 +273,38 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp) /* Make sure clock is still ok */ if (!drm_dp_clock_recovery_ok(link_status, intel_dp->lane_count)) { - intel_dp_link_training_clock_recovery(intel_dp); - 
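/*
 * Skeleton of the rewritten clock-recovery loop above: the nested
 * retry counters are gone, leaving the two give-up conditions the DP
 * spec actually asks for -- the same voltage tried 5 times, or max
 * voltage swing reached (tried once more):
 */
static bool clock_recovery(bool (*cr_done)(void),
                           bool (*same_voltage)(void),
                           bool (*max_vswing)(void))
{
        int voltage_tries = 1, max_vswing_tries = 0;

        for (;;) {
                if (cr_done())
                        return true;
                if (voltage_tries == 5 || max_vswing_tries == 1)
                        return false;   /* spec-mandated give-up points */

                /* push the sink-requested drive settings, then account */
                voltage_tries = same_voltage() ? voltage_tries + 1 : 1;
                if (max_vswing())
                        ++max_vswing_tries;
        }
}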
intel_dp_set_link_train(intel_dp, - training_pattern | - DP_LINK_SCRAMBLING_DISABLE); - cr_tries++; - continue; + intel_dp_dump_link_status(link_status); + DRM_DEBUG_KMS("Clock recovery check failed, cannot " + "continue channel equalization\n"); + break; } if (drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) { - channel_eq = true; + intel_dp->channel_eq_status = true; + DRM_DEBUG_KMS("Channel EQ done. DP Training " + "successful\n"); break; } - /* Try 5 times, then try clock recovery if that fails */ - if (tries > 5) { - intel_dp_link_training_clock_recovery(intel_dp); - intel_dp_set_link_train(intel_dp, - training_pattern | - DP_LINK_SCRAMBLING_DISABLE); - tries = 0; - cr_tries++; - continue; - } - /* Update training set as requested by target */ intel_get_adjust_train(intel_dp, link_status); if (!intel_dp_update_link_train(intel_dp)) { DRM_ERROR("failed to update link training\n"); break; } - ++tries; + } + + /* Try 5 times, else fail and try at lower BW */ + if (tries == 5) { + intel_dp_dump_link_status(link_status); + DRM_DEBUG_KMS("Channel equalization failed 5 times\n"); } intel_dp_set_idle_link_train(intel_dp); - if (channel_eq) - DRM_DEBUG_KMS("Channel EQ done. DP Training successful\n"); + return intel_dp->channel_eq_status; + } void intel_dp_stop_link_train(struct intel_dp *intel_dp) diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 68a005d729e9..54a9d7610d8f 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -31,18 +31,16 @@ #include <drm/drm_edid.h> static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config) + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; struct drm_atomic_state *state; - int bpp, i; + int bpp; int lane_count, slots; const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; - struct drm_connector *drm_connector; - struct intel_connector *connector, *found = NULL; - struct drm_connector_state *connector_state; int mst_pbn; pipe_config->dp_encoder_is_mst = true; @@ -54,7 +52,6 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, */ lane_count = drm_dp_max_lane_count(intel_dp->dpcd); - pipe_config->lane_count = lane_count; pipe_config->pipe_bpp = 24; @@ -62,20 +59,6 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, state = pipe_config->base.state; - for_each_connector_in_state(state, drm_connector, connector_state, i) { - connector = to_intel_connector(drm_connector); - - if (connector_state->best_encoder == &encoder->base) { - found = connector; - break; - } - } - - if (!found) { - DRM_ERROR("can't find connector\n"); - return false; - } - mst_pbn = drm_dp_calc_pbn_mode(adjusted_mode->crtc_clock, bpp); pipe_config->pbn = mst_pbn; @@ -92,16 +75,20 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, } -static void intel_mst_disable_dp(struct intel_encoder *encoder) +static void intel_mst_disable_dp(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; + struct 
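/*
 * Matching skeleton for channel equalization above: a plain bounded
 * loop that aborts early if clock recovery was lost, instead of
 * re-running clock recovery from inside the EQ phase as before:
 */
static bool channel_eq(bool (*cr_still_ok)(void), bool (*eq_done)(void))
{
        int tries;

        for (tries = 0; tries < 5; tries++) {
                if (!cr_still_ok())
                        return false;   /* restart the whole sequence */
                if (eq_done())
                        return true;
                /* adjust drive settings as requested and retry */
        }
        return false;                   /* caller may retry at lower BW */
}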
intel_connector *connector = + to_intel_connector(old_conn_state->connector); int ret; DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links); - drm_dp_mst_reset_vcpi_slots(&intel_dp->mst_mgr, intel_mst->connector->port); + drm_dp_mst_reset_vcpi_slots(&intel_dp->mst_mgr, connector->port); ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr); if (ret) { @@ -109,11 +96,15 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder) } } -static void intel_mst_post_disable_dp(struct intel_encoder *encoder) +static void intel_mst_post_disable_dp(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; + struct intel_connector *connector = + to_intel_connector(old_conn_state->connector); DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links); @@ -122,59 +113,51 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder) /* and this can also fail */ drm_dp_update_payload_part2(&intel_dp->mst_mgr); - drm_dp_mst_deallocate_vcpi(&intel_dp->mst_mgr, intel_mst->connector->port); + drm_dp_mst_deallocate_vcpi(&intel_dp->mst_mgr, connector->port); intel_dp->active_mst_links--; intel_mst->connector = NULL; if (intel_dp->active_mst_links == 0) { - intel_dig_port->base.post_disable(&intel_dig_port->base); + intel_dig_port->base.post_disable(&intel_dig_port->base, + NULL, NULL); + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); } } -static void intel_mst_pre_enable_dp(struct intel_encoder *encoder) +static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = intel_dig_port->port; + struct intel_connector *connector = + to_intel_connector(conn_state->connector); int ret; uint32_t temp; - struct intel_connector *found = NULL, *connector; int slots; - struct drm_crtc *crtc = encoder->base.crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - - for_each_intel_connector(dev, connector) { - if (connector->base.state->best_encoder == &encoder->base) { - found = connector; - break; - } - } - - if (!found) { - DRM_ERROR("can't find connector\n"); - return; - } /* MST encoders are bound to a crtc, not to a connector, * force the mapping here for get_hw_state. 
*/ - found->encoder = encoder; + connector->encoder = encoder; + intel_mst->connector = connector; DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links); - intel_mst->connector = found; - if (intel_dp->active_mst_links == 0) { - intel_prepare_ddi_buffer(&intel_dig_port->base); - - intel_ddi_clk_select(&intel_dig_port->base, intel_crtc->config); + intel_ddi_clk_select(&intel_dig_port->base, + pipe_config->shared_dpll); - intel_dp_set_link_params(intel_dp, intel_crtc->config); + intel_prepare_dp_ddi_buffers(&intel_dig_port->base); + intel_dp_set_link_params(intel_dp, + pipe_config->port_clock, + pipe_config->lane_count, + true); intel_ddi_init_dp_buf_reg(&intel_dig_port->base); @@ -185,8 +168,8 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder) } ret = drm_dp_mst_allocate_vcpi(&intel_dp->mst_mgr, - intel_mst->connector->port, - intel_crtc->config->pbn, &slots); + connector->port, + pipe_config->pbn, &slots); if (ret == false) { DRM_ERROR("failed to allocate vcpi\n"); return; @@ -200,13 +183,14 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder) ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr); } -static void intel_mst_enable_dp(struct intel_encoder *encoder) +static void intel_mst_enable_dp(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = intel_dig_port->port; int ret; @@ -239,9 +223,8 @@ static void intel_dp_mst_enc_get_config(struct intel_encoder *encoder, { struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); struct intel_digital_port *intel_dig_port = intel_mst->primary; - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum transcoder cpu_transcoder = pipe_config->cpu_transcoder; u32 temp, flags = 0; diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 5c1f2d235ffa..1c59ca50c430 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -24,6 +24,44 @@ #include "intel_drv.h" struct intel_shared_dpll * +skl_find_link_pll(struct drm_i915_private *dev_priv, int clock) +{ + struct intel_shared_dpll *pll = NULL; + struct intel_dpll_hw_state dpll_hw_state; + enum intel_dpll_id i; + bool found = false; + + if (!skl_ddi_dp_set_dpll_hw_state(clock, &dpll_hw_state)) + return pll; + + for (i = DPLL_ID_SKL_DPLL1; i <= DPLL_ID_SKL_DPLL3; i++) { + pll = &dev_priv->shared_dplls[i]; + + /* Only want to check enabled timings first */ + if (pll->config.crtc_mask == 0) + continue; + + if (memcmp(&dpll_hw_state, &pll->config.hw_state, + sizeof(pll->config.hw_state)) == 0) { + found = true; + break; + } + } + + /* Ok no matching timings, maybe there's a free one? 
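+ * A PLL whose crtc_mask is zero is currently unused, so program the
+ * DP hw state computed above into the first such PLL we find.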
*/ + for (i = DPLL_ID_SKL_DPLL1; + ((found == false) && (i <= DPLL_ID_SKL_DPLL3)); i++) { + pll = &dev_priv->shared_dplls[i]; + if (pll->config.crtc_mask == 0) { + pll->config.hw_state = dpll_hw_state; + break; + } + } + + return pll; +} + +struct intel_shared_dpll * intel_get_shared_dpll_by_id(struct drm_i915_private *dev_priv, enum intel_dpll_id id) { @@ -452,26 +490,6 @@ static bool hsw_ddi_spll_get_hw_state(struct drm_i915_private *dev_priv, return val & SPLL_PLL_ENABLE; } -static uint32_t hsw_pll_to_ddi_pll_sel(struct intel_shared_dpll *pll) -{ - switch (pll->id) { - case DPLL_ID_WRPLL1: - return PORT_CLK_SEL_WRPLL1; - case DPLL_ID_WRPLL2: - return PORT_CLK_SEL_WRPLL2; - case DPLL_ID_SPLL: - return PORT_CLK_SEL_SPLL; - case DPLL_ID_LCPLL_810: - return PORT_CLK_SEL_LCPLL_810; - case DPLL_ID_LCPLL_1350: - return PORT_CLK_SEL_LCPLL_1350; - case DPLL_ID_LCPLL_2700: - return PORT_CLK_SEL_LCPLL_2700; - default: - return PORT_CLK_SEL_NONE; - } -} - #define LC_FREQ 2700 #define LC_FREQ_2K U64_C(LC_FREQ * 2000) @@ -687,11 +705,65 @@ hsw_ddi_calculate_wrpll(int clock /* in Hz */, *r2_out = best.r2; } +static struct intel_shared_dpll *hsw_ddi_hdmi_get_dpll(int clock, + struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state) +{ + struct intel_shared_dpll *pll; + uint32_t val; + unsigned int p, n2, r2; + + hsw_ddi_calculate_wrpll(clock * 1000, &r2, &n2, &p); + + val = WRPLL_PLL_ENABLE | WRPLL_PLL_LCPLL | + WRPLL_DIVIDER_REFERENCE(r2) | WRPLL_DIVIDER_FEEDBACK(n2) | + WRPLL_DIVIDER_POST(p); + + crtc_state->dpll_hw_state.wrpll = val; + + pll = intel_find_shared_dpll(crtc, crtc_state, + DPLL_ID_WRPLL1, DPLL_ID_WRPLL2); + + if (!pll) + return NULL; + + return pll; +} + +struct intel_shared_dpll *hsw_ddi_dp_get_dpll(struct intel_encoder *encoder, + int clock) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_shared_dpll *pll; + enum intel_dpll_id pll_id; + + switch (clock / 2) { + case 81000: + pll_id = DPLL_ID_LCPLL_810; + break; + case 135000: + pll_id = DPLL_ID_LCPLL_1350; + break; + case 270000: + pll_id = DPLL_ID_LCPLL_2700; + break; + default: + DRM_DEBUG_KMS("Invalid clock for DP: %d\n", clock); + return NULL; + } + + pll = intel_get_shared_dpll_by_id(dev_priv, pll_id); + + if (!pll) + return NULL; + + return pll; +} + static struct intel_shared_dpll * hsw_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, struct intel_encoder *encoder) { - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_shared_dpll *pll; int clock = crtc_state->port_clock; @@ -699,41 +771,12 @@ hsw_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, sizeof(crtc_state->dpll_hw_state)); if (encoder->type == INTEL_OUTPUT_HDMI) { - uint32_t val; - unsigned p, n2, r2; - - hsw_ddi_calculate_wrpll(clock * 1000, &r2, &n2, &p); - - val = WRPLL_PLL_ENABLE | WRPLL_PLL_LCPLL | - WRPLL_DIVIDER_REFERENCE(r2) | WRPLL_DIVIDER_FEEDBACK(n2) | - WRPLL_DIVIDER_POST(p); - - crtc_state->dpll_hw_state.wrpll = val; - - pll = intel_find_shared_dpll(crtc, crtc_state, - DPLL_ID_WRPLL1, DPLL_ID_WRPLL2); + pll = hsw_ddi_hdmi_get_dpll(clock, crtc, crtc_state); } else if (encoder->type == INTEL_OUTPUT_DP || encoder->type == INTEL_OUTPUT_DP_MST || encoder->type == INTEL_OUTPUT_EDP) { - enum intel_dpll_id pll_id; - - switch (clock / 2) { - case 81000: - pll_id = DPLL_ID_LCPLL_810; - break; - case 135000: - pll_id = DPLL_ID_LCPLL_1350; - break; - case 270000: - pll_id = DPLL_ID_LCPLL_2700; - break; - default: - DRM_DEBUG_KMS("Invalid clock for DP: %d\n", 
clock); - return NULL; - } - - pll = intel_get_shared_dpll_by_id(dev_priv, pll_id); + pll = hsw_ddi_dp_get_dpll(encoder, clock); } else if (encoder->type == INTEL_OUTPUT_ANALOG) { if (WARN_ON(crtc_state->port_clock / 2 != 135000)) @@ -751,14 +794,11 @@ hsw_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, if (!pll) return NULL; - crtc_state->ddi_pll_sel = hsw_pll_to_ddi_pll_sel(pll); - intel_reference_shared_dpll(pll, crtc_state); return pll; } - static const struct intel_shared_dpll_funcs hsw_ddi_wrpll_funcs = { .enable = hsw_ddi_wrpll_enable, .disable = hsw_ddi_wrpll_disable, @@ -1194,75 +1234,110 @@ skip_remaining_dividers: return true; } -static struct intel_shared_dpll * -skl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, - struct intel_encoder *encoder) +static bool skl_ddi_hdmi_pll_dividers(struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state, + int clock) { - struct intel_shared_dpll *pll; uint32_t ctrl1, cfgcr1, cfgcr2; - int clock = crtc_state->port_clock; + struct skl_wrpll_params wrpll_params = { 0, }; /* * See comment in intel_dpll_hw_state to understand why we always use 0 * as the DPLL id in this function. */ - ctrl1 = DPLL_CTRL1_OVERRIDE(0); - if (encoder->type == INTEL_OUTPUT_HDMI) { - struct skl_wrpll_params wrpll_params = { 0, }; + ctrl1 |= DPLL_CTRL1_HDMI_MODE(0); + + if (!skl_ddi_calculate_wrpll(clock * 1000, &wrpll_params)) + return false; - ctrl1 |= DPLL_CTRL1_HDMI_MODE(0); + cfgcr1 = DPLL_CFGCR1_FREQ_ENABLE | + DPLL_CFGCR1_DCO_FRACTION(wrpll_params.dco_fraction) | + wrpll_params.dco_integer; - if (!skl_ddi_calculate_wrpll(clock * 1000, &wrpll_params)) - return NULL; + cfgcr2 = DPLL_CFGCR2_QDIV_RATIO(wrpll_params.qdiv_ratio) | + DPLL_CFGCR2_QDIV_MODE(wrpll_params.qdiv_mode) | + DPLL_CFGCR2_KDIV(wrpll_params.kdiv) | + DPLL_CFGCR2_PDIV(wrpll_params.pdiv) | + wrpll_params.central_freq; + + memset(&crtc_state->dpll_hw_state, 0, + sizeof(crtc_state->dpll_hw_state)); + + crtc_state->dpll_hw_state.ctrl1 = ctrl1; + crtc_state->dpll_hw_state.cfgcr1 = cfgcr1; + crtc_state->dpll_hw_state.cfgcr2 = cfgcr2; + return true; +} + + +bool skl_ddi_dp_set_dpll_hw_state(int clock, + struct intel_dpll_hw_state *dpll_hw_state) +{ + uint32_t ctrl1; + + /* + * See comment in intel_dpll_hw_state to understand why we always use 0 + * as the DPLL id in this function. 
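+ * The switch below encodes the DP link rate (port_clock / 2, in kHz)
+ * into the DPLL_CTRL1 link-rate field, including the eDP 1.4
+ * intermediate rates.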
+ */ + ctrl1 = DPLL_CTRL1_OVERRIDE(0); + switch (clock / 2) { + case 81000: + ctrl1 |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, 0); + break; + case 135000: + ctrl1 |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1350, 0); + break; + case 270000: + ctrl1 |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2700, 0); + break; + /* eDP 1.4 rates */ + case 162000: + ctrl1 |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1620, 0); + break; + case 108000: + ctrl1 |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, 0); + break; + case 216000: + ctrl1 |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2160, 0); + break; + } + + dpll_hw_state->ctrl1 = ctrl1; + return true; +} + +static struct intel_shared_dpll * +skl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, + struct intel_encoder *encoder) +{ + struct intel_shared_dpll *pll; + int clock = crtc_state->port_clock; + bool bret; + struct intel_dpll_hw_state dpll_hw_state; - cfgcr1 = DPLL_CFGCR1_FREQ_ENABLE | - DPLL_CFGCR1_DCO_FRACTION(wrpll_params.dco_fraction) | - wrpll_params.dco_integer; + memset(&dpll_hw_state, 0, sizeof(dpll_hw_state)); - cfgcr2 = DPLL_CFGCR2_QDIV_RATIO(wrpll_params.qdiv_ratio) | - DPLL_CFGCR2_QDIV_MODE(wrpll_params.qdiv_mode) | - DPLL_CFGCR2_KDIV(wrpll_params.kdiv) | - DPLL_CFGCR2_PDIV(wrpll_params.pdiv) | - wrpll_params.central_freq; + if (encoder->type == INTEL_OUTPUT_HDMI) { + bret = skl_ddi_hdmi_pll_dividers(crtc, crtc_state, clock); + if (!bret) { + DRM_DEBUG_KMS("Could not get HDMI pll dividers.\n"); + return NULL; + } } else if (encoder->type == INTEL_OUTPUT_DP || encoder->type == INTEL_OUTPUT_DP_MST || encoder->type == INTEL_OUTPUT_EDP) { - switch (crtc_state->port_clock / 2) { - case 81000: - ctrl1 |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, 0); - break; - case 135000: - ctrl1 |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1350, 0); - break; - case 270000: - ctrl1 |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2700, 0); - break; - /* eDP 1.4 rates */ - case 162000: - ctrl1 |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1620, 0); - break; - case 108000: - ctrl1 |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, 0); - break; - case 216000: - ctrl1 |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2160, 0); - break; + bret = skl_ddi_dp_set_dpll_hw_state(clock, &dpll_hw_state); + if (!bret) { + DRM_DEBUG_KMS("Could not set DP dpll HW state.\n"); + return NULL; } - - cfgcr1 = cfgcr2 = 0; + crtc_state->dpll_hw_state = dpll_hw_state; } else { return NULL; } - memset(&crtc_state->dpll_hw_state, 0, - sizeof(crtc_state->dpll_hw_state)); - - crtc_state->dpll_hw_state.ctrl1 = ctrl1; - crtc_state->dpll_hw_state.cfgcr1 = cfgcr1; - crtc_state->dpll_hw_state.cfgcr2 = cfgcr2; - if (encoder->type == INTEL_OUTPUT_EDP) pll = intel_find_shared_dpll(crtc, crtc_state, DPLL_ID_SKL_DPLL0, @@ -1274,8 +1349,6 @@ skl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, if (!pll) return NULL; - crtc_state->ddi_pll_sel = pll->id; - intel_reference_shared_dpll(pll, crtc_state); return pll; @@ -1484,6 +1557,8 @@ struct bxt_clk_div { uint32_t m2_frac; bool m2_frac_en; uint32_t n; + + int vco; }; /* pre-calculated values for DP linkrates */ @@ -1497,57 +1572,60 @@ static const struct bxt_clk_div bxt_dp_clk_val[] = { {432000, 3, 1, 32, 1677722, 1, 1} }; -static struct intel_shared_dpll * -bxt_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, - struct intel_encoder *encoder) +static bool +bxt_ddi_hdmi_pll_dividers(struct intel_crtc *intel_crtc, + struct intel_crtc_state *crtc_state, int clock, + struct 
bxt_clk_div *clk_div) { - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_shared_dpll *pll; - enum intel_dpll_id i; - struct intel_digital_port *intel_dig_port; - struct bxt_clk_div clk_div = {0}; - int vco = 0; - uint32_t prop_coef, int_coef, gain_ctl, targ_cnt; - uint32_t lanestagger; - int clock = crtc_state->port_clock; + struct dpll best_clock; - if (encoder->type == INTEL_OUTPUT_HDMI) { - struct dpll best_clock; + /* Calculate HDMI div */ + /* + * FIXME: tie the following calculation into + * i9xx_crtc_compute_clock + */ + if (!bxt_find_best_dpll(crtc_state, clock, &best_clock)) { + DRM_DEBUG_DRIVER("no PLL dividers found for clock %d pipe %c\n", + clock, pipe_name(intel_crtc->pipe)); + return false; + } - /* Calculate HDMI div */ - /* - * FIXME: tie the following calculation into - * i9xx_crtc_compute_clock - */ - if (!bxt_find_best_dpll(crtc_state, clock, &best_clock)) { - DRM_DEBUG_DRIVER("no PLL dividers found for clock %d pipe %c\n", - clock, pipe_name(crtc->pipe)); - return NULL; - } + clk_div->p1 = best_clock.p1; + clk_div->p2 = best_clock.p2; + WARN_ON(best_clock.m1 != 2); + clk_div->n = best_clock.n; + clk_div->m2_int = best_clock.m2 >> 22; + clk_div->m2_frac = best_clock.m2 & ((1 << 22) - 1); + clk_div->m2_frac_en = clk_div->m2_frac != 0; - clk_div.p1 = best_clock.p1; - clk_div.p2 = best_clock.p2; - WARN_ON(best_clock.m1 != 2); - clk_div.n = best_clock.n; - clk_div.m2_int = best_clock.m2 >> 22; - clk_div.m2_frac = best_clock.m2 & ((1 << 22) - 1); - clk_div.m2_frac_en = clk_div.m2_frac != 0; + clk_div->vco = best_clock.vco; - vco = best_clock.vco; - } else if (encoder->type == INTEL_OUTPUT_DP || - encoder->type == INTEL_OUTPUT_EDP) { - int i; + return true; +} - clk_div = bxt_dp_clk_val[0]; - for (i = 0; i < ARRAY_SIZE(bxt_dp_clk_val); ++i) { - if (bxt_dp_clk_val[i].clock == clock) { - clk_div = bxt_dp_clk_val[i]; - break; - } +static void bxt_ddi_dp_pll_dividers(int clock, struct bxt_clk_div *clk_div) +{ + int i; + + *clk_div = bxt_dp_clk_val[0]; + for (i = 0; i < ARRAY_SIZE(bxt_dp_clk_val); ++i) { + if (bxt_dp_clk_val[i].clock == clock) { + *clk_div = bxt_dp_clk_val[i]; + break; } - vco = clock * 10 / 2 * clk_div.p1 * clk_div.p2; } + clk_div->vco = clock * 10 / 2 * clk_div->p1 * clk_div->p2; +} + +static bool bxt_ddi_set_dpll_hw_state(int clock, + struct bxt_clk_div *clk_div, + struct intel_dpll_hw_state *dpll_hw_state) +{ + int vco = clk_div->vco; + uint32_t prop_coef, int_coef, gain_ctl, targ_cnt; + uint32_t lanestagger; + if (vco >= 6200000 && vco <= 6700000) { prop_coef = 4; int_coef = 9; @@ -1566,12 +1644,9 @@ bxt_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, targ_cnt = 9; } else { DRM_ERROR("Invalid VCO\n"); - return NULL; + return false; } - memset(&crtc_state->dpll_hw_state, 0, - sizeof(crtc_state->dpll_hw_state)); - if (clock > 270000) lanestagger = 0x18; else if (clock > 135000) @@ -1583,35 +1658,86 @@ bxt_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, else lanestagger = 0x02; - crtc_state->dpll_hw_state.ebb0 = - PORT_PLL_P1(clk_div.p1) | PORT_PLL_P2(clk_div.p2); - crtc_state->dpll_hw_state.pll0 = clk_div.m2_int; - crtc_state->dpll_hw_state.pll1 = PORT_PLL_N(clk_div.n); - crtc_state->dpll_hw_state.pll2 = clk_div.m2_frac; + dpll_hw_state->ebb0 = PORT_PLL_P1(clk_div->p1) | PORT_PLL_P2(clk_div->p2); + dpll_hw_state->pll0 = clk_div->m2_int; + dpll_hw_state->pll1 = PORT_PLL_N(clk_div->n); + dpll_hw_state->pll2 = clk_div->m2_frac; - if (clk_div.m2_frac_en) - crtc_state->dpll_hw_state.pll3 = - 
PORT_PLL_M2_FRAC_ENABLE; + if (clk_div->m2_frac_en) + dpll_hw_state->pll3 = PORT_PLL_M2_FRAC_ENABLE; - crtc_state->dpll_hw_state.pll6 = - prop_coef | PORT_PLL_INT_COEFF(int_coef); - crtc_state->dpll_hw_state.pll6 |= - PORT_PLL_GAIN_CTL(gain_ctl); + dpll_hw_state->pll6 = prop_coef | PORT_PLL_INT_COEFF(int_coef); + dpll_hw_state->pll6 |= PORT_PLL_GAIN_CTL(gain_ctl); - crtc_state->dpll_hw_state.pll8 = targ_cnt; + dpll_hw_state->pll8 = targ_cnt; - crtc_state->dpll_hw_state.pll9 = 5 << PORT_PLL_LOCK_THRESHOLD_SHIFT; + dpll_hw_state->pll9 = 5 << PORT_PLL_LOCK_THRESHOLD_SHIFT; - crtc_state->dpll_hw_state.pll10 = + dpll_hw_state->pll10 = PORT_PLL_DCO_AMP(PORT_PLL_DCO_AMP_DEFAULT) | PORT_PLL_DCO_AMP_OVR_EN_H; - crtc_state->dpll_hw_state.ebb4 = PORT_PLL_10BIT_CLK_ENABLE; + dpll_hw_state->ebb4 = PORT_PLL_10BIT_CLK_ENABLE; + + dpll_hw_state->pcsdw12 = LANESTAGGER_STRAP_OVRD | lanestagger; - crtc_state->dpll_hw_state.pcsdw12 = - LANESTAGGER_STRAP_OVRD | lanestagger; + return true; +} - intel_dig_port = enc_to_dig_port(&encoder->base); +bool bxt_ddi_dp_set_dpll_hw_state(int clock, + struct intel_dpll_hw_state *dpll_hw_state) +{ + struct bxt_clk_div clk_div = {0}; + + bxt_ddi_dp_pll_dividers(clock, &clk_div); + + return bxt_ddi_set_dpll_hw_state(clock, &clk_div, dpll_hw_state); +} + +static bool +bxt_ddi_hdmi_set_dpll_hw_state(struct intel_crtc *intel_crtc, + struct intel_crtc_state *crtc_state, int clock, + struct intel_dpll_hw_state *dpll_hw_state) +{ + struct bxt_clk_div clk_div = { }; + + bxt_ddi_hdmi_pll_dividers(intel_crtc, crtc_state, clock, &clk_div); + + return bxt_ddi_set_dpll_hw_state(clock, &clk_div, dpll_hw_state); +} + +static struct intel_shared_dpll * +bxt_get_dpll(struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state, + struct intel_encoder *encoder) +{ + struct intel_dpll_hw_state dpll_hw_state = { }; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_digital_port *intel_dig_port; + struct intel_shared_dpll *pll; + int i, clock = crtc_state->port_clock; + + if (encoder->type == INTEL_OUTPUT_HDMI && + !bxt_ddi_hdmi_set_dpll_hw_state(crtc, crtc_state, clock, + &dpll_hw_state)) + return NULL; + + if ((encoder->type == INTEL_OUTPUT_DP || + encoder->type == INTEL_OUTPUT_EDP) && + !bxt_ddi_dp_set_dpll_hw_state(clock, &dpll_hw_state)) + return NULL; + + memset(&crtc_state->dpll_hw_state, 0, + sizeof(crtc_state->dpll_hw_state)); + + crtc_state->dpll_hw_state = dpll_hw_state; + + if (encoder->type == INTEL_OUTPUT_DP_MST) { + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); + + intel_dig_port = intel_mst->primary; + } else + intel_dig_port = enc_to_dig_port(&encoder->base); /* 1:1 mapping between ports and PLLs */ i = (enum intel_dpll_id) intel_dig_port->port; @@ -1622,9 +1748,6 @@ bxt_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, intel_reference_shared_dpll(pll, crtc_state); - /* shared DPLL id 0 is DPLL A */ - crtc_state->ddi_pll_sel = pll->id; - return pll; } diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h b/drivers/gpu/drm/i915/intel_dpll_mgr.h index 89c5ada1a315..f4385353bc11 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.h @@ -160,5 +160,20 @@ void intel_disable_shared_dpll(struct intel_crtc *crtc); void intel_shared_dpll_commit(struct drm_atomic_state *state); void intel_shared_dpll_init(struct drm_device *dev); +/* BXT dpll related functions */ +bool bxt_ddi_dp_set_dpll_hw_state(int clock, + struct intel_dpll_hw_state *dpll_hw_state); + + +/* SKL dpll related 
functions */ +bool skl_ddi_dp_set_dpll_hw_state(int clock, + struct intel_dpll_hw_state *dpll_hw_state); +struct intel_shared_dpll *skl_find_link_pll(struct drm_i915_private *dev_priv, + int clock); + + +/* HSW dpll related functions */ +struct intel_shared_dpll *hsw_ddi_dp_get_dpll(struct intel_encoder *encoder, + int clock); #endif /* _INTEL_DPLL_MGR_H_ */ diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index cc937a19b1ba..a19ec06f9e42 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -52,11 +52,15 @@ */ #define _wait_for(COND, US, W) ({ \ unsigned long timeout__ = jiffies + usecs_to_jiffies(US) + 1; \ - int ret__ = 0; \ - while (!(COND)) { \ - if (time_after(jiffies, timeout__)) { \ - if (!(COND)) \ - ret__ = -ETIMEDOUT; \ + int ret__; \ + for (;;) { \ + bool expired__ = time_after(jiffies, timeout__); \ + if (COND) { \ + ret__ = 0; \ + break; \ + } \ + if (expired__) { \ + ret__ = -ETIMEDOUT; \ break; \ } \ if ((W) && drm_can_sleep()) { \ @@ -178,11 +182,22 @@ struct intel_framebuffer { struct drm_framebuffer base; struct drm_i915_gem_object *obj; struct intel_rotation_info rot_info; + + /* for each plane in the normal GTT view */ + struct { + unsigned int x, y; + } normal[2]; + /* for each plane in the rotated GTT view */ + struct { + unsigned int x, y; + unsigned int pitch; /* pixels */ + } rotated[2]; }; struct intel_fbdev { struct drm_fb_helper helper; struct intel_framebuffer *fb; + struct i915_vma *vma; async_cookie_t cookie; int preferred_bpp; }; @@ -194,14 +209,26 @@ struct intel_encoder { unsigned int cloneable; void (*hot_plug)(struct intel_encoder *); bool (*compute_config)(struct intel_encoder *, - struct intel_crtc_state *); - void (*pre_pll_enable)(struct intel_encoder *); - void (*pre_enable)(struct intel_encoder *); - void (*enable)(struct intel_encoder *); - void (*mode_set)(struct intel_encoder *intel_encoder); - void (*disable)(struct intel_encoder *); - void (*post_disable)(struct intel_encoder *); - void (*post_pll_disable)(struct intel_encoder *); + struct intel_crtc_state *, + struct drm_connector_state *); + void (*pre_pll_enable)(struct intel_encoder *, + struct intel_crtc_state *, + struct drm_connector_state *); + void (*pre_enable)(struct intel_encoder *, + struct intel_crtc_state *, + struct drm_connector_state *); + void (*enable)(struct intel_encoder *, + struct intel_crtc_state *, + struct drm_connector_state *); + void (*disable)(struct intel_encoder *, + struct intel_crtc_state *, + struct drm_connector_state *); + void (*post_disable)(struct intel_encoder *, + struct intel_crtc_state *, + struct drm_connector_state *); + void (*post_pll_disable)(struct intel_encoder *, + struct intel_crtc_state *, + struct drm_connector_state *); /* Read out the current hw state of this encoder, returning true if * the encoder is active. If the encoder is enabled it also sets the pipe * it is connected to in the pipe parameter. 
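* As part of the atomic conversion, the enable/disable hooks above now
* receive the intel_crtc_state and drm_connector_state being committed
* instead of peeking at encoder->base.crtc.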
*/ @@ -236,6 +263,7 @@ struct intel_panel { bool enabled; bool combination_mode; /* gen 2/4 only */ bool active_low_pwm; + bool alternate_pwm_increment; /* lpt+ */ /* PWM chip */ bool util_pin_active_low; /* bxt+ */ @@ -338,10 +366,16 @@ struct intel_atomic_state { struct intel_plane_state { struct drm_plane_state base; - struct drm_rect src; - struct drm_rect dst; struct drm_rect clip; - bool visible; + + struct { + u32 offset; + int x, y; + } main; + struct { + u32 offset; + int x, y; + } aux; /* * scaler_id @@ -561,12 +595,6 @@ struct intel_crtc_state { /* Selected dpll when shared or NULL. */ struct intel_shared_dpll *shared_dpll; - /* - * - PORT_CLK_SEL for DDI ports on HSW/BDW. - * - enum skl_dpll on SKL - */ - uint32_t ddi_pll_sel; - /* Actual register state of the dpll, for shared dpll cross-checking. */ struct intel_dpll_hw_state dpll_hw_state; @@ -683,8 +711,8 @@ struct intel_crtc { struct intel_crtc_state *config; - /* reset counter value when the last flip was submitted */ - unsigned int reset_counter; + /* global reset count when the last flip was submitted */ + unsigned int reset_count; /* Access to these should be protected by dev_priv->irq_lock. */ bool cpu_fifo_underrun_disabled; @@ -852,8 +880,10 @@ struct intel_dp { int link_rate; uint8_t lane_count; uint8_t sink_count; + bool link_mst; bool has_audio; bool detect_done; + bool channel_eq_status; enum hdmi_force_audio force_audio; bool limited_color_range; bool color_range_auto; @@ -1106,8 +1136,11 @@ void intel_crt_reset(struct drm_encoder *encoder); /* intel_ddi.c */ void intel_ddi_clk_select(struct intel_encoder *encoder, - const struct intel_crtc_state *pipe_config); -void intel_prepare_ddi_buffer(struct intel_encoder *encoder); + struct intel_shared_dpll *pll); +void intel_ddi_fdi_post_disable(struct intel_encoder *intel_encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state); +void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder); void hsw_fdi_link_train(struct drm_crtc *crtc); void intel_ddi_init(struct drm_device *dev, enum port port); enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder); @@ -1122,7 +1155,6 @@ bool intel_ddi_pll_select(struct intel_crtc *crtc, void intel_ddi_set_pipe_settings(struct drm_crtc *crtc); void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp); bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector); -void intel_ddi_fdi_disable(struct drm_crtc *crtc); void intel_ddi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); struct intel_encoder * @@ -1133,22 +1165,12 @@ void intel_ddi_clock_get(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state); uint32_t ddi_signal_levels(struct intel_dp *intel_dp); - -/* intel_frontbuffer.c */ -void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, - enum fb_op_origin origin); -void intel_frontbuffer_flip_prepare(struct drm_device *dev, - unsigned frontbuffer_bits); -void intel_frontbuffer_flip_complete(struct drm_device *dev, - unsigned frontbuffer_bits); -void intel_frontbuffer_flip(struct drm_device *dev, - unsigned frontbuffer_bits); +struct intel_shared_dpll *intel_ddi_get_link_dpll(struct intel_dp *intel_dp, + int clock); unsigned int intel_fb_align_height(struct drm_device *dev, unsigned int height, uint32_t pixel_format, uint64_t fb_format_modifier); -void intel_fb_obj_flush(struct drm_i915_gem_object *obj, bool retire, - 
enum fb_op_origin origin); u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv, uint64_t fb_modifier, uint32_t pixel_format); @@ -1164,14 +1186,22 @@ void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv, int vco); void intel_update_rawclk(struct drm_i915_private *dev_priv); int vlv_get_cck_clock(struct drm_i915_private *dev_priv, const char *name, u32 reg, int ref_freq); +void lpt_disable_pch_transcoder(struct drm_i915_private *dev_priv); +void lpt_disable_iclkip(struct drm_i915_private *dev_priv); extern const struct drm_plane_funcs intel_plane_funcs; void intel_init_display_hooks(struct drm_i915_private *dev_priv); +unsigned int intel_fb_xy_to_linear(int x, int y, + const struct intel_plane_state *state, + int plane); +void intel_add_fb_offsets(int *x, int *y, + const struct intel_plane_state *state, int plane); unsigned int intel_rotation_info_size(const struct intel_rotation_info *rot_info); bool intel_has_pending_fb_unpin(struct drm_device *dev); void intel_mark_busy(struct drm_i915_private *dev_priv); void intel_mark_idle(struct drm_i915_private *dev_priv); void intel_crtc_restore_mode(struct drm_crtc *crtc); int intel_display_suspend(struct drm_device *dev); +void intel_pps_unlock_regs_wa(struct drm_i915_private *dev_priv); void intel_encoder_destroy(struct drm_encoder *encoder); int intel_connector_init(struct intel_connector *); struct intel_connector *intel_connector_alloc(void); @@ -1227,8 +1257,8 @@ bool intel_get_load_detect_pipe(struct drm_connector *connector, void intel_release_load_detect_pipe(struct drm_connector *connector, struct intel_load_detect_pipe *old, struct drm_modeset_acquire_ctx *ctx); -int intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, - unsigned int rotation); +struct i915_vma * +intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation); void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation); struct drm_framebuffer * __intel_framebuffer_create(struct drm_device *dev, @@ -1238,9 +1268,9 @@ void intel_finish_page_flip_cs(struct drm_i915_private *dev_priv, int pipe); void intel_finish_page_flip_mmio(struct drm_i915_private *dev_priv, int pipe); void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe); int intel_prepare_plane_fb(struct drm_plane *plane, - const struct drm_plane_state *new_state); + struct drm_plane_state *new_state); void intel_cleanup_plane_fb(struct drm_plane *plane, - const struct drm_plane_state *old_state); + struct drm_plane_state *old_state); int intel_plane_atomic_get_property(struct drm_plane *plane, const struct drm_plane_state *state, struct drm_property *property, @@ -1258,7 +1288,7 @@ unsigned int intel_tile_height(const struct drm_i915_private *dev_priv, static inline bool intel_rotation_90_or_270(unsigned int rotation) { - return rotation & (BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270)); + return rotation & (DRM_ROTATE_90 | DRM_ROTATE_270); } void intel_create_rotation_property(struct drm_device *dev, @@ -1290,9 +1320,7 @@ void assert_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, bool state); #define assert_pipe_enabled(d, p) assert_pipe(d, p, true) #define assert_pipe_disabled(d, p) assert_pipe(d, p, false) u32 intel_compute_tile_offset(int *x, int *y, - const struct drm_framebuffer *fb, int plane, - unsigned int pitch, - unsigned int rotation); + const struct intel_plane_state *state, int plane); void intel_prepare_reset(struct drm_i915_private *dev_priv); void intel_finish_reset(struct drm_i915_private *dev_priv); void 
hsw_enable_pc8(struct drm_i915_private *dev_priv); @@ -1335,13 +1363,14 @@ void intel_mode_from_pipe_config(struct drm_display_mode *mode, int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state); int skl_max_scale(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state); -u32 intel_plane_obj_offset(struct intel_plane *intel_plane, - struct drm_i915_gem_object *obj, - unsigned int plane); +u32 intel_fb_gtt_offset(struct drm_framebuffer *fb, unsigned int rotation); u32 skl_plane_ctl_format(uint32_t pixel_format); u32 skl_plane_ctl_tiling(uint64_t fb_modifier); u32 skl_plane_ctl_rotation(unsigned int rotation); +u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane, + unsigned int rotation); +int skl_check_plane_surface(struct intel_plane_state *plane_state); /* intel_csr.c */ void intel_csr_ucode_init(struct drm_i915_private *); @@ -1355,7 +1384,8 @@ bool intel_dp_init(struct drm_device *dev, i915_reg_t output_reg, enum port port bool intel_dp_init_connector(struct intel_digital_port *intel_dig_port, struct intel_connector *intel_connector); void intel_dp_set_link_params(struct intel_dp *intel_dp, - const struct intel_crtc_state *pipe_config); + int link_rate, uint8_t lane_count, + bool link_mst); void intel_dp_start_link_train(struct intel_dp *intel_dp); void intel_dp_stop_link_train(struct intel_dp *intel_dp); void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode); @@ -1364,7 +1394,8 @@ void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder); void intel_dp_encoder_destroy(struct drm_encoder *encoder); int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc); bool intel_dp_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config); + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state); bool intel_dp_is_edp(struct drm_device *dev, enum port port); enum irqreturn intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd); @@ -1382,13 +1413,14 @@ void intel_dp_hot_plug(struct intel_encoder *intel_encoder); void intel_power_sequencer_reset(struct drm_i915_private *dev_priv); uint32_t intel_dp_pack_aux(const uint8_t *src, int src_bytes); void intel_plane_destroy(struct drm_plane *plane); -void intel_edp_drrs_enable(struct intel_dp *intel_dp); -void intel_edp_drrs_disable(struct intel_dp *intel_dp); -void intel_edp_drrs_invalidate(struct drm_device *dev, - unsigned frontbuffer_bits); -void intel_edp_drrs_flush(struct drm_device *dev, unsigned frontbuffer_bits); -bool intel_digital_port_connected(struct drm_i915_private *dev_priv, - struct intel_digital_port *port); +void intel_edp_drrs_enable(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state); +void intel_edp_drrs_disable(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state); +void intel_edp_drrs_invalidate(struct drm_i915_private *dev_priv, + unsigned int frontbuffer_bits); +void intel_edp_drrs_flush(struct drm_i915_private *dev_priv, + unsigned int frontbuffer_bits); void intel_dp_program_link_training_pattern(struct intel_dp *intel_dp, @@ -1488,7 +1520,8 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, struct intel_connector *intel_connector); struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder); bool intel_hdmi_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config); + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state); void intel_dp_dual_mode_set_tmds_output(struct intel_hdmi *hdmi, bool enable); @@ 
-1561,13 +1594,13 @@ static inline void intel_backlight_device_unregister(struct intel_connector *con /* intel_psr.c */ void intel_psr_enable(struct intel_dp *intel_dp); void intel_psr_disable(struct intel_dp *intel_dp); -void intel_psr_invalidate(struct drm_device *dev, +void intel_psr_invalidate(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits); -void intel_psr_flush(struct drm_device *dev, +void intel_psr_flush(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits, enum fb_op_origin origin); void intel_psr_init(struct drm_device *dev); -void intel_psr_single_frame_update(struct drm_device *dev, +void intel_psr_single_frame_update(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits); /* intel_runtime_pm.c */ @@ -1667,13 +1700,6 @@ enable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv) atomic_dec(&dev_priv->pm.wakeref_count); } -/* TODO: convert users of these to rely instead on proper RPM refcounting */ -#define DISABLE_RPM_WAKEREF_ASSERTS(dev_priv) \ - disable_rpm_wakeref_asserts(dev_priv) - -#define ENABLE_RPM_WAKEREF_ASSERTS(dev_priv) \ - enable_rpm_wakeref_asserts(dev_priv) - void intel_runtime_pm_get(struct drm_i915_private *dev_priv); bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv); void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv); @@ -1699,11 +1725,11 @@ void intel_gpu_ips_init(struct drm_i915_private *dev_priv); void intel_gpu_ips_teardown(void); void intel_init_gt_powersave(struct drm_i915_private *dev_priv); void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv); +void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv); void intel_enable_gt_powersave(struct drm_i915_private *dev_priv); +void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv); void intel_disable_gt_powersave(struct drm_i915_private *dev_priv); void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv); -void intel_reset_gt_powersave(struct drm_i915_private *dev_priv); -void gen6_update_ring_freq(struct drm_i915_private *dev_priv); void gen6_rps_busy(struct drm_i915_private *dev_priv); void gen6_rps_reset_ei(struct drm_i915_private *dev_priv); void gen6_rps_idle(struct drm_i915_private *dev_priv); @@ -1716,6 +1742,21 @@ void ilk_wm_get_hw_state(struct drm_device *dev); void skl_wm_get_hw_state(struct drm_device *dev); void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, struct skl_ddb_allocation *ddb /* out */); +bool intel_can_enable_sagv(struct drm_atomic_state *state); +int intel_enable_sagv(struct drm_i915_private *dev_priv); +int intel_disable_sagv(struct drm_i915_private *dev_priv); +bool skl_ddb_allocation_equals(const struct skl_ddb_allocation *old, + const struct skl_ddb_allocation *new, + enum pipe pipe); +bool skl_ddb_allocation_overlaps(struct drm_atomic_state *state, + const struct skl_ddb_allocation *old, + const struct skl_ddb_allocation *new, + enum pipe pipe); +void skl_write_cursor_wm(struct intel_crtc *intel_crtc, + const struct skl_wm_values *wm); +void skl_write_plane_wm(struct intel_crtc *intel_crtc, + const struct skl_wm_values *wm, + int plane); uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config); bool ilk_disable_lp_wm(struct drm_device *dev); int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6); diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index de8e9fb51595..b2e3d3a334f7 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -312,7 +312,8 @@ 
static inline bool is_cmd_mode(struct intel_dsi *intel_dsi) } static bool intel_dsi_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config) + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = container_of(encoder, struct intel_dsi, @@ -533,14 +534,15 @@ static void intel_dsi_enable(struct intel_encoder *encoder) intel_panel_enable_backlight(intel_dsi->attached_connector); } -static void intel_dsi_prepare(struct intel_encoder *intel_encoder); +static void intel_dsi_prepare(struct intel_encoder *intel_encoder, + struct intel_crtc_state *pipe_config); -static void intel_dsi_pre_enable(struct intel_encoder *encoder) +static void intel_dsi_pre_enable(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); enum port port; DRM_DEBUG_KMS("\n"); @@ -550,9 +552,9 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder) * lock. It needs to be fully powered down to fix it. */ intel_disable_dsi_pll(encoder); - intel_enable_dsi_pll(encoder, crtc->config); + intel_enable_dsi_pll(encoder, pipe_config); - intel_dsi_prepare(encoder); + intel_dsi_prepare(encoder, pipe_config); /* Panel Enable over CRC PMIC */ if (intel_dsi->gpio_panel) @@ -582,7 +584,9 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder) intel_dsi_enable(encoder); } -static void intel_dsi_enable_nop(struct intel_encoder *encoder) +static void intel_dsi_enable_nop(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { DRM_DEBUG_KMS("\n"); @@ -592,7 +596,9 @@ static void intel_dsi_enable_nop(struct intel_encoder *encoder) */ } -static void intel_dsi_pre_disable(struct intel_encoder *encoder) +static void intel_dsi_pre_disable(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); enum port port; @@ -694,7 +700,9 @@ static void intel_dsi_clear_device_ready(struct intel_encoder *encoder) intel_disable_dsi_pll(encoder); } -static void intel_dsi_post_disable(struct intel_encoder *encoder) +static void intel_dsi_post_disable(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); @@ -819,6 +827,7 @@ static void bxt_dsi_get_pipe_config(struct intel_encoder *encoder, u16 crtc_htotal_sw, crtc_hsync_start_sw, crtc_hsync_end_sw, crtc_hblank_start_sw, crtc_hblank_end_sw; + /* FIXME: hw readout should not depend on SW state */ intel_crtc = to_intel_crtc(encoder->base.crtc); adjusted_mode_sw = &intel_crtc->config->base.adjusted_mode; @@ -1104,14 +1113,15 @@ static u32 pixel_format_to_reg(enum mipi_dsi_pixel_format fmt) } } -static void intel_dsi_prepare(struct intel_encoder *intel_encoder) +static void intel_dsi_prepare(struct intel_encoder *intel_encoder, + struct intel_crtc_state *pipe_config) { struct drm_encoder *encoder = &intel_encoder->base; struct drm_device 
*dev = encoder->dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); - const struct drm_display_mode *adjusted_mode = &intel_crtc->config->base.adjusted_mode; + const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; enum port port; unsigned int bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); u32 val, tmp; @@ -1348,7 +1358,7 @@ static int intel_dsi_set_property(struct drm_connector *connector, intel_connector->panel.fitting_mode = val; } - crtc = intel_attached_encoder(connector)->base.crtc; + crtc = connector->state->crtc; if (crtc && crtc->state->enable) { /* * If the CRTC is enabled, the display will be changed diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index 47bdf9dad0d3..2e452c505e7e 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -174,7 +174,9 @@ static void intel_dvo_get_config(struct intel_encoder *encoder, pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock; } -static void intel_disable_dvo(struct intel_encoder *encoder) +static void intel_disable_dvo(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dvo *intel_dvo = enc_to_dvo(encoder); @@ -186,17 +188,18 @@ static void intel_disable_dvo(struct intel_encoder *encoder) I915_READ(dvo_reg); } -static void intel_enable_dvo(struct intel_encoder *encoder) +static void intel_enable_dvo(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dvo *intel_dvo = enc_to_dvo(encoder); - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); i915_reg_t dvo_reg = intel_dvo->dev.dvo_reg; u32 temp = I915_READ(dvo_reg); intel_dvo->dev.dev_ops->mode_set(&intel_dvo->dev, - &crtc->config->base.mode, - &crtc->config->base.adjusted_mode); + &pipe_config->base.mode, + &pipe_config->base.adjusted_mode); I915_WRITE(dvo_reg, temp | DVO_ENABLE); I915_READ(dvo_reg); @@ -235,7 +238,8 @@ intel_dvo_mode_valid(struct drm_connector *connector, } static bool intel_dvo_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config) + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct intel_dvo *intel_dvo = enc_to_dvo(encoder); const struct drm_display_mode *fixed_mode = @@ -253,12 +257,13 @@ static bool intel_dvo_compute_config(struct intel_encoder *encoder, return true; } -static void intel_dvo_pre_enable(struct intel_encoder *encoder) +static void intel_dvo_pre_enable(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); - const struct drm_display_mode *adjusted_mode = &crtc->config->base.adjusted_mode; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); + const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; struct intel_dvo *intel_dvo = enc_to_dvo(encoder); int pipe = 
crtc->pipe; u32 dvo_val; @@ -554,7 +559,6 @@ void intel_dvo_init(struct drm_device *dev) return; } - drm_encoder_cleanup(&intel_encoder->base); kfree(intel_dvo); kfree(intel_connector); } diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c new file mode 100644 index 000000000000..025e232a4205 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -0,0 +1,321 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "i915_drv.h" +#include "intel_ringbuffer.h" +#include "intel_lrc.h" + +static const struct engine_info { + const char *name; + unsigned exec_id; + enum intel_engine_hw_id hw_id; + u32 mmio_base; + unsigned irq_shift; + int (*init_legacy)(struct intel_engine_cs *engine); + int (*init_execlists)(struct intel_engine_cs *engine); +} intel_engines[] = { + [RCS] = { + .name = "render ring", + .exec_id = I915_EXEC_RENDER, + .hw_id = RCS_HW, + .mmio_base = RENDER_RING_BASE, + .irq_shift = GEN8_RCS_IRQ_SHIFT, + .init_execlists = logical_render_ring_init, + .init_legacy = intel_init_render_ring_buffer, + }, + [BCS] = { + .name = "blitter ring", + .exec_id = I915_EXEC_BLT, + .hw_id = BCS_HW, + .mmio_base = BLT_RING_BASE, + .irq_shift = GEN8_BCS_IRQ_SHIFT, + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_blt_ring_buffer, + }, + [VCS] = { + .name = "bsd ring", + .exec_id = I915_EXEC_BSD, + .hw_id = VCS_HW, + .mmio_base = GEN6_BSD_RING_BASE, + .irq_shift = GEN8_VCS1_IRQ_SHIFT, + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_bsd_ring_buffer, + }, + [VCS2] = { + .name = "bsd2 ring", + .exec_id = I915_EXEC_BSD, + .hw_id = VCS2_HW, + .mmio_base = GEN8_BSD2_RING_BASE, + .irq_shift = GEN8_VCS2_IRQ_SHIFT, + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_bsd2_ring_buffer, + }, + [VECS] = { + .name = "video enhancement ring", + .exec_id = I915_EXEC_VEBOX, + .hw_id = VECS_HW, + .mmio_base = VEBOX_RING_BASE, + .irq_shift = GEN8_VECS_IRQ_SHIFT, + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_vebox_ring_buffer, + }, +}; + +static struct intel_engine_cs * +intel_engine_setup(struct drm_i915_private *dev_priv, + enum intel_engine_id id) +{ + const struct engine_info *info = &intel_engines[id]; + struct intel_engine_cs *engine = &dev_priv->engine[id]; + + engine->id = id; + engine->i915 = dev_priv; + engine->name = info->name; + engine->exec_id = 
info->exec_id; + engine->hw_id = engine->guc_id = info->hw_id; + engine->mmio_base = info->mmio_base; + engine->irq_shift = info->irq_shift; + + return engine; +} + +/** + * intel_engines_init() - allocate, populate and init the Engine Command Streamers + * @dev: DRM device. + * + * Return: non-zero if the initialization failed. + */ +int intel_engines_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_device_info *device_info = mkwrite_device_info(dev_priv); + unsigned int mask = 0; + int (*init)(struct intel_engine_cs *engine); + unsigned int i; + int ret; + + WARN_ON(INTEL_INFO(dev_priv)->ring_mask == 0); + WARN_ON(INTEL_INFO(dev_priv)->ring_mask & + GENMASK(sizeof(mask) * BITS_PER_BYTE - 1, I915_NUM_ENGINES)); + + for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { + if (!HAS_ENGINE(dev_priv, i)) + continue; + + if (i915.enable_execlists) + init = intel_engines[i].init_execlists; + else + init = intel_engines[i].init_legacy; + + if (!init) + continue; + + ret = init(intel_engine_setup(dev_priv, i)); + if (ret) + goto cleanup; + + mask |= ENGINE_MASK(i); + } + + /* + * Catch failures to update intel_engines table when the new engines + * are added to the driver by a warning and disabling the forgotten + * engines. + */ + if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask)) + device_info->ring_mask = mask; + + device_info->num_rings = hweight32(mask); + + return 0; + +cleanup: + for (i = 0; i < I915_NUM_ENGINES; i++) { + if (i915.enable_execlists) + intel_logical_ring_cleanup(&dev_priv->engine[i]); + else + intel_engine_cleanup(&dev_priv->engine[i]); + } + + return ret; +} + +void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno) +{ + struct drm_i915_private *dev_priv = engine->i915; + + /* Our semaphore implementation is strictly monotonic (i.e. we proceed + * so long as the semaphore value in the register/page is greater + * than the sync value), so whenever we reset the seqno, + * as long as we also reset the tracking semaphore value to 0, it will + * always be before the next request's seqno. If we don't reset + * the semaphore value, then when the seqno moves backwards all + * future waits will complete instantly (causing rendering corruption). + */ + if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) { + I915_WRITE(RING_SYNC_0(engine->mmio_base), 0); + I915_WRITE(RING_SYNC_1(engine->mmio_base), 0); + if (HAS_VEBOX(dev_priv)) + I915_WRITE(RING_SYNC_2(engine->mmio_base), 0); + } + if (dev_priv->semaphore) { + struct page *page = i915_vma_first_page(dev_priv->semaphore); + void *semaphores; + + /* Semaphores are in noncoherent memory, flush to be safe */ + semaphores = kmap(page); + memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0), + 0, I915_NUM_ENGINES * gen8_semaphore_seqno_size); + drm_clflush_virt_range(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0), + I915_NUM_ENGINES * gen8_semaphore_seqno_size); + kunmap(page); + } + memset(engine->semaphore.sync_seqno, 0, + sizeof(engine->semaphore.sync_seqno)); + + intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); + if (engine->irq_seqno_barrier) + engine->irq_seqno_barrier(engine); + engine->last_submitted_seqno = seqno; + + engine->hangcheck.seqno = seqno; + + /* After manually advancing the seqno, fake the interrupt in case + * there are any waiters for that seqno. 
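+ * Without this kick, a waiter that sampled the seqno just before the
+ * reset could sleep forever, since a CPU write to the status page
+ * generates no real user interrupt.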
+ */ + intel_engine_wakeup(engine); +} + +void intel_engine_init_hangcheck(struct intel_engine_cs *engine) +{ + memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); +} + +static void intel_engine_init_requests(struct intel_engine_cs *engine) +{ + init_request_active(&engine->last_request, NULL); + INIT_LIST_HEAD(&engine->request_list); +} + +/** + * intel_engine_setup_common - setup engine state not requiring hw access + * @engine: Engine to setup. + * + * Initializes @engine structure members shared between legacy and execlists + * submission modes which do not require hardware access. + * + * Typically done early in the submission mode specific engine setup stage. + */ +void intel_engine_setup_common(struct intel_engine_cs *engine) +{ + INIT_LIST_HEAD(&engine->execlist_queue); + spin_lock_init(&engine->execlist_lock); + + engine->fence_context = fence_context_alloc(1); + + intel_engine_init_requests(engine); + intel_engine_init_hangcheck(engine); + i915_gem_batch_pool_init(engine, &engine->batch_pool); + + intel_engine_init_cmd_parser(engine); +} + +int intel_engine_create_scratch(struct intel_engine_cs *engine, int size) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int ret; + + WARN_ON(engine->scratch); + + obj = i915_gem_object_create_stolen(&engine->i915->drm, size); + if (!obj) + obj = i915_gem_object_create(&engine->i915->drm, size); + if (IS_ERR(obj)) { + DRM_ERROR("Failed to allocate scratch page\n"); + return PTR_ERR(obj); + } + + vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err_unref; + } + + ret = i915_vma_pin(vma, 0, 4096, PIN_GLOBAL | PIN_HIGH); + if (ret) + goto err_unref; + + engine->scratch = vma; + DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n", + engine->name, i915_ggtt_offset(vma)); + return 0; + +err_unref: + i915_gem_object_put(obj); + return ret; +} + +static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine) +{ + i915_vma_unpin_and_release(&engine->scratch); +} + +/** + * intel_engine_init_common - initialize engine state which might require hw access + * @engine: Engine to initialize. + * + * Initializes @engine structure members shared between legacy and execlists + * submission modes which do require hardware access. + * + * Typically done at later stages of submission mode specific engine setup. + * + * Returns zero on success or an error code on failure. + */ +int intel_engine_init_common(struct intel_engine_cs *engine) +{ + int ret; + + ret = intel_engine_init_breadcrumbs(engine); + if (ret) + return ret; + + return 0; +} + +/** + * intel_engine_cleanup_common - cleans up the engine state created by + * the common initializers. + * @engine: Engine to cleanup. + * + * This cleans up everything created by the common helpers. 
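+ * It is the counterpart of intel_engine_setup_common() and
+ * intel_engine_init_common().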
+ */ +void intel_engine_cleanup_common(struct intel_engine_cs *engine) +{ + intel_engine_cleanup_scratch(engine); + + intel_engine_fini_breadcrumbs(engine); + intel_engine_cleanup_cmd_parser(engine); + i915_gem_batch_pool_fini(&engine->batch_pool); +} diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 6a7ad3ed1463..faa67624e1ed 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -190,9 +190,13 @@ static void g4x_fbc_activate(struct drm_i915_private *dev_priv) dpfc_ctl |= DPFC_CTL_LIMIT_2X; else dpfc_ctl |= DPFC_CTL_LIMIT_1X; - dpfc_ctl |= DPFC_CTL_FENCE_EN | params->fb.fence_reg; - I915_WRITE(DPFC_FENCE_YOFF, params->crtc.fence_y_offset); + if (params->fb.fence_reg != I915_FENCE_REG_NONE) { + dpfc_ctl |= DPFC_CTL_FENCE_EN | params->fb.fence_reg; + I915_WRITE(DPFC_FENCE_YOFF, params->crtc.fence_y_offset); + } else { + I915_WRITE(DPFC_FENCE_YOFF, 0); + } /* enable it... */ I915_WRITE(DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN); @@ -244,21 +248,29 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv) dpfc_ctl |= DPFC_CTL_LIMIT_1X; break; } - dpfc_ctl |= DPFC_CTL_FENCE_EN; - if (IS_GEN5(dev_priv)) - dpfc_ctl |= params->fb.fence_reg; + + if (params->fb.fence_reg != I915_FENCE_REG_NONE) { + dpfc_ctl |= DPFC_CTL_FENCE_EN; + if (IS_GEN5(dev_priv)) + dpfc_ctl |= params->fb.fence_reg; + if (IS_GEN6(dev_priv)) { + I915_WRITE(SNB_DPFC_CTL_SA, + SNB_CPU_FENCE_ENABLE | params->fb.fence_reg); + I915_WRITE(DPFC_CPU_FENCE_OFFSET, + params->crtc.fence_y_offset); + } + } else { + if (IS_GEN6(dev_priv)) { + I915_WRITE(SNB_DPFC_CTL_SA, 0); + I915_WRITE(DPFC_CPU_FENCE_OFFSET, 0); + } + } I915_WRITE(ILK_DPFC_FENCE_YOFF, params->crtc.fence_y_offset); I915_WRITE(ILK_FBC_RT_BASE, params->fb.ggtt_offset | ILK_FBC_RT_VALID); /* enable it... 
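(setting DPFC_CTL_EN arms hardware compression with the parameters programmed above)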
*/ I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN); - if (IS_GEN6(dev_priv)) { - I915_WRITE(SNB_DPFC_CTL_SA, - SNB_CPU_FENCE_ENABLE | params->fb.fence_reg); - I915_WRITE(DPFC_CPU_FENCE_OFFSET, params->crtc.fence_y_offset); - } - intel_fbc_recompress(dev_priv); } @@ -305,7 +317,15 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv) break; } - dpfc_ctl |= IVB_DPFC_CTL_FENCE_EN; + if (params->fb.fence_reg != I915_FENCE_REG_NONE) { + dpfc_ctl |= IVB_DPFC_CTL_FENCE_EN; + I915_WRITE(SNB_DPFC_CTL_SA, + SNB_CPU_FENCE_ENABLE | params->fb.fence_reg); + I915_WRITE(DPFC_CPU_FENCE_OFFSET, params->crtc.fence_y_offset); + } else { + I915_WRITE(SNB_DPFC_CTL_SA, 0); + I915_WRITE(DPFC_CPU_FENCE_OFFSET, 0); + } if (dev_priv->fbc.false_color) dpfc_ctl |= FBC_CTL_FALSE_COLOR; @@ -324,10 +344,6 @@ I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN); - I915_WRITE(SNB_DPFC_CTL_SA, - SNB_CPU_FENCE_ENABLE | params->fb.fence_reg); - I915_WRITE(DPFC_CPU_FENCE_OFFSET, params->crtc.fence_y_offset); - intel_fbc_recompress(dev_priv); } @@ -494,7 +510,7 @@ static bool multiple_pipes_ok(struct intel_crtc *crtc, if (!no_fbc_on_multiple_pipes(dev_priv)) return true; - if (plane_state->visible) + if (plane_state->base.visible) fbc->visible_pipes_mask |= (1 << pipe); else fbc->visible_pipes_mask &= ~(1 << pipe); @@ -709,6 +725,14 @@ static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc) return effective_w <= max_w && effective_h <= max_h; } +/* XXX replace me when we have VMA tracking for intel_plane_state */ +static int get_fence_id(struct drm_framebuffer *fb) +{ + struct i915_vma *vma = i915_gem_object_to_ggtt(intel_fb_obj(fb), NULL); + + return vma && vma->fence ? vma->fence->id : I915_FENCE_REG_NONE; +} + static void intel_fbc_update_state_cache(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) @@ -725,9 +749,9 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, ilk_pipe_pixel_rate(crtc_state); cache->plane.rotation = plane_state->base.rotation; - cache->plane.src_w = drm_rect_width(&plane_state->src) >> 16; - cache->plane.src_h = drm_rect_height(&plane_state->src) >> 16; - cache->plane.visible = plane_state->visible; + cache->plane.src_w = drm_rect_width(&plane_state->base.src) >> 16; + cache->plane.src_h = drm_rect_height(&plane_state->base.src) >> 16; + cache->plane.visible = plane_state->base.visible; if (!cache->plane.visible) return; @@ -737,11 +761,11 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, /* FIXME: We lack the proper locking here, so only run this on the * platforms that need it. */ if (IS_GEN(dev_priv, 5, 6)) - cache->fb.ilk_ggtt_offset = i915_gem_obj_ggtt_offset(obj); + cache->fb.ilk_ggtt_offset = i915_gem_object_ggtt_offset(obj, NULL); cache->fb.pixel_format = fb->pixel_format; cache->fb.stride = fb->pitches[0]; - cache->fb.fence_reg = obj->fence_reg; - cache->fb.tiling_mode = obj->tiling_mode; + cache->fb.fence_reg = get_fence_id(fb); + cache->fb.tiling_mode = i915_gem_object_get_tiling(obj); } static bool intel_fbc_can_activate(struct intel_crtc *crtc) @@ -768,6 +792,10 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) /* The use of a CPU fence is mandatory in order to detect writes * by the CPU to the scanout and trigger updates to the FBC. 
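+ * Only an X-tiled scanout with a registered fence can be tracked this
+ * way, hence the tiling and fence checks below.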
+ * + * Note that it is possible for a tiled surface to be unmappable (and + * so have no fence associated with it) due to aperture constraints + * at the time of pinning. */ if (cache->fb.tiling_mode != I915_TILING_X || cache->fb.fence_reg == I915_FENCE_REG_NONE) { @@ -775,7 +803,7 @@ return false; } if (INTEL_INFO(dev_priv)->gen <= 4 && !IS_G4X(dev_priv) && - cache->plane.rotation != BIT(DRM_ROTATE_0)) { + cache->plane.rotation != DRM_ROTATE_0) { fbc->no_fbc_reason = "rotation unsupported"; return false; } @@ -1050,7 +1078,7 @@ void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv, struct intel_plane_state *intel_plane_state = to_intel_plane_state(plane_state); - if (!intel_plane_state->visible) + if (!intel_plane_state->base.visible) continue; for_each_crtc_in_state(state, crtc, crtc_state, j) { @@ -1075,6 +1103,8 @@ out: /** * intel_fbc_enable: tries to enable FBC on the CRTC * @crtc: the CRTC + * @crtc_state: corresponding &drm_crtc_state for @crtc + * @plane_state: corresponding &drm_plane_state for the primary plane of @crtc * * This function checks if the given CRTC was chosen for FBC, then enables it if * possible. Notice that it doesn't activate FBC. It is valid to call @@ -1163,11 +1193,8 @@ void intel_fbc_disable(struct intel_crtc *crtc) return; mutex_lock(&fbc->lock); - if (fbc->crtc == crtc) { - WARN_ON(!fbc->enabled); - WARN_ON(fbc->active); + if (fbc->crtc == crtc) __intel_fbc_disable(dev_priv); - } mutex_unlock(&fbc->lock); cancel_work_sync(&fbc->work.work); @@ -1212,7 +1239,7 @@ void intel_fbc_init_pipe_state(struct drm_i915_private *dev_priv) for_each_intel_crtc(&dev_priv->drm, crtc) if (intel_crtc_active(&crtc->base) && - to_intel_plane_state(crtc->base.primary->state)->visible) + to_intel_plane_state(crtc->base.primary->state)->base.visible) dev_priv->fbc.visible_pipes_mask |= (1 << crtc->pipe); } @@ -1230,12 +1257,29 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv) if (i915.enable_fbc >= 0) return !!i915.enable_fbc; + if (!HAS_FBC(dev_priv)) + return 0; + if (IS_BROADWELL(dev_priv)) return 1; return 0; } +static bool need_fbc_vtd_wa(struct drm_i915_private *dev_priv) +{ +#ifdef CONFIG_INTEL_IOMMU + /* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */ + if (intel_iommu_gfx_mapped && + (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv))) { + DRM_INFO("Disabling framebuffer compression (FBC) to prevent screen flicker with VT-d enabled\n"); + return true; + } +#endif + + return false; +} + /** * intel_fbc_init - Initialize FBC * @dev_priv: the i915 device @@ -1253,6 +1297,9 @@ void intel_fbc_init(struct drm_i915_private *dev_priv) fbc->active = false; fbc->work.scheduled = false; + if (need_fbc_vtd_wa(dev_priv)) + mkwrite_device_info(dev_priv)->has_fbc = false; + i915.enable_fbc = intel_sanitize_fbc_option(dev_priv); DRM_DEBUG_KMS("Sanitized enable_fbc value: %d\n", i915.enable_fbc); diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 86b00c6db1a6..b7098f98bb67 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -34,7 +34,6 @@ #include <linux/tty.h> #include <linux/sysrq.h> #include <linux/delay.h> -#include <linux/fb.h> #include <linux/init.h> #include <linux/vga_switcheroo.h> @@ -42,6 +41,7 @@ #include <drm/drm_crtc.h> #include <drm/drm_fb_helper.h> #include "intel_drv.h" +#include "intel_frontbuffer.h" #include <drm/i915_drm.h> #include "i915_drv.h" @@ -159,7 +159,7 @@ static int intelfb_alloc(struct
drm_fb_helper *helper, fb = __intel_framebuffer_create(dev, &mode_cmd, obj); if (IS_ERR(fb)) { - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); ret = PTR_ERR(fb); goto out; } @@ -183,13 +183,13 @@ static int intelfb_create(struct drm_fb_helper *helper, struct intel_framebuffer *intel_fb = ifbdev->fb; struct drm_device *dev = helper->dev; struct drm_i915_private *dev_priv = to_i915(dev); + struct pci_dev *pdev = dev_priv->drm.pdev; struct i915_ggtt *ggtt = &dev_priv->ggtt; struct fb_info *info; struct drm_framebuffer *fb; struct i915_vma *vma; - struct drm_i915_gem_object *obj; bool prealloc = false; - void *vaddr; + void __iomem *vaddr; int ret; if (intel_fb && @@ -215,17 +215,17 @@ static int intelfb_create(struct drm_fb_helper *helper, sizes->fb_height = intel_fb->base.height; } - obj = intel_fb->obj; - mutex_lock(&dev->struct_mutex); /* Pin the GGTT vma for our access via info->screen_base. * This also validates that any existing fb inherited from the * BIOS is suitable for our own access. */ - ret = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, BIT(DRM_ROTATE_0)); - if (ret) + vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, DRM_ROTATE_0); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); goto out_unlock; + } info = drm_fb_helper_alloc_fbi(helper); if (IS_ERR(info)) { @@ -245,13 +245,11 @@ static int intelfb_create(struct drm_fb_helper *helper, info->flags = FBINFO_DEFAULT | FBINFO_CAN_FORCE_OUTPUT; info->fbops = &intelfb_ops; - vma = i915_gem_obj_to_ggtt(obj); - /* setup aperture base/size for vesafb takeover */ info->apertures->ranges[0].base = dev->mode_config.fb_base; info->apertures->ranges[0].size = ggtt->mappable_end; - info->fix.smem_start = dev->mode_config.fb_base + vma->node.start; + info->fix.smem_start = dev->mode_config.fb_base + i915_ggtt_offset(vma); info->fix.smem_len = vma->node.size; vaddr = i915_vma_pin_iomap(vma); @@ -273,23 +271,23 @@ static int intelfb_create(struct drm_fb_helper *helper, * If the object is stolen however, it will be full of whatever * garbage was left in there.
*/ - if (ifbdev->fb->obj->stolen && !prealloc) + if (intel_fb->obj->stolen && !prealloc) memset_io(info->screen_base, 0, info->screen_size); /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */ - DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08llx, bo %p\n", - fb->width, fb->height, - i915_gem_obj_ggtt_offset(obj), obj); + DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08x\n", + fb->width, fb->height, i915_ggtt_offset(vma)); + ifbdev->vma = vma; mutex_unlock(&dev->struct_mutex); - vga_switcheroo_client_fb_set(dev->pdev, info); + vga_switcheroo_client_fb_set(pdev, info); return 0; out_destroy_fbi: drm_fb_helper_release_fbi(helper); out_unpin: - intel_unpin_fb_obj(&ifbdev->fb->base, BIT(DRM_ROTATE_0)); + intel_unpin_fb_obj(&ifbdev->fb->base, DRM_ROTATE_0); out_unlock: mutex_unlock(&dev->struct_mutex); return ret; @@ -554,7 +552,7 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev) if (ifbdev->fb) { mutex_lock(&ifbdev->helper.dev->struct_mutex); - intel_unpin_fb_obj(&ifbdev->fb->base, BIT(DRM_ROTATE_0)); + intel_unpin_fb_obj(&ifbdev->fb->base, DRM_ROTATE_0); mutex_unlock(&ifbdev->helper.dev->struct_mutex); drm_framebuffer_remove(&ifbdev->fb->base); @@ -768,7 +766,7 @@ void intel_fbdev_fini(struct drm_device *dev) if (!ifbdev) return; - flush_work(&dev_priv->fbdev_suspend_work); + cancel_work_sync(&dev_priv->fbdev_suspend_work); if (!current_is_async()) intel_fbdev_sync(ifbdev); @@ -782,7 +780,7 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous struct intel_fbdev *ifbdev = dev_priv->fbdev; struct fb_info *info; - if (!ifbdev) + if (!ifbdev || !ifbdev->fb) return; info = ifbdev->helper.fbdev; @@ -827,31 +825,28 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous void intel_fbdev_output_poll_changed(struct drm_device *dev) { - struct drm_i915_private *dev_priv = to_i915(dev); - if (dev_priv->fbdev) - drm_fb_helper_hotplug_event(&dev_priv->fbdev->helper); + struct intel_fbdev *ifbdev = to_i915(dev)->fbdev; + + if (ifbdev && ifbdev->fb) + drm_fb_helper_hotplug_event(&ifbdev->helper); } void intel_fbdev_restore_mode(struct drm_device *dev) { - int ret; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_fbdev *ifbdev = dev_priv->fbdev; - struct drm_fb_helper *fb_helper; + struct intel_fbdev *ifbdev = to_i915(dev)->fbdev; if (!ifbdev) return; intel_fbdev_sync(ifbdev); + if (!ifbdev->fb) + return; - fb_helper = &ifbdev->helper; - - ret = drm_fb_helper_restore_fbdev_mode_unlocked(fb_helper); - if (ret) { + if (drm_fb_helper_restore_fbdev_mode_unlocked(&ifbdev->helper)) { DRM_DEBUG("failed to restore crtc mode\n"); } else { - mutex_lock(&fb_helper->dev->struct_mutex); + mutex_lock(&dev->struct_mutex); intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT); - mutex_unlock(&fb_helper->dev->struct_mutex); + mutex_unlock(&dev->struct_mutex); } } diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c index ac85357010b4..966de4c7c7a2 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c @@ -63,47 +63,30 @@ #include <drm/drmP.h> #include "intel_drv.h" +#include "intel_frontbuffer.h" #include "i915_drv.h" -/** - * intel_fb_obj_invalidate - invalidate frontbuffer object - * @obj: GEM object to invalidate - * @origin: which operation caused the invalidation - * - * This function gets called every time rendering on the given object starts and - * frontbuffer caching (fbc, low refresh rate for DRRS, panel self refresh) must - * be 
invalidated. For ORIGIN_CS any subsequent invalidation will be delayed - * until the rendering completes or a flip on this frontbuffer plane is - * scheduled. - */ -void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, - enum fb_op_origin origin) +void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, + enum fb_op_origin origin, + unsigned int frontbuffer_bits) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - - if (!obj->frontbuffer_bits) - return; + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); if (origin == ORIGIN_CS) { - mutex_lock(&dev_priv->fb_tracking.lock); - dev_priv->fb_tracking.busy_bits - |= obj->frontbuffer_bits; - dev_priv->fb_tracking.flip_bits - &= ~obj->frontbuffer_bits; - mutex_unlock(&dev_priv->fb_tracking.lock); + spin_lock(&dev_priv->fb_tracking.lock); + dev_priv->fb_tracking.busy_bits |= frontbuffer_bits; + dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits; + spin_unlock(&dev_priv->fb_tracking.lock); } - intel_psr_invalidate(dev, obj->frontbuffer_bits); - intel_edp_drrs_invalidate(dev, obj->frontbuffer_bits); - intel_fbc_invalidate(dev_priv, obj->frontbuffer_bits, origin); + intel_psr_invalidate(dev_priv, frontbuffer_bits); + intel_edp_drrs_invalidate(dev_priv, frontbuffer_bits); + intel_fbc_invalidate(dev_priv, frontbuffer_bits, origin); } /** * intel_frontbuffer_flush - flush frontbuffer - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * @origin: which operation caused the flush * @@ -113,64 +96,45 @@ void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, * * Can be called without any locks held. */ -static void intel_frontbuffer_flush(struct drm_device *dev, +static void intel_frontbuffer_flush(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits, enum fb_op_origin origin) { - struct drm_i915_private *dev_priv = to_i915(dev); - /* Delay flushing when rings are still busy.*/ - mutex_lock(&dev_priv->fb_tracking.lock); + spin_lock(&dev_priv->fb_tracking.lock); frontbuffer_bits &= ~dev_priv->fb_tracking.busy_bits; - mutex_unlock(&dev_priv->fb_tracking.lock); + spin_unlock(&dev_priv->fb_tracking.lock); if (!frontbuffer_bits) return; - intel_edp_drrs_flush(dev, frontbuffer_bits); - intel_psr_flush(dev, frontbuffer_bits, origin); + intel_edp_drrs_flush(dev_priv, frontbuffer_bits); + intel_psr_flush(dev_priv, frontbuffer_bits, origin); intel_fbc_flush(dev_priv, frontbuffer_bits, origin); } -/** - * intel_fb_obj_flush - flush frontbuffer object - * @obj: GEM object to flush - * @retire: set when retiring asynchronous rendering - * @origin: which operation caused the flush - * - * This function gets called every time rendering on the given object has - * completed and frontbuffer caching can be started again. If @retire is true - * then any delayed flushes will be unblocked. 
- */ -void intel_fb_obj_flush(struct drm_i915_gem_object *obj, - bool retire, enum fb_op_origin origin) +void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, + bool retire, + enum fb_op_origin origin, + unsigned int frontbuffer_bits) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - unsigned frontbuffer_bits; - - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - - if (!obj->frontbuffer_bits) - return; - - frontbuffer_bits = obj->frontbuffer_bits; + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); if (retire) { - mutex_lock(&dev_priv->fb_tracking.lock); + spin_lock(&dev_priv->fb_tracking.lock); /* Filter out new bits since rendering started. */ frontbuffer_bits &= dev_priv->fb_tracking.busy_bits; - dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits; - mutex_unlock(&dev_priv->fb_tracking.lock); + spin_unlock(&dev_priv->fb_tracking.lock); } - intel_frontbuffer_flush(dev, frontbuffer_bits, origin); + if (frontbuffer_bits) + intel_frontbuffer_flush(dev_priv, frontbuffer_bits, origin); } /** * intel_frontbuffer_flip_prepare - prepare asynchronous frontbuffer flip - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called after scheduling a flip on @obj. The actual @@ -180,23 +144,21 @@ void intel_fb_obj_flush(struct drm_i915_gem_object *obj, * * Can be called without any locks held. */ -void intel_frontbuffer_flip_prepare(struct drm_device *dev, +void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits) { - struct drm_i915_private *dev_priv = to_i915(dev); - - mutex_lock(&dev_priv->fb_tracking.lock); + spin_lock(&dev_priv->fb_tracking.lock); dev_priv->fb_tracking.flip_bits |= frontbuffer_bits; /* Remove stale busy bits due to the old buffer. */ dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits; - mutex_unlock(&dev_priv->fb_tracking.lock); + spin_unlock(&dev_priv->fb_tracking.lock); - intel_psr_single_frame_update(dev, frontbuffer_bits); + intel_psr_single_frame_update(dev_priv, frontbuffer_bits); } /** * intel_frontbuffer_flip_complete - complete asynchronous frontbuffer flip - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called after the flip has been latched and will complete @@ -204,23 +166,23 @@ void intel_frontbuffer_flip_prepare(struct drm_device *dev, * * Can be called without any locks held. */ -void intel_frontbuffer_flip_complete(struct drm_device *dev, +void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits) { - struct drm_i915_private *dev_priv = to_i915(dev); - - mutex_lock(&dev_priv->fb_tracking.lock); + spin_lock(&dev_priv->fb_tracking.lock); /* Mask any cancelled flips. */ frontbuffer_bits &= dev_priv->fb_tracking.flip_bits; dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits; - mutex_unlock(&dev_priv->fb_tracking.lock); + spin_unlock(&dev_priv->fb_tracking.lock); - intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP); + if (frontbuffer_bits) + intel_frontbuffer_flush(dev_priv, + frontbuffer_bits, ORIGIN_FLIP); } /** * intel_frontbuffer_flip - synchronous frontbuffer flip - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called after scheduling a flip on @obj. This is for @@ -229,15 +191,13 @@ void intel_frontbuffer_flip_complete(struct drm_device *dev, * * Can be called without any locks held. 
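Taken together, the busy-bit rules in intel_frontbuffer_flush() and __intel_fb_obj_flush() above form a small state machine: ORIGIN_CS invalidation marks bits busy, a plain flush skips any still-busy bits, and a retiring flush both filters on and clears them. A standalone sketch of that bookkeeping follows (toy struct and function names are invented, and no locking is shown; the driver does all of this under fb_tracking.lock):

#include <stdio.h>

struct tracking {
	unsigned int busy_bits;	/* bits with CS rendering outstanding */
};

static void invalidate_cs(struct tracking *t, unsigned int bits)
{
	t->busy_bits |= bits;	/* delay flushes until retire */
}

static unsigned int flush(struct tracking *t, unsigned int bits, int retire)
{
	if (retire) {
		bits &= t->busy_bits;	/* filter out bits that went busy later */
		t->busy_bits &= ~bits;	/* these are now idle again */
	} else {
		bits &= ~t->busy_bits;	/* skip bits that are still busy */
	}
	return bits;			/* bits actually flushed */
}

int main(void)
{
	struct tracking t = { 0 };

	invalidate_cs(&t, 0x1);
	printf("plain flush:  %#x\n", flush(&t, 0x3, 0));	/* prints 0x2 */
	printf("retire flush: %#x\n", flush(&t, 0x3, 1));	/* prints 0x1 */
	return 0;
}

A flush is then issued only if any bits survive the filter, which is exactly the "if (frontbuffer_bits)" guard added in the hunks above.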
*/ -void intel_frontbuffer_flip(struct drm_device *dev, +void intel_frontbuffer_flip(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits) { - struct drm_i915_private *dev_priv = to_i915(dev); - - mutex_lock(&dev_priv->fb_tracking.lock); + spin_lock(&dev_priv->fb_tracking.lock); /* Remove stale busy bits due to the old buffer. */ dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits; - mutex_unlock(&dev_priv->fb_tracking.lock); + spin_unlock(&dev_priv->fb_tracking.lock); - intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP); + intel_frontbuffer_flush(dev_priv, frontbuffer_bits, ORIGIN_FLIP); } diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.h b/drivers/gpu/drm/i915/intel_frontbuffer.h new file mode 100644 index 000000000000..76ceb539f9f0 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_frontbuffer.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2014-2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __INTEL_FRONTBUFFER_H__ +#define __INTEL_FRONTBUFFER_H__ + +struct drm_i915_private; +struct drm_i915_gem_object; + +void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv, + unsigned frontbuffer_bits); +void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv, + unsigned frontbuffer_bits); +void intel_frontbuffer_flip(struct drm_i915_private *dev_priv, + unsigned frontbuffer_bits); + +void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, + enum fb_op_origin origin, + unsigned int frontbuffer_bits); +void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, + bool retire, + enum fb_op_origin origin, + unsigned int frontbuffer_bits); + +/** + * intel_fb_obj_invalidate - invalidate frontbuffer object + * @obj: GEM object to invalidate + * @origin: which operation caused the invalidation + * + * This function gets called every time rendering on the given object starts and + * frontbuffer caching (fbc, low refresh rate for DRRS, panel self refresh) must + * be invalidated. For ORIGIN_CS any subsequent invalidation will be delayed + * until the rendering completes or a flip on this frontbuffer plane is + * scheduled. 
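The inline helpers this new header defines implement a lock-free fast path: a single atomic read of obj->frontbuffer_bits lets the common case, an object that is not a frontbuffer at all, return without taking a lock or making an out-of-line call. A minimal C11 sketch of that shape (toy types, not the driver's own):

#include <stdatomic.h>
#include <stdio.h>

struct toy_obj {
	atomic_uint frontbuffer_bits;	/* mirrors obj->frontbuffer_bits */
};

static void slow_path(struct toy_obj *obj, unsigned int bits)
{
	/* stands in for the out-of-line __intel_fb_obj_invalidate() */
	printf("invalidating frontbuffer bits %#x\n", bits);
}

static inline void fb_obj_invalidate(struct toy_obj *obj)
{
	unsigned int bits = atomic_load(&obj->frontbuffer_bits);

	if (!bits)
		return;	/* fast path: not a frontbuffer object */

	slow_path(obj, bits);
}

int main(void)
{
	struct toy_obj obj;

	atomic_init(&obj.frontbuffer_bits, 0x2);
	fb_obj_invalidate(&obj);
	return 0;
}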
+ */ +static inline void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, + enum fb_op_origin origin) +{ + unsigned int frontbuffer_bits; + + frontbuffer_bits = atomic_read(&obj->frontbuffer_bits); + if (!frontbuffer_bits) + return; + + __intel_fb_obj_invalidate(obj, origin, frontbuffer_bits); +} + +/** + * intel_fb_obj_flush - flush frontbuffer object + * @obj: GEM object to flush + * @retire: set when retiring asynchronous rendering + * @origin: which operation caused the flush + * + * This function gets called every time rendering on the given object has + * completed and frontbuffer caching can be started again. If @retire is true + * then any delayed flushes will be unblocked. + */ +static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj, + bool retire, + enum fb_op_origin origin) +{ + unsigned int frontbuffer_bits; + + frontbuffer_bits = atomic_read(&obj->frontbuffer_bits); + if (!frontbuffer_bits) + return; + + __intel_fb_obj_flush(obj, retire, origin, frontbuffer_bits); +} + +#endif /* __INTEL_FRONTBUFFER_H__ */ diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 3e3e743740c0..5cdf7aa75be5 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -63,26 +63,27 @@ struct drm_i915_gem_request; * retcode: errno from last guc_submit() */ struct i915_guc_client { - struct drm_i915_gem_object *client_obj; + struct i915_vma *vma; void *client_base; /* first page (only) of above */ struct i915_gem_context *owner; struct intel_guc *guc; + + uint32_t engines; /* bitmap of (host) engine ids */ uint32_t priority; uint32_t ctx_index; - uint32_t proc_desc_offset; + uint32_t doorbell_offset; uint32_t cookie; uint16_t doorbell_id; - uint16_t padding; /* Maintain alignment */ + uint16_t padding[3]; /* Maintain alignment */ + spinlock_t wq_lock; uint32_t wq_offset; uint32_t wq_size; uint32_t wq_tail; - uint32_t unused; /* Was 'wq_head' */ - + uint32_t wq_rsvd; uint32_t no_wq_space; - uint32_t q_fail; /* No longer used */ uint32_t b_fail; int retcode; @@ -125,11 +126,10 @@ struct intel_guc_fw { struct intel_guc { struct intel_guc_fw guc_fw; uint32_t log_flags; - struct drm_i915_gem_object *log_obj; - - struct drm_i915_gem_object *ads_obj; + struct i915_vma *log_vma; - struct drm_i915_gem_object *ctx_pool_obj; + struct i915_vma *ads_vma; + struct i915_vma *ctx_pool_vma; struct ida ctx_ids; struct i915_guc_client *execbuf_client; @@ -159,8 +159,8 @@ extern int intel_guc_resume(struct drm_device *dev); /* i915_guc_submission.c */ int i915_guc_submission_init(struct drm_i915_private *dev_priv); int i915_guc_submission_enable(struct drm_i915_private *dev_priv); -int i915_guc_wq_check_space(struct drm_i915_gem_request *rq); -int i915_guc_submit(struct drm_i915_gem_request *rq); +int i915_guc_wq_reserve(struct drm_i915_gem_request *rq); +void i915_guc_wq_unreserve(struct drm_i915_gem_request *request); void i915_guc_submission_disable(struct drm_i915_private *dev_priv); void i915_guc_submission_fini(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index 944786d7075b..e40db2d2ae99 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -155,6 +155,7 @@ * * +-------------------------------+ * | guc_css_header | + * | | * | contains major/minor version | * +-------------------------------+ * | uCode | @@ -176,10 +177,10 @@ * * 1. Header, uCode and RSA are must-have components. * 2. 
All firmware components, if present, are in the sequence illustrated - * in the layout table above. + * in the layout table above. * 3. Length info of each component can be found in header, in dwords. * 4. Modulus and exponent key are not required by driver. They may not appear - * in fw. So driver will load a truncated firmware in this case. + * in fw. So driver will load a truncated firmware in this case. */ struct guc_css_header { diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 605c69658d2c..6fd39efb7894 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -59,13 +59,25 @@ * */ -#define I915_SKL_GUC_UCODE "i915/skl_guc_ver6_1.bin" +#define SKL_FW_MAJOR 6 +#define SKL_FW_MINOR 1 + +#define BXT_FW_MAJOR 8 +#define BXT_FW_MINOR 7 + +#define KBL_FW_MAJOR 9 +#define KBL_FW_MINOR 14 + +#define GUC_FW_PATH(platform, major, minor) \ + "i915/" __stringify(platform) "_guc_ver" __stringify(major) "_" __stringify(minor) ".bin" + +#define I915_SKL_GUC_UCODE GUC_FW_PATH(skl, SKL_FW_MAJOR, SKL_FW_MINOR) MODULE_FIRMWARE(I915_SKL_GUC_UCODE); -#define I915_BXT_GUC_UCODE "i915/bxt_guc_ver8_7.bin" +#define I915_BXT_GUC_UCODE GUC_FW_PATH(bxt, BXT_FW_MAJOR, BXT_FW_MINOR) MODULE_FIRMWARE(I915_BXT_GUC_UCODE); -#define I915_KBL_GUC_UCODE "i915/kbl_guc_ver9_14.bin" +#define I915_KBL_GUC_UCODE GUC_FW_PATH(kbl, KBL_FW_MAJOR, KBL_FW_MINOR) MODULE_FIRMWARE(I915_KBL_GUC_UCODE); /* User-friendly representation of an enum */ @@ -85,7 +97,7 @@ const char *intel_guc_fw_status_repr(enum intel_guc_fw_status status) } }; -static void direct_interrupts_to_host(struct drm_i915_private *dev_priv) +static void guc_interrupts_release(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; int irqs; @@ -102,7 +114,7 @@ static void direct_interrupts_to_host(struct drm_i915_private *dev_priv) I915_WRITE(GUC_WD_VECS_IER, 0); } -static void direct_interrupts_to_guc(struct drm_i915_private *dev_priv) +static void guc_interrupts_capture(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; int irqs; @@ -122,13 +134,28 @@ static void direct_interrupts_to_guc(struct drm_i915_private *dev_priv) I915_WRITE(GUC_WD_VECS_IER, ~irqs); /* - * If GuC has routed PM interrupts to itself, don't keep it. - * and keep other interrupts those are unmasked by GuC. - */ + * The REDIRECT_TO_GUC bit of the PMINTRMSK register directs all + * (unmasked) PM interrupts to the GuC. All other bits of this + * register *disable* generation of a specific interrupt. + * + * 'pm_intr_keep' indicates bits that are NOT to be set when + * writing to the PM interrupt mask register, i.e. interrupts + * that must not be disabled. + * + * If the GuC is handling these interrupts, then we must not let + * the PM code disable ANY interrupt that the GuC is expecting. + * So for each ENABLED (0) bit in this register, we must SET the + * bit in pm_intr_keep so that it's left enabled for the GuC. + * + * OTOH the REDIRECT_TO_GUC bit is initially SET in pm_intr_keep + * (so interrupts go to the DISPLAY unit at first); but here we + * need to CLEAR that bit, which will result in the register bit + * being left SET!
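Concretely, because a 0 bit in PMINTRMSK means "interrupt enabled", the bitwise inversion in the hunk below collects exactly the enabled interrupts into pm_intr_keep. A standalone sketch with an invented register value:

#include <stdint.h>
#include <stdio.h>

#define REDIRECT_TO_GUC (1u << 31)	/* stand-in for GEN8_PMINTR_REDIRECT_TO_GUC */

int main(void)
{
	uint32_t pmintrmsk = ~0xfu;	/* bits 0-3 enabled (0), rest masked; bit 31 set */
	uint32_t pm_intr_keep = REDIRECT_TO_GUC;

	if (pmintrmsk & REDIRECT_TO_GUC) {
		pm_intr_keep |= ~pmintrmsk;		/* keep every enabled bit */
		pm_intr_keep &= ~REDIRECT_TO_GUC;	/* ...but clear the redirect */
	}

	printf("pm_intr_keep = %#x\n", pm_intr_keep);	/* prints 0xf */
	return 0;
}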
+ */ tmp = I915_READ(GEN6_PMINTRMSK); - if (tmp & GEN8_PMINTR_REDIRECT_TO_NON_DISP) { - dev_priv->rps.pm_intr_keep |= ~(tmp & ~GEN8_PMINTR_REDIRECT_TO_NON_DISP); - dev_priv->rps.pm_intr_keep &= ~GEN8_PMINTR_REDIRECT_TO_NON_DISP; + if (tmp & GEN8_PMINTR_REDIRECT_TO_GUC) { + dev_priv->rps.pm_intr_keep |= ~tmp; + dev_priv->rps.pm_intr_keep &= ~GEN8_PMINTR_REDIRECT_TO_GUC; } } @@ -140,17 +167,24 @@ static u32 get_gttype(struct drm_i915_private *dev_priv) static u32 get_core_family(struct drm_i915_private *dev_priv) { - switch (INTEL_INFO(dev_priv)->gen) { + u32 gen = INTEL_GEN(dev_priv); + + switch (gen) { case 9: return GFXCORE_FAMILY_GEN9; default: - DRM_ERROR("GUC: unsupported core family\n"); + WARN(1, "GEN%d does not support GuC operation!\n", gen); return GFXCORE_FAMILY_UNKNOWN; } } -static void set_guc_init_params(struct drm_i915_private *dev_priv) +/* + * Initialise the GuC parameter block before starting the firmware + * transfer. These parameters are read by the firmware on startup + * and cannot be changed thereafter. + */ +static void guc_params_init(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; u32 params[GUC_CTL_MAX_DWORDS]; @@ -181,16 +215,15 @@ static void set_guc_init_params(struct drm_i915_private *dev_priv) i915.guc_log_level << GUC_LOG_VERBOSITY_SHIFT; } - if (guc->ads_obj) { - u32 ads = (u32)i915_gem_obj_ggtt_offset(guc->ads_obj) - >> PAGE_SHIFT; + if (guc->ads_vma) { + u32 ads = i915_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT; params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT; params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED; } /* If GuC submission is enabled, set up additional parameters here */ if (i915.enable_guc_submission) { - u32 pgs = i915_gem_obj_ggtt_offset(dev_priv->guc.ctx_pool_obj); + u32 pgs = i915_ggtt_offset(dev_priv->guc.ctx_pool_vma); u32 ctx_in_16 = GUC_MAX_GPU_CONTEXTS / 16; pgs >>= PAGE_SHIFT; @@ -238,12 +271,12 @@ static inline bool guc_ucode_response(struct drm_i915_private *dev_priv, * Note that GuC needs the CSS header plus uKernel code to be copied by the * DMA engine in one operation, whereas the RSA signature is loaded via MMIO. 
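Returning to the GUC_FW_PATH macro introduced a few hunks above: the kernel's __stringify() is a two-level stringification, so the version macros are expanded to their numeric values before being quoted. A self-contained sketch of the expansion, with local STR/XSTR helpers standing in for __stringify():

#include <stdio.h>

#define STR(x) #x
#define XSTR(x) STR(x)	/* expand x first, then stringify */
#define GUC_FW_PATH(platform, major, minor) \
	"i915/" XSTR(platform) "_guc_ver" XSTR(major) "_" XSTR(minor) ".bin"

#define SKL_FW_MAJOR 6
#define SKL_FW_MINOR 1

int main(void)
{
	/* prints: i915/skl_guc_ver6_1.bin */
	puts(GUC_FW_PATH(skl, SKL_FW_MAJOR, SKL_FW_MINOR));
	return 0;
}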
*/ -static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv) +static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv, + struct i915_vma *vma) { struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; - struct drm_i915_gem_object *fw_obj = guc_fw->guc_fw_obj; unsigned long offset; - struct sg_table *sg = fw_obj->pages; + struct sg_table *sg = vma->pages; u32 status, rsa[UOS_RSA_SCRATCH_MAX_COUNT]; int i, ret = 0; @@ -260,7 +293,7 @@ static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv) I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size); /* Set the source address for the new blob */ - offset = i915_gem_obj_ggtt_offset(fw_obj) + guc_fw->header_offset; + offset = i915_ggtt_offset(vma) + guc_fw->header_offset; I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF); @@ -315,6 +348,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) { struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; struct drm_device *dev = &dev_priv->drm; + struct i915_vma *vma; int ret; ret = i915_gem_object_set_to_gtt_domain(guc_fw->guc_fw_obj, false); @@ -323,10 +357,10 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) return ret; } - ret = i915_gem_obj_ggtt_pin(guc_fw->guc_fw_obj, 0, 0); - if (ret) { - DRM_DEBUG_DRIVER("pin failed %d\n", ret); - return ret; + vma = i915_gem_object_ggtt_pin(guc_fw->guc_fw_obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) { + DRM_DEBUG_DRIVER("pin failed %d\n", (int)PTR_ERR(vma)); + return PTR_ERR(vma); } /* Invalidate GuC TLB to let GuC take the latest updates to GTT. */ @@ -349,7 +383,9 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) } /* WaC6DisallowByGfxPause*/ - I915_WRITE(GEN6_GFXPAUSE, 0x30FFF); + if (IS_SKL_REVID(dev, 0, SKL_REVID_C0) || + IS_BXT_REVID(dev, 0, BXT_REVID_B0)) + I915_WRITE(GEN6_GFXPAUSE, 0x30FFF); if (IS_BROXTON(dev)) I915_WRITE(GEN9LP_GT_PM_CONFIG, GT_DOORBELL_ENABLE); @@ -361,13 +397,13 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) I915_WRITE(GEN7_MISCCPCTL, (GEN8_DOP_CLOCK_GATE_GUC_ENABLE | I915_READ(GEN7_MISCCPCTL))); - /* allows for 5us before GT can go to RC6 */ + /* allows for 5us (in 10ns units) before GT can go to RC6 */ I915_WRITE(GUC_ARAT_C6DIS, 0x1FF); } - set_guc_init_params(dev_priv); + guc_params_init(dev_priv); - ret = guc_ucode_xfer_dma(dev_priv); + ret = guc_ucode_xfer_dma(dev_priv, vma); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); @@ -375,12 +411,12 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) * We keep the object pages for reuse during resume. But we can unpin it * now that DMA has completed, so it doesn't continue to take up space. */ - i915_gem_object_ggtt_unpin(guc_fw->guc_fw_obj); + i915_vma_unpin(vma); return ret; } -static int i915_reset_guc(struct drm_i915_private *dev_priv) +static int guc_hw_reset(struct drm_i915_private *dev_priv) { int ret; u32 guc_status; @@ -433,7 +469,7 @@ int intel_guc_setup(struct drm_device *dev) goto fail; } else if (*fw_path == '\0') { /* Device has a GuC but we don't know what f/w to load? 
*/ - DRM_INFO("No GuC firmware known for this platform\n"); + WARN(1, "No GuC firmware known for this platform!\n"); err = -ENODEV; goto fail; } @@ -447,7 +483,7 @@ int intel_guc_setup(struct drm_device *dev) goto fail; } - direct_interrupts_to_host(dev_priv); + guc_interrupts_release(dev_priv); guc_fw->guc_fw_load_status = GUC_FIRMWARE_PENDING; @@ -470,11 +506,9 @@ int intel_guc_setup(struct drm_device *dev) * Always reset the GuC just before (re)loading, so * that the state and timing are fairly predictable */ - err = i915_reset_guc(dev_priv); - if (err) { - DRM_ERROR("GuC reset failed: %d\n", err); + err = guc_hw_reset(dev_priv); + if (err) goto fail; - } err = guc_ucode_xfer(dev_priv); if (!err) @@ -497,7 +531,7 @@ int intel_guc_setup(struct drm_device *dev) err = i915_guc_submission_enable(dev_priv); if (err) goto fail; - direct_interrupts_to_guc(dev_priv); + guc_interrupts_capture(dev_priv); } return 0; @@ -506,7 +540,7 @@ fail: if (guc_fw->guc_fw_load_status == GUC_FIRMWARE_PENDING) guc_fw->guc_fw_load_status = GUC_FIRMWARE_FAIL; - direct_interrupts_to_host(dev_priv); + guc_interrupts_release(dev_priv); i915_guc_submission_disable(dev_priv); i915_guc_submission_fini(dev_priv); @@ -532,15 +566,15 @@ fail: else if (err == 0) DRM_INFO("GuC firmware load skipped\n"); else if (ret != -EIO) - DRM_INFO("GuC firmware load failed: %d\n", err); + DRM_NOTE("GuC firmware load failed: %d\n", err); else - DRM_ERROR("GuC firmware load failed: %d\n", err); + DRM_WARN("GuC firmware load failed: %d\n", err); if (i915.enable_guc_submission) { if (fw_path == NULL) DRM_INFO("GuC submission without firmware not supported\n"); if (ret == 0) - DRM_INFO("Falling back from GuC submission to execlist mode\n"); + DRM_NOTE("Falling back from GuC submission to execlist mode\n"); else DRM_ERROR("GuC init failed: %d\n", ret); } @@ -551,6 +585,7 @@ fail: static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) { + struct pci_dev *pdev = dev->pdev; struct drm_i915_gem_object *obj; const struct firmware *fw; struct guc_css_header *css; @@ -560,7 +595,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) DRM_DEBUG_DRIVER("before requesting firmware: GuC fw fetch status %s\n", intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status)); - err = request_firmware(&fw, guc_fw->guc_fw_path, &dev->pdev->dev); + err = request_firmware(&fw, guc_fw->guc_fw_path, &pdev->dev); if (err) goto fail; if (!fw) @@ -571,7 +606,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* Check the size of the blob before examining buffer contents */ if (fw->size < sizeof(struct guc_css_header)) { - DRM_ERROR("Firmware header is missing\n"); + DRM_NOTE("Firmware header is missing\n"); goto fail; } @@ -583,7 +618,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) css->key_size_dw - css->exponent_size_dw) * sizeof(u32); if (guc_fw->header_size != sizeof(struct guc_css_header)) { - DRM_ERROR("CSS header definition mismatch\n"); + DRM_NOTE("CSS header definition mismatch\n"); goto fail; } @@ -593,7 +628,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* now RSA */ if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) { - DRM_ERROR("RSA key size is bad\n"); + DRM_NOTE("RSA key size is bad\n"); goto fail; } guc_fw->rsa_offset = guc_fw->ucode_offset + guc_fw->ucode_size; @@ -602,14 +637,14 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) /* At least, it should have header, uCode 
and RSA. Size of all three. */ size = guc_fw->header_size + guc_fw->ucode_size + guc_fw->rsa_size; if (fw->size < size) { - DRM_ERROR("Missing firmware components\n"); + DRM_NOTE("Missing firmware components\n"); goto fail; } /* Header and uCode will be loaded to WOPCM. Size of the two. */ size = guc_fw->header_size + guc_fw->ucode_size; if (size > guc_wopcm_size(to_i915(dev))) { - DRM_ERROR("Firmware is too large to fit in WOPCM\n"); + DRM_NOTE("Firmware is too large to fit in WOPCM\n"); goto fail; } @@ -624,7 +659,7 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) if (guc_fw->guc_fw_major_found != guc_fw->guc_fw_major_wanted || guc_fw->guc_fw_minor_found < guc_fw->guc_fw_minor_wanted) { - DRM_ERROR("GuC firmware version %d.%d, required %d.%d\n", + DRM_NOTE("GuC firmware version %d.%d, required %d.%d\n", guc_fw->guc_fw_major_found, guc_fw->guc_fw_minor_found, guc_fw->guc_fw_major_wanted, guc_fw->guc_fw_minor_wanted); err = -ENOEXEC; @@ -654,15 +689,15 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw) return; fail: + DRM_WARN("Failed to fetch valid GuC firmware from %s (error %d)\n", + guc_fw->guc_fw_path, err); DRM_DEBUG_DRIVER("GuC fw fetch status FAIL; err %d, fw %p, obj %p\n", err, fw, guc_fw->guc_fw_obj); - DRM_ERROR("Failed to fetch GuC firmware from %s (error %d)\n", - guc_fw->guc_fw_path, err); mutex_lock(&dev->struct_mutex); obj = guc_fw->guc_fw_obj; if (obj) - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); guc_fw->guc_fw_obj = NULL; mutex_unlock(&dev->struct_mutex); @@ -695,16 +730,16 @@ void intel_guc_init(struct drm_device *dev) fw_path = NULL; } else if (IS_SKYLAKE(dev)) { fw_path = I915_SKL_GUC_UCODE; - guc_fw->guc_fw_major_wanted = 6; - guc_fw->guc_fw_minor_wanted = 1; + guc_fw->guc_fw_major_wanted = SKL_FW_MAJOR; + guc_fw->guc_fw_minor_wanted = SKL_FW_MINOR; } else if (IS_BROXTON(dev)) { fw_path = I915_BXT_GUC_UCODE; - guc_fw->guc_fw_major_wanted = 8; - guc_fw->guc_fw_minor_wanted = 7; + guc_fw->guc_fw_major_wanted = BXT_FW_MAJOR; + guc_fw->guc_fw_minor_wanted = BXT_FW_MINOR; } else if (IS_KABYLAKE(dev)) { fw_path = I915_KBL_GUC_UCODE; - guc_fw->guc_fw_major_wanted = 9; - guc_fw->guc_fw_minor_wanted = 14; + guc_fw->guc_fw_major_wanted = KBL_FW_MAJOR; + guc_fw->guc_fw_minor_wanted = KBL_FW_MINOR; } else { fw_path = ""; /* unknown device */ } @@ -738,12 +773,12 @@ void intel_guc_fini(struct drm_device *dev) struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; mutex_lock(&dev->struct_mutex); - direct_interrupts_to_host(dev_priv); + guc_interrupts_release(dev_priv); i915_guc_submission_disable(dev_priv); i915_guc_submission_fini(dev_priv); if (guc_fw->guc_fw_obj) - drm_gem_object_unreference(&guc_fw->guc_fw_obj->base); + i915_gem_object_put(guc_fw->guc_fw_obj); guc_fw->guc_fw_obj = NULL; mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 4df9f384910c..f40a35f2913a 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -985,7 +985,9 @@ static void intel_enable_hdmi_audio(struct intel_encoder *encoder) intel_audio_codec_enable(encoder); } -static void g4x_enable_hdmi(struct intel_encoder *encoder) +static void g4x_enable_hdmi(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -1006,7 +1008,9 @@ static void g4x_enable_hdmi(struct 
intel_encoder *encoder) intel_enable_hdmi_audio(encoder); } -static void ibx_enable_hdmi(struct intel_encoder *encoder) +static void ibx_enable_hdmi(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -1055,7 +1059,9 @@ static void ibx_enable_hdmi(struct intel_encoder *encoder) intel_enable_hdmi_audio(encoder); } -static void cpt_enable_hdmi(struct intel_encoder *encoder) +static void cpt_enable_hdmi(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -1108,11 +1114,15 @@ static void cpt_enable_hdmi(struct intel_encoder *encoder) intel_enable_hdmi_audio(encoder); } -static void vlv_enable_hdmi(struct intel_encoder *encoder) +static void vlv_enable_hdmi(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { } -static void intel_disable_hdmi(struct intel_encoder *encoder) +static void intel_disable_hdmi(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -1164,17 +1174,21 @@ static void intel_disable_hdmi(struct intel_encoder *encoder) intel_dp_dual_mode_set_tmds_output(intel_hdmi, false); } -static void g4x_disable_hdmi(struct intel_encoder *encoder) +static void g4x_disable_hdmi(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); if (crtc->config->has_audio) intel_audio_codec_disable(encoder); - intel_disable_hdmi(encoder); + intel_disable_hdmi(encoder, old_crtc_state, old_conn_state); } -static void pch_disable_hdmi(struct intel_encoder *encoder) +static void pch_disable_hdmi(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); @@ -1182,9 +1196,11 @@ static void pch_disable_hdmi(struct intel_encoder *encoder) intel_audio_codec_disable(encoder); } -static void pch_post_disable_hdmi(struct intel_encoder *encoder) +static void pch_post_disable_hdmi(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { - intel_disable_hdmi(encoder); + intel_disable_hdmi(encoder, old_crtc_state, old_conn_state); } static int intel_hdmi_source_max_tmds_clock(struct drm_i915_private *dev_priv) @@ -1204,10 +1220,17 @@ static int hdmi_port_clock_limit(struct intel_hdmi *hdmi, int max_tmds_clock = intel_hdmi_source_max_tmds_clock(to_i915(dev)); if (respect_downstream_limits) { + struct intel_connector *connector = hdmi->attached_connector; + const struct drm_display_info *info = &connector->base.display_info; + if (hdmi->dp_dual_mode.max_tmds_clock) max_tmds_clock = min(max_tmds_clock, hdmi->dp_dual_mode.max_tmds_clock); - if (!hdmi->has_hdmi_sink) + + if (info->max_tmds_clock) + max_tmds_clock = min(max_tmds_clock, + info->max_tmds_clock); + else if (!hdmi->has_hdmi_sink) max_tmds_clock = min(max_tmds_clock, 165000); } @@ -1285,7 +1308,8 @@ static bool hdmi_12bpc_possible(struct intel_crtc_state *crtc_state) } bool intel_hdmi_compute_config(struct 
intel_encoder *encoder, - struct intel_crtc_state *pipe_config) + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); struct drm_device *dev = encoder->base.dev; @@ -1422,24 +1446,22 @@ intel_hdmi_dp_dual_mode_detect(struct drm_connector *connector, bool has_edid) } static bool -intel_hdmi_set_edid(struct drm_connector *connector, bool force) +intel_hdmi_set_edid(struct drm_connector *connector) { struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); - struct edid *edid = NULL; + struct edid *edid; bool connected = false; - if (force) { - intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); + intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); - edid = drm_get_edid(connector, - intel_gmbus_get_adapter(dev_priv, - intel_hdmi->ddc_bus)); + edid = drm_get_edid(connector, + intel_gmbus_get_adapter(dev_priv, + intel_hdmi->ddc_bus)); - intel_hdmi_dp_dual_mode_detect(connector, edid != NULL); + intel_hdmi_dp_dual_mode_detect(connector, edid != NULL); - intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS); - } + intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS); to_intel_connector(connector)->detect_edid = edid; if (edid && edid->input & DRM_EDID_INPUT_DIGITAL) { @@ -1465,37 +1487,16 @@ static enum drm_connector_status intel_hdmi_detect(struct drm_connector *connector, bool force) { enum drm_connector_status status; - struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); struct drm_i915_private *dev_priv = to_i915(connector->dev); - bool live_status = false; - unsigned int try; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); - for (try = 0; !live_status && try < 9; try++) { - if (try) - msleep(10); - live_status = intel_digital_port_connected(dev_priv, - hdmi_to_dig_port(intel_hdmi)); - } - - if (!live_status) { - DRM_DEBUG_KMS("HDMI live status down\n"); - /* - * Live status register is not reliable on all intel platforms. - * So consider live_status only for certain platforms, for - * others, read EDID to determine presence of sink. 
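The reworked hdmi_port_clock_limit() above now honours the sink's EDID-reported TMDS limit, and only falls back to the 165 MHz single-link/DVI ceiling when no EDID limit is available. A standalone sketch of the clamping order (all values in kHz; the helper names are invented):

#include <stdio.h>

static int min_int(int a, int b) { return a < b ? a : b; }

static int port_clock_limit(int source_max, int dual_mode_max,
			    int edid_max, int has_hdmi_sink)
{
	int max = source_max;

	if (dual_mode_max)			/* DP dual-mode adaptor limit */
		max = min_int(max, dual_mode_max);

	if (edid_max)				/* sink limit from EDID */
		max = min_int(max, edid_max);
	else if (!has_hdmi_sink)		/* DVI fallback */
		max = min_int(max, 165000);

	return max;
}

int main(void)
{
	printf("%d kHz\n", port_clock_limit(300000, 0, 225000, 1));
	return 0;
}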
- */ - if (INTEL_INFO(dev_priv)->gen < 7 || IS_IVYBRIDGE(dev_priv)) - live_status = true; - } - intel_hdmi_unset_edid(connector); - if (intel_hdmi_set_edid(connector, live_status)) { + if (intel_hdmi_set_edid(connector)) { struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); hdmi_to_dig_port(intel_hdmi)->base.type = INTEL_OUTPUT_HDMI; @@ -1521,7 +1522,7 @@ intel_hdmi_force(struct drm_connector *connector) if (connector->status != connector_status_connected) return; - intel_hdmi_set_edid(connector, true); + intel_hdmi_set_edid(connector); hdmi_to_dig_port(intel_hdmi)->base.type = INTEL_OUTPUT_HDMI; } @@ -1638,7 +1639,9 @@ done: return 0; } -static void intel_hdmi_pre_enable(struct intel_encoder *encoder) +static void intel_hdmi_pre_enable(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); @@ -1651,7 +1654,9 @@ static void intel_hdmi_pre_enable(struct intel_encoder *encoder) adjusted_mode); } -static void vlv_hdmi_pre_enable(struct intel_encoder *encoder) +static void vlv_hdmi_pre_enable(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); struct intel_hdmi *intel_hdmi = &dport->hdmi; @@ -1671,37 +1676,47 @@ static void vlv_hdmi_pre_enable(struct intel_encoder *encoder) intel_crtc->config->has_hdmi_sink, adjusted_mode); - g4x_enable_hdmi(encoder); + g4x_enable_hdmi(encoder, pipe_config, conn_state); vlv_wait_port_ready(dev_priv, dport, 0x0); } -static void vlv_hdmi_pre_pll_enable(struct intel_encoder *encoder) +static void vlv_hdmi_pre_pll_enable(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { intel_hdmi_prepare(encoder); vlv_phy_pre_pll_enable(encoder); } -static void chv_hdmi_pre_pll_enable(struct intel_encoder *encoder) +static void chv_hdmi_pre_pll_enable(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { intel_hdmi_prepare(encoder); chv_phy_pre_pll_enable(encoder); } -static void chv_hdmi_post_pll_disable(struct intel_encoder *encoder) +static void chv_hdmi_post_pll_disable(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { chv_phy_post_pll_disable(encoder); } -static void vlv_hdmi_post_disable(struct intel_encoder *encoder) +static void vlv_hdmi_post_disable(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { /* Reset lanes to avoid HDMI flicker (VLV w/a) */ vlv_phy_reset_lanes(encoder); } -static void chv_hdmi_post_disable(struct intel_encoder *encoder) +static void chv_hdmi_post_disable(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -1714,7 +1729,9 @@ static void chv_hdmi_post_disable(struct intel_encoder *encoder) mutex_unlock(&dev_priv->sb_lock); } -static void chv_hdmi_pre_enable(struct intel_encoder *encoder) +static void chv_hdmi_pre_enable(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct intel_digital_port *dport = 
enc_to_dig_port(&encoder->base); struct intel_hdmi *intel_hdmi = &dport->hdmi; @@ -1734,7 +1751,7 @@ static void chv_hdmi_pre_enable(struct intel_encoder *encoder) intel_crtc->config->has_hdmi_sink, adjusted_mode); - g4x_enable_hdmi(encoder); + g4x_enable_hdmi(encoder, pipe_config, conn_state); vlv_wait_port_ready(dev_priv, dport, 0x0); diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index f48957ea100d..334d47b5811a 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -477,7 +477,8 @@ void intel_hpd_init(struct drm_i915_private *dev_priv) spin_unlock_irq(&dev_priv->irq_lock); } -void i915_hpd_poll_init_work(struct work_struct *work) { +static void i915_hpd_poll_init_work(struct work_struct *work) +{ struct drm_i915_private *dev_priv = container_of(work, struct drm_i915_private, hotplug.poll_init_work); @@ -525,7 +526,6 @@ void i915_hpd_poll_init_work(struct work_struct *work) { /** * intel_hpd_poll_init - enables/disables polling for connectors with hpd * @dev_priv: i915 device instance - * @enabled: Whether to enable or disable polling * * This function enables polling for all connectors, regardless of whether or * not they support hotplug detection. Under certain conditions HPD may not be diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index 1f266d7df2ec..79aab9ad6faa 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -255,67 +255,59 @@ intel_gpio_setup(struct intel_gmbus *bus, unsigned int pin) algo->data = bus; } -static int -gmbus_wait_hw_status(struct drm_i915_private *dev_priv, - u32 gmbus2_status, - u32 gmbus4_irq_en) +static int gmbus_wait(struct drm_i915_private *dev_priv, u32 status, u32 irq_en) { - int i; - u32 gmbus2 = 0; DEFINE_WAIT(wait); - - if (!HAS_GMBUS_IRQ(dev_priv)) - gmbus4_irq_en = 0; + u32 gmbus2; + int ret; /* Important: The hw handles only the first bit, so set only one! Since * we also need to check for NAKs besides the hw ready/idle signal, we - * need to wake up periodically and check that ourselves. */ - I915_WRITE(GMBUS4, gmbus4_irq_en); - - for (i = 0; i < msecs_to_jiffies_timeout(50); i++) { - prepare_to_wait(&dev_priv->gmbus_wait_queue, &wait, - TASK_UNINTERRUPTIBLE); + * need to wake up periodically and check that ourselves. + */ + if (!HAS_GMBUS_IRQ(dev_priv)) + irq_en = 0; - gmbus2 = I915_READ_NOTRACE(GMBUS2); - if (gmbus2 & (GMBUS_SATOER | gmbus2_status)) - break; + add_wait_queue(&dev_priv->gmbus_wait_queue, &wait); + I915_WRITE_FW(GMBUS4, irq_en); - schedule_timeout(1); - } - finish_wait(&dev_priv->gmbus_wait_queue, &wait); + status |= GMBUS_SATOER; + ret = wait_for_us((gmbus2 = I915_READ_FW(GMBUS2)) & status, 2); + if (ret) + ret = wait_for((gmbus2 = I915_READ_FW(GMBUS2)) & status, 50); - I915_WRITE(GMBUS4, 0); + I915_WRITE_FW(GMBUS4, 0); + remove_wait_queue(&dev_priv->gmbus_wait_queue, &wait); if (gmbus2 & GMBUS_SATOER) return -ENXIO; - if (gmbus2 & gmbus2_status) - return 0; - return -ETIMEDOUT; + + return ret; } static int gmbus_wait_idle(struct drm_i915_private *dev_priv) { + DEFINE_WAIT(wait); + u32 irq_enable; int ret; - if (!HAS_GMBUS_IRQ(dev_priv)) - return intel_wait_for_register(dev_priv, - GMBUS2, GMBUS_ACTIVE, 0, - 10); - /* Important: The hw handles only the first bit, so set only one! 
*/ - I915_WRITE(GMBUS4, GMBUS_IDLE_EN); + irq_enable = 0; + if (HAS_GMBUS_IRQ(dev_priv)) + irq_enable = GMBUS_IDLE_EN; - ret = wait_event_timeout(dev_priv->gmbus_wait_queue, - (I915_READ_NOTRACE(GMBUS2) & GMBUS_ACTIVE) == 0, - msecs_to_jiffies_timeout(10)); + add_wait_queue(&dev_priv->gmbus_wait_queue, &wait); + I915_WRITE_FW(GMBUS4, irq_enable); - I915_WRITE(GMBUS4, 0); + ret = intel_wait_for_register_fw(dev_priv, + GMBUS2, GMBUS_ACTIVE, 0, + 10); - if (ret) - return 0; - else - return -ETIMEDOUT; + I915_WRITE_FW(GMBUS4, 0); + remove_wait_queue(&dev_priv->gmbus_wait_queue, &wait); + + return ret; } static int @@ -323,22 +315,21 @@ gmbus_xfer_read_chunk(struct drm_i915_private *dev_priv, unsigned short addr, u8 *buf, unsigned int len, u32 gmbus1_index) { - I915_WRITE(GMBUS1, - gmbus1_index | - GMBUS_CYCLE_WAIT | - (len << GMBUS_BYTE_COUNT_SHIFT) | - (addr << GMBUS_SLAVE_ADDR_SHIFT) | - GMBUS_SLAVE_READ | GMBUS_SW_RDY); + I915_WRITE_FW(GMBUS1, + gmbus1_index | + GMBUS_CYCLE_WAIT | + (len << GMBUS_BYTE_COUNT_SHIFT) | + (addr << GMBUS_SLAVE_ADDR_SHIFT) | + GMBUS_SLAVE_READ | GMBUS_SW_RDY); while (len) { int ret; u32 val, loop = 0; - ret = gmbus_wait_hw_status(dev_priv, GMBUS_HW_RDY, - GMBUS_HW_RDY_EN); + ret = gmbus_wait(dev_priv, GMBUS_HW_RDY, GMBUS_HW_RDY_EN); if (ret) return ret; - val = I915_READ(GMBUS3); + val = I915_READ_FW(GMBUS3); do { *buf++ = val & 0xff; val >>= 8; @@ -385,12 +376,12 @@ gmbus_xfer_write_chunk(struct drm_i915_private *dev_priv, len -= 1; } - I915_WRITE(GMBUS3, val); - I915_WRITE(GMBUS1, - GMBUS_CYCLE_WAIT | - (chunk_size << GMBUS_BYTE_COUNT_SHIFT) | - (addr << GMBUS_SLAVE_ADDR_SHIFT) | - GMBUS_SLAVE_WRITE | GMBUS_SW_RDY); + I915_WRITE_FW(GMBUS3, val); + I915_WRITE_FW(GMBUS1, + GMBUS_CYCLE_WAIT | + (chunk_size << GMBUS_BYTE_COUNT_SHIFT) | + (addr << GMBUS_SLAVE_ADDR_SHIFT) | + GMBUS_SLAVE_WRITE | GMBUS_SW_RDY); while (len) { int ret; @@ -399,10 +390,9 @@ gmbus_xfer_write_chunk(struct drm_i915_private *dev_priv, val |= *buf++ << (8 * loop); } while (--len && ++loop < 4); - I915_WRITE(GMBUS3, val); + I915_WRITE_FW(GMBUS3, val); - ret = gmbus_wait_hw_status(dev_priv, GMBUS_HW_RDY, - GMBUS_HW_RDY_EN); + ret = gmbus_wait(dev_priv, GMBUS_HW_RDY, GMBUS_HW_RDY_EN); if (ret) return ret; } @@ -460,13 +450,13 @@ gmbus_xfer_index_read(struct drm_i915_private *dev_priv, struct i2c_msg *msgs) /* GMBUS5 holds 16-bit index */ if (gmbus5) - I915_WRITE(GMBUS5, gmbus5); + I915_WRITE_FW(GMBUS5, gmbus5); ret = gmbus_xfer_read(dev_priv, &msgs[1], gmbus1_index); /* Clear GMBUS5 after each index transfer */ if (gmbus5) - I915_WRITE(GMBUS5, 0); + I915_WRITE_FW(GMBUS5, 0); return ret; } @@ -478,11 +468,15 @@ do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) struct intel_gmbus, adapter); struct drm_i915_private *dev_priv = bus->dev_priv; + const unsigned int fw = + intel_uncore_forcewake_for_reg(dev_priv, GMBUS0, + FW_REG_READ | FW_REG_WRITE); int i = 0, inc, try = 0; int ret = 0; + intel_uncore_forcewake_get(dev_priv, fw); retry: - I915_WRITE(GMBUS0, bus->reg0); + I915_WRITE_FW(GMBUS0, bus->reg0); for (; i < num; i += inc) { inc = 1; @@ -496,8 +490,8 @@ retry: } if (!ret) - ret = gmbus_wait_hw_status(dev_priv, GMBUS_HW_WAIT_PHASE, - GMBUS_HW_WAIT_EN); + ret = gmbus_wait(dev_priv, + GMBUS_HW_WAIT_PHASE, GMBUS_HW_WAIT_EN); if (ret == -ETIMEDOUT) goto timeout; else if (ret) @@ -508,7 +502,7 @@ retry: * a STOP on the very first cycle. To simplify the code we * unconditionally generate the STOP condition with an additional gmbus * cycle. 
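The new gmbus_wait() above replaces the hand-rolled prepare_to_wait() loop with a two-stage poll: a 2 microsecond busy spin for the common fast case, then a sleeping wait with a 50 ms timeout. The shape of that pattern, as a toy standalone sketch in which a counter stands in for the GMBUS2 status read:

#include <stdbool.h>
#include <stdio.h>

static int hw_countdown = 4;	/* pretend hardware: ready after 4 polls */

static bool status_set(void)
{
	return --hw_countdown <= 0;
}

/* Stage one busy-spins briefly; stage two would sleep between polls
 * (wait_for_us() vs wait_for() in the driver). */
static int two_stage_wait(int spin_polls, int sleep_polls)
{
	while (spin_polls--)
		if (status_set())
			return 0;	/* fast path */

	while (sleep_polls--)
		if (status_set())
			return 0;	/* slow path */

	return -1;	/* -ETIMEDOUT in the driver */
}

int main(void)
{
	printf("wait returned %d\n", two_stage_wait(2, 50));
	return 0;
}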
*/ - I915_WRITE(GMBUS1, GMBUS_CYCLE_STOP | GMBUS_SW_RDY); + I915_WRITE_FW(GMBUS1, GMBUS_CYCLE_STOP | GMBUS_SW_RDY); /* Mark the GMBUS interface as disabled after waiting for idle. * We will re-enable it at the start of the next xfer, @@ -519,7 +513,7 @@ retry: adapter->name); ret = -ETIMEDOUT; } - I915_WRITE(GMBUS0, 0); + I915_WRITE_FW(GMBUS0, 0); ret = ret ?: i; goto out; @@ -548,9 +542,9 @@ clear_err: * of resetting the GMBUS controller and so clearing the * BUS_ERROR raised by the slave's NAK. */ - I915_WRITE(GMBUS1, GMBUS_SW_CLR_INT); - I915_WRITE(GMBUS1, 0); - I915_WRITE(GMBUS0, 0); + I915_WRITE_FW(GMBUS1, GMBUS_SW_CLR_INT); + I915_WRITE_FW(GMBUS1, 0); + I915_WRITE_FW(GMBUS0, 0); DRM_DEBUG_KMS("GMBUS [%s] NAK for addr: %04x %c(%d)\n", adapter->name, msgs[i].addr, @@ -573,7 +567,7 @@ clear_err: timeout: DRM_DEBUG_KMS("GMBUS [%s] timed out, falling back to bit banging on pin %d\n", bus->adapter.name, bus->reg0 & 0xff); - I915_WRITE(GMBUS0, 0); + I915_WRITE_FW(GMBUS0, 0); /* * Hardware may not support GMBUS over these pins? Try GPIO bitbanging @@ -582,6 +576,7 @@ timeout: ret = -EAGAIN; out: + intel_uncore_forcewake_put(dev_priv, fw); return ret; } @@ -633,6 +628,7 @@ static const struct i2c_algorithm gmbus_algorithm = { int intel_setup_gmbus(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + struct pci_dev *pdev = dev_priv->drm.pdev; struct intel_gmbus *bus; unsigned int pin; int ret; @@ -663,7 +659,7 @@ int intel_setup_gmbus(struct drm_device *dev) "i915 gmbus %s", get_gmbus_pin(dev_priv, pin)->name); - bus->adapter.dev.parent = &dev->pdev->dev; + bus->adapter.dev.parent = &pdev->dev; bus->dev_priv = dev_priv; bus->adapter.algo = &gmbus_algorithm; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 414ddda43922..0adb879833ff 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -156,6 +156,11 @@ #define GEN8_CTX_STATUS_COMPLETE (1 << 4) #define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15) +#define GEN8_CTX_STATUS_COMPLETED_MASK \ + (GEN8_CTX_STATUS_ACTIVE_IDLE | \ + GEN8_CTX_STATUS_PREEMPTED | \ + GEN8_CTX_STATUS_ELEMENT_SWITCH) + #define CTX_LRI_HEADER_0 0x01 #define CTX_CONTEXT_CONTROL 0x02 #define CTX_RING_HEAD 0x04 @@ -221,10 +226,16 @@ enum { /* Typical size of the average request (2 pipecontrols and a MI_BB) */ #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ +#define WA_TAIL_DWORDS 2 + static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine); static int intel_lr_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine); +static void execlists_init_reg_state(u32 *reg_state, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct intel_ring *ring); /** * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists @@ -263,12 +274,10 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - if (IS_GEN8(dev_priv) || IS_GEN9(dev_priv)) - engine->idle_lite_restore_wa = ~0; - - engine->disable_lite_restore_wa = (IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) || - IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) && - (engine->id == VCS || engine->id == VCS2); + engine->disable_lite_restore_wa = + (IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) || + IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) && + (engine->id == VCS || engine->id == VCS2); engine->ctx_desc_template = GEN8_CTX_VALID; if (IS_GEN8(dev_priv)) @@ -288,7 +297,6 @@ logical_ring_init_platform_invariants(struct 
intel_engine_cs *engine) /** * intel_lr_context_descriptor_update() - calculate & cache the descriptor * descriptor for a pinned context - * * @ctx: Context to work on * @engine: Engine the descriptor will be used with * @@ -297,12 +305,13 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine) * expensive to calculate, we'll just do it once and cache the result, * which remains valid until the context is unpinned. * - * This is what a descriptor looks like, from LSB to MSB: - * bits 0-11: flags, GEN8_CTX_* (cached in ctx_desc_template) - * bits 12-31: LRCA, GTT address of (the HWSP of) this context - * bits 32-52: ctx ID, a globally unique tag - * bits 53-54: mbz, reserved for use by hardware - * bits 55-63: group ID, currently unused and set to 0 + * This is what a descriptor looks like, from LSB to MSB:: + * + * bits 0-11: flags, GEN8_CTX_* (cached in ctx_desc_template) + * bits 12-31: LRCA, GTT address of (the HWSP of) this context + * bits 32-52: ctx ID, a globally unique tag + * bits 53-54: mbz, reserved for use by hardware + * bits 55-63: group ID, currently unused and set to 0 */ static void intel_lr_context_descriptor_update(struct i915_gem_context *ctx, @@ -315,7 +324,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx, desc = ctx->desc_template; /* bits 3-4 */ desc |= engine->ctx_desc_template; /* bits 0-11 */ - desc |= ce->lrc_vma->node.start + LRC_PPHWSP_PN * PAGE_SIZE; + desc |= i915_ggtt_offset(ce->state) + LRC_PPHWSP_PN * PAGE_SIZE; /* bits 12-31 */ desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ @@ -328,34 +337,18 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, return ctx->engine[engine->id].lrc_desc; } -static void execlists_elsp_write(struct drm_i915_gem_request *rq0, - struct drm_i915_gem_request *rq1) +static inline void +execlists_context_status_change(struct drm_i915_gem_request *rq, + unsigned long status) { + /* + * Only used when GVT-g is enabled now. When GVT-g is disabled, + * The compiler should eliminate this function as dead-code. + */ + if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) + return; - struct intel_engine_cs *engine = rq0->engine; - struct drm_i915_private *dev_priv = rq0->i915; - uint64_t desc[2]; - - if (rq1) { - desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->engine); - rq1->elsp_submitted++; - } else { - desc[1] = 0; - } - - desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->engine); - rq0->elsp_submitted++; - - /* You must always write both descriptors in the order below. 
*/ - I915_WRITE_FW(RING_ELSP(engine), upper_32_bits(desc[1])); - I915_WRITE_FW(RING_ELSP(engine), lower_32_bits(desc[1])); - - I915_WRITE_FW(RING_ELSP(engine), upper_32_bits(desc[0])); - /* The context is automatically loaded after the following */ - I915_WRITE_FW(RING_ELSP(engine), lower_32_bits(desc[0])); - - /* ELSP is a wo register, use another nearby reg for posting */ - POSTING_READ_FW(RING_EXECLIST_STATUS_LO(engine)); + atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq); } static void @@ -367,13 +360,13 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state) ASSIGN_CTX_PDP(ppgtt, reg_state, 0); } -static void execlists_update_context(struct drm_i915_gem_request *rq) +static u64 execlists_update_context(struct drm_i915_gem_request *rq) { - struct intel_engine_cs *engine = rq->engine; + struct intel_context *ce = &rq->ctx->engine[rq->engine->id]; struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt; - uint32_t *reg_state = rq->ctx->engine[engine->id].lrc_reg_state; + u32 *reg_state = ce->lrc_reg_state; - reg_state[CTX_RING_TAIL+1] = rq->tail; + reg_state[CTX_RING_TAIL+1] = intel_ring_offset(rq->ring, rq->tail); /* True 32b PPGTT with dynamic page allocation: update PDP * registers and point the unallocated PDPs to scratch page. @@ -382,321 +375,236 @@ static void execlists_update_context(struct drm_i915_gem_request *rq) */ if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) execlists_update_context_pdps(ppgtt, reg_state); + + return ce->lrc_desc; } -static void execlists_submit_requests(struct drm_i915_gem_request *rq0, - struct drm_i915_gem_request *rq1) +static void execlists_submit_ports(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = rq0->i915; - unsigned int fw_domains = rq0->engine->fw_domains; - - execlists_update_context(rq0); + struct drm_i915_private *dev_priv = engine->i915; + struct execlist_port *port = engine->execlist_port; + u32 __iomem *elsp = + dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine)); + u64 desc[2]; - if (rq1) - execlists_update_context(rq1); + if (!port[0].count) + execlists_context_status_change(port[0].request, + INTEL_CONTEXT_SCHEDULE_IN); + desc[0] = execlists_update_context(port[0].request); + engine->preempt_wa = port[0].count++; /* bdw only? fixed on skl? */ - spin_lock_irq(&dev_priv->uncore.lock); - intel_uncore_forcewake_get__locked(dev_priv, fw_domains); + if (port[1].request) { + GEM_BUG_ON(port[1].count); + execlists_context_status_change(port[1].request, + INTEL_CONTEXT_SCHEDULE_IN); + desc[1] = execlists_update_context(port[1].request); + port[1].count = 1; + } else { + desc[1] = 0; + } + GEM_BUG_ON(desc[0] == desc[1]); - execlists_elsp_write(rq0, rq1); + /* You must always write both descriptors in the order below. */ + writel(upper_32_bits(desc[1]), elsp); + writel(lower_32_bits(desc[1]), elsp); - intel_uncore_forcewake_put__locked(dev_priv, fw_domains); - spin_unlock_irq(&dev_priv->uncore.lock); + writel(upper_32_bits(desc[0]), elsp); + /* The context is automatically loaded after the following */ + writel(lower_32_bits(desc[0]), elsp); } -static inline void execlists_context_status_change( - struct drm_i915_gem_request *rq, - unsigned long status) +static bool ctx_single_port_submission(const struct i915_gem_context *ctx) { - /* - * Only used when GVT-g is enabled now. When GVT-g is disabled, - * The compiler should eliminate this function as dead-code. 
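A hypothetical helper, not part of the patch, that spells out the execlist_port invariants the new execlists_submit_ports() relies on (GEM_BUG_ON() is used exactly as in the hunk above):

static void assert_execlist_ports(const struct execlist_port *port)
{
	/* The second port is only ever populated behind an occupied first. */
	GEM_BUG_ON(port[1].request && !port[0].request);

	/* Never the same context on both ports: equal descriptors would
	 * confuse the hardware's ELSP tracking, which is what the
	 * GEM_BUG_ON(desc[0] == desc[1]) above checks at submit time. */
	GEM_BUG_ON(port[0].request && port[1].request &&
		   port[0].request->ctx == port[1].request->ctx);
}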
- */ - if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) - return; - - atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq); + return (IS_ENABLED(CONFIG_DRM_I915_GVT) && + ctx->execlists_force_single_submission); } -static void execlists_context_unqueue(struct intel_engine_cs *engine) +static bool can_merge_ctx(const struct i915_gem_context *prev, + const struct i915_gem_context *next) { - struct drm_i915_gem_request *req0 = NULL, *req1 = NULL; - struct drm_i915_gem_request *cursor, *tmp; + if (prev != next) + return false; - assert_spin_locked(&engine->execlist_lock); + if (ctx_single_port_submission(prev)) + return false; - /* - * If irqs are not active generate a warning as batches that finish - * without the irqs may get lost and a GPU Hang may occur. - */ - WARN_ON(!intel_irqs_enabled(engine->i915)); - - /* Try to read in pairs */ - list_for_each_entry_safe(cursor, tmp, &engine->execlist_queue, - execlist_link) { - if (!req0) { - req0 = cursor; - } else if (req0->ctx == cursor->ctx) { - /* Same ctx: ignore first request, as second request - * will update tail past first request's workload */ - cursor->elsp_submitted = req0->elsp_submitted; - list_del(&req0->execlist_link); - i915_gem_request_unreference(req0); - req0 = cursor; - } else { - if (IS_ENABLED(CONFIG_DRM_I915_GVT)) { - /* - * req0 (after merged) ctx requires single - * submission, stop picking - */ - if (req0->ctx->execlists_force_single_submission) - break; - /* - * req0 ctx doesn't require single submission, - * but next req ctx requires, stop picking - */ - if (cursor->ctx->execlists_force_single_submission) - break; - } - req1 = cursor; - WARN_ON(req1->elsp_submitted); - break; - } - } + return true; +} - if (unlikely(!req0)) - return; +static void execlists_dequeue(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_request *cursor, *last; + struct execlist_port *port = engine->execlist_port; + bool submit = false; + + last = port->request; + if (last) + /* WaIdleLiteRestore:bdw,skl + * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL + * as we resubmit the request. See gen8_emit_request() + * for where we prepare the padding after the end of the + * request. + */ + last->tail = last->wa_tail; - execlists_context_status_change(req0, INTEL_CONTEXT_SCHEDULE_IN); + GEM_BUG_ON(port[1].request); - if (req1) - execlists_context_status_change(req1, - INTEL_CONTEXT_SCHEDULE_IN); + /* Hardware submission is through 2 ports. Conceptually each port + * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is + * static for a context, and unique to each, so we only execute + * requests belonging to a single context from each ring. RING_HEAD + * is maintained by the CS in the context image, it marks the place + * where it got up to last time, and through RING_TAIL we tell the CS + * where we want to execute up to this time. + * + * In this list the requests are in order of execution. Consecutive + * requests from the same context are adjacent in the ringbuffer. We + * can combine these requests into a single RING_TAIL update: + * + * RING_HEAD...req1...req2 + * ^- RING_TAIL + * since to execute req2 the CS must first execute req1. + * + * Our goal then is to point each port to the end of a consecutive + * sequence of requests as being the most optimal (fewest wake ups + * and context switches) submission. + */ - if (req0->elsp_submitted & engine->idle_lite_restore_wa) { - /* - * WaIdleLiteRestore: make sure we never cause a lite restore - * with HEAD==TAIL. 
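The comment above describes how consecutive requests from one context collapse into a single RING_TAIL update. A toy model of that coalescing rule, in plain C with simplified types (illustration only, not driver code):

struct toy_req { int ctx; int tail; };

/* Walk an in-order queue and fill at most two ports, merging
 * neighbouring requests that share a context. Returns the number of
 * ports used; ports[] holds the index of the last merged request,
 * i.e. the request whose tail the hardware is told to execute to. */
static int coalesce(const struct toy_req *q, int n, int ports[2])
{
	int used = 0, last_ctx = -1;
	int i;

	for (i = 0; i < n; i++) {
		if (q[i].ctx != last_ctx) {
			if (used == 2)
				break;		/* both ports claimed */
			ports[used++] = i;	/* open a new port */
			last_ctx = q[i].ctx;
		} else {
			ports[used - 1] = i;	/* same ctx: advance tail */
		}
	}
	return used;
}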
+ spin_lock(&engine->execlist_lock); + list_for_each_entry(cursor, &engine->execlist_queue, execlist_link) { + /* Can we combine this request with the current port? It has to + * be the same context/ringbuffer and not have any exceptions + * (e.g. GVT saying never to combine contexts). * - * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL as we - * resubmit the request. See gen8_emit_request() for where we - * prepare the padding after the end of the request. + * If we can combine the requests, we can execute both by + * updating the RING_TAIL to point to the end of the second + * request, and so we never need to tell the hardware about + * the first. */ - struct intel_ringbuffer *ringbuf; + if (last && !can_merge_ctx(cursor->ctx, last->ctx)) { + /* If we are on the second port and cannot combine + * this request with the last, then we are done. + */ + if (port != engine->execlist_port) + break; + + /* If GVT overrides us we only ever submit port[0], + * leaving port[1] empty. Note that we also have + * to be careful that we don't queue the same + * context (even though a different request) to + * the second port. + */ + if (ctx_single_port_submission(cursor->ctx)) + break; + + GEM_BUG_ON(last->ctx == cursor->ctx); + + i915_gem_request_assign(&port->request, last); + port++; + } + last = cursor; + submit = true; + } + if (submit) { + /* Decouple all the requests submitted from the queue */ + engine->execlist_queue.next = &cursor->execlist_link; + cursor->execlist_link.prev = &engine->execlist_queue; - ringbuf = req0->ctx->engine[engine->id].ringbuf; - req0->tail += 8; - req0->tail &= ringbuf->size - 1; + i915_gem_request_assign(&port->request, last); } + spin_unlock(&engine->execlist_lock); - execlists_submit_requests(req0, req1); + if (submit) + execlists_submit_ports(engine); } -static unsigned int -execlists_check_remove_request(struct intel_engine_cs *engine, u32 ctx_id) +static bool execlists_elsp_idle(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *head_req; - - assert_spin_locked(&engine->execlist_lock); - - head_req = list_first_entry_or_null(&engine->execlist_queue, - struct drm_i915_gem_request, - execlist_link); - - if (WARN_ON(!head_req || (head_req->ctx_hw_id != ctx_id))) - return 0; - - WARN(head_req->elsp_submitted == 0, "Never submitted head request\n"); - - if (--head_req->elsp_submitted > 0) - return 0; - - execlists_context_status_change(head_req, INTEL_CONTEXT_SCHEDULE_OUT); - - list_del(&head_req->execlist_link); - i915_gem_request_unreference(head_req); - - return 1; + return !engine->execlist_port[0].request; } -static u32 -get_context_status(struct intel_engine_cs *engine, unsigned int read_pointer, - u32 *context_id) +static bool execlists_elsp_ready(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; - u32 status; + int port; - read_pointer %= GEN8_CSB_ENTRIES; + port = 1; /* wait for a free slot */ + if (engine->disable_lite_restore_wa || engine->preempt_wa) + port = 0; /* wait for GPU to be idle before continuing */ - status = I915_READ_FW(RING_CONTEXT_STATUS_BUF_LO(engine, read_pointer)); - - if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) - return 0; - - *context_id = I915_READ_FW(RING_CONTEXT_STATUS_BUF_HI(engine, - read_pointer)); - - return status; + return !engine->execlist_port[port].request; } -/** - * intel_lrc_irq_handler() - handle Context Switch interrupts - * @data: tasklet handler passed in unsigned long - * +/* * Check the unread Context Status Buffers and manage the submission of new * contexts to 
the ELSP accordingly. */ static void intel_lrc_irq_handler(unsigned long data) { struct intel_engine_cs *engine = (struct intel_engine_cs *)data; + struct execlist_port *port = engine->execlist_port; struct drm_i915_private *dev_priv = engine->i915; - u32 status_pointer; - unsigned int read_pointer, write_pointer; - u32 csb[GEN8_CSB_ENTRIES][2]; - unsigned int csb_read = 0, i; - unsigned int submit_contexts = 0; intel_uncore_forcewake_get(dev_priv, engine->fw_domains); - status_pointer = I915_READ_FW(RING_CONTEXT_STATUS_PTR(engine)); - - read_pointer = engine->next_context_status_buffer; - write_pointer = GEN8_CSB_WRITE_PTR(status_pointer); - if (read_pointer > write_pointer) - write_pointer += GEN8_CSB_ENTRIES; - - while (read_pointer < write_pointer) { - if (WARN_ON_ONCE(csb_read == GEN8_CSB_ENTRIES)) - break; - csb[csb_read][0] = get_context_status(engine, ++read_pointer, - &csb[csb_read][1]); - csb_read++; - } - - engine->next_context_status_buffer = write_pointer % GEN8_CSB_ENTRIES; - - /* Update the read pointer to the old write pointer. Manual ringbuffer - * management ftw </sarcasm> */ - I915_WRITE_FW(RING_CONTEXT_STATUS_PTR(engine), - _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, - engine->next_context_status_buffer << 8)); - - intel_uncore_forcewake_put(dev_priv, engine->fw_domains); - - spin_lock(&engine->execlist_lock); + if (!execlists_elsp_idle(engine)) { + u32 __iomem *csb_mmio = + dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)); + u32 __iomem *buf = + dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)); + unsigned int csb, head, tail; + + csb = readl(csb_mmio); + head = GEN8_CSB_READ_PTR(csb); + tail = GEN8_CSB_WRITE_PTR(csb); + if (tail < head) + tail += GEN8_CSB_ENTRIES; + while (head < tail) { + unsigned int idx = ++head % GEN8_CSB_ENTRIES; + unsigned int status = readl(buf + 2 * idx); + + if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) + continue; + + GEM_BUG_ON(port[0].count == 0); + if (--port[0].count == 0) { + GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); + execlists_context_status_change(port[0].request, + INTEL_CONTEXT_SCHEDULE_OUT); + + i915_gem_request_put(port[0].request); + port[0] = port[1]; + memset(&port[1], 0, sizeof(port[1])); + + engine->preempt_wa = false; + } - for (i = 0; i < csb_read; i++) { - if (unlikely(csb[i][0] & GEN8_CTX_STATUS_PREEMPTED)) { - if (csb[i][0] & GEN8_CTX_STATUS_LITE_RESTORE) { - if (execlists_check_remove_request(engine, csb[i][1])) - WARN(1, "Lite Restored request removed from queue\n"); - } else - WARN(1, "Preemption without Lite Restore\n"); + GEM_BUG_ON(port[0].count == 0 && + !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); } - if (csb[i][0] & (GEN8_CTX_STATUS_ACTIVE_IDLE | - GEN8_CTX_STATUS_ELEMENT_SWITCH)) - submit_contexts += - execlists_check_remove_request(engine, csb[i][1]); - } - - if (submit_contexts) { - if (!engine->disable_lite_restore_wa || - (csb[i][0] & GEN8_CTX_STATUS_ACTIVE_IDLE)) - execlists_context_unqueue(engine); + writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, + GEN8_CSB_WRITE_PTR(csb) << 8), + csb_mmio); } - spin_unlock(&engine->execlist_lock); + if (execlists_elsp_ready(engine)) + execlists_dequeue(engine); - if (unlikely(submit_contexts > 2)) - DRM_ERROR("More than two context complete events?\n"); + intel_uncore_forcewake_put(dev_priv, engine->fw_domains); } -static void execlists_context_queue(struct drm_i915_gem_request *request) +static void execlists_submit_request(struct drm_i915_gem_request *request) { struct intel_engine_cs *engine = request->engine; - struct 
drm_i915_gem_request *cursor; - int num_elements = 0; - - spin_lock_bh(&engine->execlist_lock); + unsigned long flags; - list_for_each_entry(cursor, &engine->execlist_queue, execlist_link) - if (++num_elements > 2) - break; + spin_lock_irqsave(&engine->execlist_lock, flags); - if (num_elements > 2) { - struct drm_i915_gem_request *tail_req; - - tail_req = list_last_entry(&engine->execlist_queue, - struct drm_i915_gem_request, - execlist_link); - - if (request->ctx == tail_req->ctx) { - WARN(tail_req->elsp_submitted != 0, - "More than 2 already-submitted reqs queued\n"); - list_del(&tail_req->execlist_link); - i915_gem_request_unreference(tail_req); - } - } - - i915_gem_request_reference(request); list_add_tail(&request->execlist_link, &engine->execlist_queue); - request->ctx_hw_id = request->ctx->hw_id; - if (num_elements == 0) - execlists_context_unqueue(engine); + if (execlists_elsp_idle(engine)) + tasklet_hi_schedule(&engine->irq_tasklet); - spin_unlock_bh(&engine->execlist_lock); -} - -static int logical_ring_invalidate_all_caches(struct drm_i915_gem_request *req) -{ - struct intel_engine_cs *engine = req->engine; - uint32_t flush_domains; - int ret; - - flush_domains = 0; - if (engine->gpu_caches_dirty) - flush_domains = I915_GEM_GPU_DOMAINS; - - ret = engine->emit_flush(req, I915_GEM_GPU_DOMAINS, flush_domains); - if (ret) - return ret; - - engine->gpu_caches_dirty = false; - return 0; -} - -static int execlists_move_to_gpu(struct drm_i915_gem_request *req, - struct list_head *vmas) -{ - const unsigned other_rings = ~intel_engine_flag(req->engine); - struct i915_vma *vma; - uint32_t flush_domains = 0; - bool flush_chipset = false; - int ret; - - list_for_each_entry(vma, vmas, exec_list) { - struct drm_i915_gem_object *obj = vma->obj; - - if (obj->active & other_rings) { - ret = i915_gem_object_sync(obj, req->engine, &req); - if (ret) - return ret; - } - - if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) - flush_chipset |= i915_gem_clflush_object(obj, false); - - flush_domains |= obj->base.write_domain; - } - - if (flush_domains & I915_GEM_DOMAIN_GTT) - wmb(); - - /* Unconditionally invalidate gpu caches and ensure that we do flush - * any residual writes from the previous batch. - */ - return logical_ring_invalidate_all_caches(req); + spin_unlock_irqrestore(&engine->execlist_lock, flags); } int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request) @@ -717,7 +625,11 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request return ret; } - request->ringbuf = ce->ringbuf; + request->ring = ce->ring; + + ret = intel_lr_context_pin(request->ctx, engine); + if (ret) + return ret; if (i915.enable_guc_submission) { /* @@ -725,23 +637,19 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request * going any further, as the i915_add_request() call * later on mustn't fail ... 
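Stripped of the removed lines, the new submission entry point is this short; reassembled from the hunk above, with a comment added:

static void execlists_submit_request(struct drm_i915_gem_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	unsigned long flags;

	spin_lock_irqsave(&engine->execlist_lock, flags);

	list_add_tail(&request->execlist_link, &engine->execlist_queue);
	/* Only kick the tasklet when the ELSP is idle; otherwise the
	 * context-switch interrupt will run the dequeue for us. */
	if (execlists_elsp_idle(engine))
		tasklet_hi_schedule(&engine->irq_tasklet);

	spin_unlock_irqrestore(&engine->execlist_lock, flags);
}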
*/ - ret = i915_guc_wq_check_space(request); + ret = i915_guc_wq_reserve(request); if (ret) - return ret; + goto err_unpin; } - ret = intel_lr_context_pin(request->ctx, engine); - if (ret) - return ret; - ret = intel_ring_begin(request, 0); if (ret) - goto err_unpin; + goto err_unreserve; if (!ce->initialised) { ret = engine->init_context(request); if (ret) - goto err_unpin; + goto err_unreserve; ce->initialised = true; } @@ -756,13 +664,16 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request request->reserved_space -= EXECLISTS_REQUEST_SIZE; return 0; +err_unreserve: + if (i915.enable_guc_submission) + i915_guc_wq_unreserve(request); err_unpin: intel_lr_context_unpin(request->ctx, engine); return ret; } /* - * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload + * intel_logical_ring_advance() - advance the tail and prepare for submission * @request: Request to advance the logical ringbuffer of. * * The tail is updated in our logical ringbuffer struct, not in the actual context. What @@ -771,13 +682,13 @@ err_unpin: * point, the tail *inside* the context is updated and the ELSP written to. */ static int -intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) +intel_logical_ring_advance(struct drm_i915_gem_request *request) { - struct intel_ringbuffer *ringbuf = request->ringbuf; + struct intel_ring *ring = request->ring; struct intel_engine_cs *engine = request->engine; - intel_logical_ring_advance(ringbuf); - request->tail = ringbuf->tail; + intel_ring_advance(ring); + request->tail = ring->tail; /* * Here we add two extra NOOPs as padding to avoid @@ -785,9 +696,10 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) * * Caller must reserve WA_TAIL_DWORDS for us! */ - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_advance(ringbuf); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); + request->wa_tail = ring->tail; /* We keep the previous context alive until we retire the following * request. This ensures that any the context object is still pinned @@ -797,168 +709,14 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) */ request->previous_context = engine->last_context; engine->last_context = request->ctx; - - if (i915.enable_guc_submission) - i915_guc_submit(request); - else - execlists_context_queue(request); - - return 0; -} - -/** - * execlists_submission() - submit a batchbuffer for execution, Execlists style - * @params: execbuffer call parameters. - * @args: execbuffer call arguments. - * @vmas: list of vmas. - * - * This is the evil twin version of i915_gem_ringbuffer_submission. It abstracts - * away the submission details of the execbuffer ioctl call. - * - * Return: non-zero if the submission fails. 
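Condensed from this patch, the two tail pointers recorded per request and where each one is consumed (fragments assume the surrounding code shown in the hunks):

/* intel_logical_ring_advance(): record both ends of the request. */
request->tail = ring->tail;	/* breadcrumb end: first submission */
intel_ring_emit(ring, MI_NOOP);	/* WA_TAIL_DWORDS of padding ... */
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
request->wa_tail = ring->tail;	/* ... for WaIdleLiteRestore */

/* execlists_dequeue(): when resubmitting the request already at the
 * head of the ELSP, execute up to the padded tail instead, so that a
 * lite restore never sees RING_HEAD == RING_TAIL. */
last->tail = last->wa_tail;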
- */ -int intel_execlists_submission(struct i915_execbuffer_params *params, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas) -{ - struct drm_device *dev = params->dev; - struct intel_engine_cs *engine = params->engine; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_ringbuffer *ringbuf = params->ctx->engine[engine->id].ringbuf; - u64 exec_start; - int instp_mode; - u32 instp_mask; - int ret; - - instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; - instp_mask = I915_EXEC_CONSTANTS_MASK; - switch (instp_mode) { - case I915_EXEC_CONSTANTS_REL_GENERAL: - case I915_EXEC_CONSTANTS_ABSOLUTE: - case I915_EXEC_CONSTANTS_REL_SURFACE: - if (instp_mode != 0 && engine != &dev_priv->engine[RCS]) { - DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); - return -EINVAL; - } - - if (instp_mode != dev_priv->relative_constants_mode) { - if (instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { - DRM_DEBUG("rel surface constants mode invalid on gen5+\n"); - return -EINVAL; - } - - /* The HW changed the meaning on this bit on gen6 */ - instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; - } - break; - default: - DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode); - return -EINVAL; - } - - if (args->flags & I915_EXEC_GEN7_SOL_RESET) { - DRM_DEBUG("sol reset is gen7 only\n"); - return -EINVAL; - } - - ret = execlists_move_to_gpu(params->request, vmas); - if (ret) - return ret; - - if (engine == &dev_priv->engine[RCS] && - instp_mode != dev_priv->relative_constants_mode) { - ret = intel_ring_begin(params->request, 4); - if (ret) - return ret; - - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1)); - intel_logical_ring_emit_reg(ringbuf, INSTPM); - intel_logical_ring_emit(ringbuf, instp_mask << 16 | instp_mode); - intel_logical_ring_advance(ringbuf); - - dev_priv->relative_constants_mode = instp_mode; - } - - exec_start = params->batch_obj_vm_offset + - args->batch_start_offset; - - ret = engine->emit_bb_start(params->request, exec_start, params->dispatch_flags); - if (ret) - return ret; - - trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); - - i915_gem_execbuffer_move_to_active(vmas, params->request); - - return 0; -} - -void intel_execlists_cancel_requests(struct intel_engine_cs *engine) -{ - struct drm_i915_gem_request *req, *tmp; - LIST_HEAD(cancel_list); - - WARN_ON(!mutex_is_locked(&engine->i915->drm.struct_mutex)); - - spin_lock_bh(&engine->execlist_lock); - list_replace_init(&engine->execlist_queue, &cancel_list); - spin_unlock_bh(&engine->execlist_lock); - - list_for_each_entry_safe(req, tmp, &cancel_list, execlist_link) { - list_del(&req->execlist_link); - i915_gem_request_unreference(req); - } -} - -void intel_logical_ring_stop(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - if (!intel_engine_initialized(engine)) - return; - - ret = intel_engine_idle(engine); - if (ret) - DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n", - engine->name, ret); - - /* TODO: Is this correct with Execlists enabled? 
*/ - I915_WRITE_MODE(engine, _MASKED_BIT_ENABLE(STOP_RING)); - if (intel_wait_for_register(dev_priv, - RING_MI_MODE(engine->mmio_base), - MODE_IDLE, MODE_IDLE, - 1000)) { - DRM_ERROR("%s :timed out trying to stop ring\n", engine->name); - return; - } - I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING)); -} - -int logical_ring_flush_all_caches(struct drm_i915_gem_request *req) -{ - struct intel_engine_cs *engine = req->engine; - int ret; - - if (!engine->gpu_caches_dirty) - return 0; - - ret = engine->emit_flush(req, 0, I915_GEM_GPU_DOMAINS); - if (ret) - return ret; - - engine->gpu_caches_dirty = false; return 0; } static int intel_lr_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = ctx->i915; struct intel_context *ce = &ctx->engine[engine->id]; void *vaddr; - u32 *lrc_reg_state; int ret; lockdep_assert_held(&ctx->i915->drm.struct_mutex); @@ -966,41 +724,42 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx, if (ce->pin_count++) return 0; - ret = i915_gem_obj_ggtt_pin(ce->state, GEN8_LR_CONTEXT_ALIGN, - PIN_OFFSET_BIAS | GUC_WOPCM_TOP); + ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN, + PIN_OFFSET_BIAS | GUC_WOPCM_TOP | PIN_GLOBAL); if (ret) goto err; - vaddr = i915_gem_object_pin_map(ce->state); + vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); - goto unpin_ctx_obj; + goto unpin_vma; } - lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; - - ret = intel_pin_and_map_ringbuffer_obj(dev_priv, ce->ringbuf); + ret = intel_ring_pin(ce->ring); if (ret) goto unpin_map; - i915_gem_context_reference(ctx); - ce->lrc_vma = i915_gem_obj_to_ggtt(ce->state); intel_lr_context_descriptor_update(ctx, engine); - lrc_reg_state[CTX_RING_BUFFER_START+1] = ce->ringbuf->vma->node.start; - ce->lrc_reg_state = lrc_reg_state; - ce->state->dirty = true; + ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; + ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = + i915_ggtt_offset(ce->ring->vma); + + ce->state->obj->dirty = true; /* Invalidate GuC TLB. 
*/ - if (i915.enable_guc_submission) + if (i915.enable_guc_submission) { + struct drm_i915_private *dev_priv = ctx->i915; I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); + } + i915_gem_context_get(ctx); return 0; unpin_map: - i915_gem_object_unpin_map(ce->state); -unpin_ctx_obj: - i915_gem_object_ggtt_unpin(ce->state); + i915_gem_object_unpin_map(ce->state->obj); +unpin_vma: + __i915_vma_unpin(ce->state); err: ce->pin_count = 0; return ret; @@ -1017,30 +776,24 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx, if (--ce->pin_count) return; - intel_unpin_ringbuffer_obj(ce->ringbuf); + intel_ring_unpin(ce->ring); - i915_gem_object_unpin_map(ce->state); - i915_gem_object_ggtt_unpin(ce->state); + i915_gem_object_unpin_map(ce->state->obj); + i915_vma_unpin(ce->state); - ce->lrc_vma = NULL; - ce->lrc_desc = 0; - ce->lrc_reg_state = NULL; - - i915_gem_context_unreference(ctx); + i915_gem_context_put(ctx); } static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) { int ret, i; - struct intel_engine_cs *engine = req->engine; - struct intel_ringbuffer *ringbuf = req->ringbuf; + struct intel_ring *ring = req->ring; struct i915_workarounds *w = &req->i915->workarounds; if (w->count == 0) return 0; - engine->gpu_caches_dirty = true; - ret = logical_ring_flush_all_caches(req); + ret = req->engine->emit_flush(req, EMIT_BARRIER); if (ret) return ret; @@ -1048,17 +801,16 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) if (ret) return ret; - intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(w->count)); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count)); for (i = 0; i < w->count; i++) { - intel_logical_ring_emit_reg(ringbuf, w->reg[i].addr); - intel_logical_ring_emit(ringbuf, w->reg[i].value); + intel_ring_emit_reg(ring, w->reg[i].addr); + intel_ring_emit(ring, w->reg[i].value); } - intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); - intel_logical_ring_advance(ringbuf); + intel_ring_advance(ring); - engine->gpu_caches_dirty = true; - ret = logical_ring_flush_all_caches(req); + ret = req->engine->emit_flush(req, EMIT_BARRIER); if (ret) return ret; @@ -1094,7 +846,7 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) * code duplication. 
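The workaround emission above uses the standard LRI pattern: one MI_LOAD_REGISTER_IMM(n) header followed by n (register, value) pairs, plus a trailing NOOP since the 1 + 2n payload is always an odd number of dwords. Condensed from the hunk, with intel_ring_begin() error handling omitted:

intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count));
for (i = 0; i < w->count; i++) {
	intel_ring_emit_reg(ring, w->reg[i].addr);
	intel_ring_emit(ring, w->reg[i].value);
}
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);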
  */
 static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine,
-						uint32_t *const batch,
+						uint32_t *batch,
 						uint32_t index)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
@@ -1113,7 +865,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine,
 	wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 |
 				   MI_SRM_LRM_GLOBAL_GTT));
 	wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
-	wa_ctx_emit(batch, index, engine->scratch.gtt_offset + 256);
+	wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256);
 	wa_ctx_emit(batch, index, 0);
 
 	wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
@@ -1131,7 +883,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine,
 	wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 |
 				   MI_SRM_LRM_GLOBAL_GTT));
 	wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
-	wa_ctx_emit(batch, index, engine->scratch.gtt_offset + 256);
+	wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256);
 	wa_ctx_emit(batch, index, 0);
 
 	return index;
@@ -1156,37 +908,24 @@ static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx,
 	return 0;
 }
 
-/**
- * gen8_init_indirectctx_bb() - initialize indirect ctx batch with WA
- *
- * @engine: only applicable for RCS
- * @wa_ctx: structure representing wa_ctx
- * offset: specifies start of the batch, should be cache-aligned. This is updated
- * with the offset value received as input.
- * size: size of the batch in DWORDS but HW expects in terms of cachelines
- * @batch: page in which WA are loaded
- * @offset: This field specifies the start of the batch, it should be
- * cache-aligned otherwise it is adjusted accordingly.
- * Typically we only have one indirect_ctx and per_ctx batch buffer which are
- * initialized at the beginning and shared across all contexts but this field
- * helps us to have multiple batches at different offsets and select them based
- * on a criteria. At the moment this batch always start at the beginning of the page
- * and at this point we don't have multiple wa_ctx batch buffers.
- *
- * The number of WA applied are not known at the beginning; we use this field
- * to return the no of DWORDS written.
+/*
+ * Typically we only have one indirect_ctx and per_ctx batch buffer which are
+ * initialized at the beginning and shared across all contexts but this field
+ * helps us to have multiple batches at different offsets and select them based
+ * on a criterion. At the moment this batch always starts at the beginning of the page
+ * and at this point we don't have multiple wa_ctx batch buffers.
  *
- * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
- * so it adds NOOPs as padding to make it cacheline aligned.
- * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
- * makes a complete batch buffer.
+ * The number of WA applied is not known at the beginning; we use this field
+ * to return the number of DWORDS written.
  *
- * Return: non-zero if we exceed the PAGE_SIZE limit.
+ * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
+ * so it adds NOOPs as padding to make it cacheline aligned.
+ * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
+ * make a complete batch buffer.
  */
-
 static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine,
 				    struct i915_wa_ctx_bb *wa_ctx,
-				    uint32_t *const batch,
+				    uint32_t *batch,
 				    uint32_t *offset)
 {
 	uint32_t scratch_addr;
@@ -1205,7 +944,7 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine,
 
 	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
 	/* Actual scratch location is at 128 bytes offset */
-	scratch_addr = engine->scratch.gtt_offset + 2*CACHELINE_BYTES;
+	scratch_addr = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
 
 	wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
 	wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 |
@@ -1230,26 +969,18 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine,
 	return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS);
 }
 
-/**
- * gen8_init_perctx_bb() - initialize per ctx batch with WA
- *
- * @engine: only applicable for RCS
- * @wa_ctx: structure representing wa_ctx
- * offset: specifies start of the batch, should be cache-aligned.
- * size: size of the batch in DWORDS but HW expects in terms of cachelines
- * @batch: page in which WA are loaded
- * @offset: This field specifies the start of this batch.
- * This batch is started immediately after indirect_ctx batch. Since we ensure
- * that indirect_ctx ends on a cacheline this batch is aligned automatically.
+/*
+ * This batch is started immediately after indirect_ctx batch. Since we ensure
+ * that indirect_ctx ends on a cacheline this batch is aligned automatically.
  *
- * The number of DWORDS written are returned using this field.
+ * The number of DWORDS written is returned using this field.
  *
  * This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding
  * to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant.
*/ static int gen8_init_perctx_bb(struct intel_engine_cs *engine, struct i915_wa_ctx_bb *wa_ctx, - uint32_t *const batch, + uint32_t *batch, uint32_t *offset) { uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); @@ -1264,7 +995,7 @@ static int gen8_init_perctx_bb(struct intel_engine_cs *engine, static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, struct i915_wa_ctx_bb *wa_ctx, - uint32_t *const batch, + uint32_t *batch, uint32_t *offset) { int ret; @@ -1282,11 +1013,18 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, return ret; index = ret; + /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl */ + wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); + wa_ctx_emit_reg(batch, index, COMMON_SLICE_CHICKEN2); + wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE( + GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE)); + wa_ctx_emit(batch, index, MI_NOOP); + /* WaClearSlmSpaceAtContextSwitch:kbl */ /* Actual scratch location is at 128 bytes offset */ if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)) { - uint32_t scratch_addr - = engine->scratch.gtt_offset + 2*CACHELINE_BYTES; + u32 scratch_addr = + i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 | @@ -1332,7 +1070,7 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, static int gen9_init_perctx_bb(struct intel_engine_cs *engine, struct i915_wa_ctx_bb *wa_ctx, - uint32_t *const batch, + uint32_t *batch, uint32_t *offset) { uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); @@ -1378,44 +1116,44 @@ static int gen9_init_perctx_bb(struct intel_engine_cs *engine, static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size) { - int ret; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; - engine->wa_ctx.obj = i915_gem_object_create(&engine->i915->drm, - PAGE_ALIGN(size)); - if (IS_ERR(engine->wa_ctx.obj)) { - DRM_DEBUG_DRIVER("alloc LRC WA ctx backing obj failed.\n"); - ret = PTR_ERR(engine->wa_ctx.obj); - engine->wa_ctx.obj = NULL; - return ret; - } + obj = i915_gem_object_create(&engine->i915->drm, PAGE_ALIGN(size)); + if (IS_ERR(obj)) + return PTR_ERR(obj); - ret = i915_gem_obj_ggtt_pin(engine->wa_ctx.obj, PAGE_SIZE, 0); - if (ret) { - DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n", - ret); - drm_gem_object_unreference(&engine->wa_ctx.obj->base); - return ret; + vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; } + err = i915_vma_pin(vma, 0, PAGE_SIZE, PIN_GLOBAL | PIN_HIGH); + if (err) + goto err; + + engine->wa_ctx.vma = vma; return 0; + +err: + i915_gem_object_put(obj); + return err; } static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *engine) { - if (engine->wa_ctx.obj) { - i915_gem_object_ggtt_unpin(engine->wa_ctx.obj); - drm_gem_object_unreference(&engine->wa_ctx.obj->base); - engine->wa_ctx.obj = NULL; - } + i915_vma_unpin_and_release(&engine->wa_ctx.vma); } static int intel_init_workaround_bb(struct intel_engine_cs *engine) { - int ret; + struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; uint32_t *batch; uint32_t offset; struct page *page; - struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; + int ret; WARN_ON(engine->id != RCS); @@ -1427,7 +1165,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) } /* some WA perform writes to scratch page, ensure it is valid */ - if (engine->scratch.obj == NULL) { + if 
(!engine->scratch) { DRM_ERROR("scratch page not allocated for %s\n", engine->name); return -EINVAL; } @@ -1438,7 +1176,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return ret; } - page = i915_gem_object_get_dirty_page(wa_ctx->obj, 0); + page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0); batch = kmap_atomic(page); offset = 0; @@ -1485,55 +1223,37 @@ static void lrc_init_hws(struct intel_engine_cs *engine) struct drm_i915_private *dev_priv = engine->i915; I915_WRITE(RING_HWS_PGA(engine->mmio_base), - (u32)engine->status_page.gfx_addr); + engine->status_page.ggtt_offset); POSTING_READ(RING_HWS_PGA(engine->mmio_base)); } static int gen8_init_common_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - unsigned int next_context_status_buffer_hw; + int ret; + + ret = intel_mocs_init_engine(engine); + if (ret) + return ret; lrc_init_hws(engine); - I915_WRITE_IMR(engine, - ~(engine->irq_enable_mask | engine->irq_keep_mask)); + intel_engine_reset_breadcrumbs(engine); + I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff); I915_WRITE(RING_MODE_GEN7(engine), _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) | _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); - POSTING_READ(RING_MODE_GEN7(engine)); - - /* - * Instead of resetting the Context Status Buffer (CSB) read pointer to - * zero, we need to read the write pointer from hardware and use its - * value because "this register is power context save restored". - * Effectively, these states have been observed: - * - * | Suspend-to-idle (freeze) | Suspend-to-RAM (mem) | - * BDW | CSB regs not reset | CSB regs reset | - * CHT | CSB regs not reset | CSB regs not reset | - * SKL | ? | ? | - * BXT | ? | ? | - */ - next_context_status_buffer_hw = - GEN8_CSB_WRITE_PTR(I915_READ(RING_CONTEXT_STATUS_PTR(engine))); - - /* - * When the CSB registers are reset (also after power-up / gpu reset), - * CSB write pointer is set to all 1's, which is not valid, use '5' in - * this special case, so the first element read is CSB[0]. - */ - if (next_context_status_buffer_hw == GEN8_CSB_PTR_MASK) - next_context_status_buffer_hw = (GEN8_CSB_ENTRIES - 1); - engine->next_context_status_buffer = next_context_status_buffer_hw; DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name); intel_engine_init_hangcheck(engine); - return intel_mocs_init_engine(engine); + if (!execlists_elsp_idle(engine)) + execlists_submit_ports(engine); + + return 0; } static int gen8_init_render_ring(struct intel_engine_cs *engine) @@ -1569,11 +1289,57 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine) return init_workarounds_ring(engine); } +static void reset_common_ring(struct intel_engine_cs *engine, + struct drm_i915_gem_request *request) +{ + struct drm_i915_private *dev_priv = engine->i915; + struct execlist_port *port = engine->execlist_port; + struct intel_context *ce = &request->ctx->engine[engine->id]; + + /* We want a simple context + ring to execute the breadcrumb update. + * We cannot rely on the context being intact across the GPU hang, + * so clear it and rebuild just what we need for the breadcrumb. + * All pending requests for this context will be zapped, and any + * future request will be after userspace has had the opportunity + * to recreate its own state. 
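The recovery steps promised by the comment above, condensed from reset_common_ring() as it continues below: rebuild the context image, then park RING_HEAD on the breadcrumb so that only the seqno write of the guilty request is replayed:

execlists_init_reg_state(ce->lrc_reg_state, request->ctx, engine, ce->ring);
ce->lrc_reg_state[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ce->ring->vma);
ce->lrc_reg_state[CTX_RING_HEAD + 1] = request->postfix;

request->ring->head = request->postfix;	/* skip the hanging batch */
request->ring->last_retired_head = -1;
intel_ring_update_space(request->ring);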
+ */ + execlists_init_reg_state(ce->lrc_reg_state, + request->ctx, engine, ce->ring); + + /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */ + ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = + i915_ggtt_offset(ce->ring->vma); + ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix; + + request->ring->head = request->postfix; + request->ring->last_retired_head = -1; + intel_ring_update_space(request->ring); + + if (i915.enable_guc_submission) + return; + + /* Catch up with any missed context-switch interrupts */ + I915_WRITE(RING_CONTEXT_STATUS_PTR(engine), _MASKED_FIELD(0xffff, 0)); + if (request->ctx != port[0].request->ctx) { + i915_gem_request_put(port[0].request); + port[0] = port[1]; + memset(&port[1], 0, sizeof(port[1])); + } + + /* CS is stopped, and we will resubmit both ports on resume */ + GEM_BUG_ON(request->ctx != port[0].request->ctx); + port[0].count = 0; + port[1].count = 0; + + /* Reset WaIdleLiteRestore:bdw,skl as well */ + request->tail = request->wa_tail - WA_TAIL_DWORDS * sizeof(u32); +} + static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) { struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; + struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; - struct intel_ringbuffer *ringbuf = req->ringbuf; const int num_lri_cmds = GEN8_LEGACY_PDPES * 2; int i, ret; @@ -1581,28 +1347,27 @@ static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) if (ret) return ret; - intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(num_lri_cmds)); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_lri_cmds)); for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); - intel_logical_ring_emit_reg(ringbuf, - GEN8_RING_PDP_UDW(engine, i)); - intel_logical_ring_emit(ringbuf, upper_32_bits(pd_daddr)); - intel_logical_ring_emit_reg(ringbuf, - GEN8_RING_PDP_LDW(engine, i)); - intel_logical_ring_emit(ringbuf, lower_32_bits(pd_daddr)); + intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, i)); + intel_ring_emit(ring, upper_32_bits(pd_daddr)); + intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, i)); + intel_ring_emit(ring, lower_32_bits(pd_daddr)); } - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_advance(ringbuf); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } static int gen8_emit_bb_start(struct drm_i915_gem_request *req, - u64 offset, unsigned dispatch_flags) + u64 offset, u32 len, + unsigned int dispatch_flags) { - struct intel_ringbuffer *ringbuf = req->ringbuf; + struct intel_ring *ring = req->ring; bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE); int ret; @@ -1629,14 +1394,14 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, return ret; /* FIXME(BDW): Address space and security selectors. */ - intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | - (ppgtt<<8) | - (dispatch_flags & I915_DISPATCH_RS ? - MI_BATCH_RESOURCE_STREAMER : 0)); - intel_logical_ring_emit(ringbuf, lower_32_bits(offset)); - intel_logical_ring_emit(ringbuf, upper_32_bits(offset)); - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_advance(ringbuf); + intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | + (ppgtt<<8) | + (dispatch_flags & I915_DISPATCH_RS ? 
+ MI_BATCH_RESOURCE_STREAMER : 0)); + intel_ring_emit(ring, lower_32_bits(offset)); + intel_ring_emit(ring, upper_32_bits(offset)); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -1655,14 +1420,10 @@ static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) I915_WRITE_IMR(engine, ~engine->irq_keep_mask); } -static int gen8_emit_flush(struct drm_i915_gem_request *request, - u32 invalidate_domains, - u32 unused) +static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) { - struct intel_ringbuffer *ringbuf = request->ringbuf; - struct intel_engine_cs *engine = ringbuf->engine; - struct drm_i915_private *dev_priv = request->i915; - uint32_t cmd; + struct intel_ring *ring = request->ring; + u32 cmd; int ret; ret = intel_ring_begin(request, 4); @@ -1678,30 +1439,30 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, */ cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; - if (invalidate_domains & I915_GEM_GPU_DOMAINS) { + if (mode & EMIT_INVALIDATE) { cmd |= MI_INVALIDATE_TLB; - if (engine == &dev_priv->engine[VCS]) + if (request->engine->id == VCS) cmd |= MI_INVALIDATE_BSD; } - intel_logical_ring_emit(ringbuf, cmd); - intel_logical_ring_emit(ringbuf, - I915_GEM_HWS_SCRATCH_ADDR | - MI_FLUSH_DW_USE_GTT); - intel_logical_ring_emit(ringbuf, 0); /* upper addr */ - intel_logical_ring_emit(ringbuf, 0); /* value */ - intel_logical_ring_advance(ringbuf); + intel_ring_emit(ring, cmd); + intel_ring_emit(ring, + I915_GEM_HWS_SCRATCH_ADDR | + MI_FLUSH_DW_USE_GTT); + intel_ring_emit(ring, 0); /* upper addr */ + intel_ring_emit(ring, 0); /* value */ + intel_ring_advance(ring); return 0; } static int gen8_emit_flush_render(struct drm_i915_gem_request *request, - u32 invalidate_domains, - u32 flush_domains) + u32 mode) { - struct intel_ringbuffer *ringbuf = request->ringbuf; - struct intel_engine_cs *engine = ringbuf->engine; - u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; + struct intel_ring *ring = request->ring; + struct intel_engine_cs *engine = request->engine; + u32 scratch_addr = + i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; bool vf_flush_wa = false, dc_flush_wa = false; u32 flags = 0; int ret; @@ -1709,14 +1470,14 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, flags |= PIPE_CONTROL_CS_STALL; - if (flush_domains) { + if (mode & EMIT_FLUSH) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; } - if (invalidate_domains) { + if (mode & EMIT_INVALIDATE) { flags |= PIPE_CONTROL_TLB_INVALIDATE; flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; @@ -1751,40 +1512,40 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, return ret; if (vf_flush_wa) { - intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); } if (dc_flush_wa) { - intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); - intel_logical_ring_emit(ringbuf, PIPE_CONTROL_DC_FLUSH_ENABLE); - intel_logical_ring_emit(ringbuf, 0); - 
intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, PIPE_CONTROL_DC_FLUSH_ENABLE); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); } - intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); - intel_logical_ring_emit(ringbuf, flags); - intel_logical_ring_emit(ringbuf, scratch_addr); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, flags); + intel_ring_emit(ring, scratch_addr); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); if (dc_flush_wa) { - intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); - intel_logical_ring_emit(ringbuf, PIPE_CONTROL_CS_STALL); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, PIPE_CONTROL_CS_STALL); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); } - intel_logical_ring_advance(ringbuf); + intel_ring_advance(ring); return 0; } @@ -1809,11 +1570,10 @@ static void bxt_a_seqno_barrier(struct intel_engine_cs *engine) * used as a workaround for not being allowed to do lite * restore with HEAD==TAIL (WaIdleLiteRestore). */ -#define WA_TAIL_DWORDS 2 static int gen8_emit_request(struct drm_i915_gem_request *request) { - struct intel_ringbuffer *ringbuf = request->ringbuf; + struct intel_ring *ring = request->ring; int ret; ret = intel_ring_begin(request, 6 + WA_TAIL_DWORDS); @@ -1823,21 +1583,20 @@ static int gen8_emit_request(struct drm_i915_gem_request *request) /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); - intel_logical_ring_emit(ringbuf, - (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW); - intel_logical_ring_emit(ringbuf, - intel_hws_seqno_address(request->engine) | - MI_FLUSH_DW_USE_GTT); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, request->seqno); - intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); - intel_logical_ring_emit(ringbuf, MI_NOOP); - return intel_logical_ring_advance_and_submit(request); + intel_ring_emit(ring, (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW); + intel_ring_emit(ring, + intel_hws_seqno_address(request->engine) | + MI_FLUSH_DW_USE_GTT); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, request->fence.seqno); + intel_ring_emit(ring, MI_USER_INTERRUPT); + intel_ring_emit(ring, MI_NOOP); + return intel_logical_ring_advance(request); } static int gen8_emit_request_render(struct drm_i915_gem_request *request) { - struct intel_ringbuffer *ringbuf = request->ringbuf; + struct intel_ring *ring = request->ring; int ret; ret = intel_ring_begin(request, 8 + WA_TAIL_DWORDS); @@ -1851,50 +1610,19 @@ static int gen8_emit_request_render(struct drm_i915_gem_request *request) * need a prior CS_STALL, which is emitted by the flush * following the batch. 
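The non-render breadcrumb after this patch, reassembled from the gen8_emit_request() hunk above into its post-patch form:

static int gen8_emit_request(struct drm_i915_gem_request *request)
{
	struct intel_ring *ring = request->ring;
	int ret;

	ret = intel_ring_begin(request, 6 + WA_TAIL_DWORDS);
	if (ret)
		return ret;

	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
	BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));

	/* Store the fence seqno into the HWS, then raise the interrupt. */
	intel_ring_emit(ring, (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW);
	intel_ring_emit(ring,
			intel_hws_seqno_address(request->engine) |
			MI_FLUSH_DW_USE_GTT);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, request->fence.seqno);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_emit(ring, MI_NOOP);
	return intel_logical_ring_advance(request);
}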
*/ - intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); - intel_logical_ring_emit(ringbuf, - (PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE)); - intel_logical_ring_emit(ringbuf, - intel_hws_seqno_address(request->engine)); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request)); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, + (PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE)); + intel_ring_emit(ring, intel_hws_seqno_address(request->engine)); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, i915_gem_request_get_seqno(request)); /* We're thrashing one dword of HWS. */ - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); - intel_logical_ring_emit(ringbuf, MI_NOOP); - return intel_logical_ring_advance_and_submit(request); -} - -static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req) -{ - struct render_state so; - int ret; - - ret = i915_gem_render_state_prepare(req->engine, &so); - if (ret) - return ret; - - if (so.rodata == NULL) - return 0; - - ret = req->engine->emit_bb_start(req, so.ggtt_offset, - I915_DISPATCH_SECURE); - if (ret) - goto out; - - ret = req->engine->emit_bb_start(req, - (so.ggtt_offset + so.aux_batch_offset), - I915_DISPATCH_SECURE); - if (ret) - goto out; - - i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req); - -out: - i915_gem_render_state_fini(&so); - return ret; + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_USER_INTERRUPT); + intel_ring_emit(ring, MI_NOOP); + return intel_logical_ring_advance(request); } static int gen8_init_rcs_context(struct drm_i915_gem_request *req) @@ -1913,14 +1641,12 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req) if (ret) DRM_ERROR("MOCS failed to program: expect performance issues.\n"); - return intel_lr_context_render_state_init(req); + return i915_gem_render_state_init(req); } /** * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer - * * @engine: Engine Command Streamer. - * */ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) { @@ -1939,39 +1665,42 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) dev_priv = engine->i915; if (engine->buffer) { - intel_logical_ring_stop(engine); WARN_ON((I915_READ_MODE(engine) & MODE_IDLE) == 0); } if (engine->cleanup) engine->cleanup(engine); - i915_cmd_parser_fini_ring(engine); - i915_gem_batch_pool_fini(&engine->batch_pool); - - intel_engine_fini_breadcrumbs(engine); + intel_engine_cleanup_common(engine); - if (engine->status_page.obj) { - i915_gem_object_unpin_map(engine->status_page.obj); - engine->status_page.obj = NULL; + if (engine->status_page.vma) { + i915_gem_object_unpin_map(engine->status_page.vma->obj); + engine->status_page.vma = NULL; } intel_lr_context_unpin(dev_priv->kernel_context, engine); - engine->idle_lite_restore_wa = 0; - engine->disable_lite_restore_wa = false; - engine->ctx_desc_template = 0; - lrc_destroy_wa_ctx_obj(engine); engine->i915 = NULL; } +void intel_execlists_enable_submission(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + + for_each_engine(engine, dev_priv) + engine->submit_request = execlists_submit_request; +} + static void logical_ring_default_vfuncs(struct intel_engine_cs *engine) { /* Default vfuncs which can be overriden by each engine. 
*/ engine->init_hw = gen8_init_common_ring; - engine->emit_request = gen8_emit_request; + engine->reset_hw = reset_common_ring; engine->emit_flush = gen8_emit_flush; + engine->emit_request = gen8_emit_request; + engine->submit_request = execlists_submit_request; + engine->irq_enable = gen8_logical_ring_enable_irq; engine->irq_disable = gen8_logical_ring_disable_irq; engine->emit_bb_start = gen8_emit_bb_start; @@ -1980,41 +1709,71 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) } static inline void -logical_ring_default_irqs(struct intel_engine_cs *engine, unsigned shift) +logical_ring_default_irqs(struct intel_engine_cs *engine) { + unsigned shift = engine->irq_shift; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; } static int -lrc_setup_hws(struct intel_engine_cs *engine, - struct drm_i915_gem_object *dctx_obj) +lrc_setup_hws(struct intel_engine_cs *engine, struct i915_vma *vma) { + const int hws_offset = LRC_PPHWSP_PN * PAGE_SIZE; void *hws; /* The HWSP is part of the default context object in LRC mode. */ - engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj) + - LRC_PPHWSP_PN * PAGE_SIZE; - hws = i915_gem_object_pin_map(dctx_obj); + hws = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); if (IS_ERR(hws)) return PTR_ERR(hws); - engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE; - engine->status_page.obj = dctx_obj; + + engine->status_page.page_addr = hws + hws_offset; + engine->status_page.ggtt_offset = i915_ggtt_offset(vma) + hws_offset; + engine->status_page.vma = vma; return 0; } +static void +logical_ring_setup(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + enum forcewake_domains fw_domains; + + intel_engine_setup_common(engine); + + /* Intentionally left blank. 
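The vfunc table above is deliberately a set of defaults that each engine specialises after installing them. Condensed from logical_render_ring_init() further below:

logical_ring_setup(engine);	/* defaults, irqs, forcewake domains */

/* Render-specific overrides. */
engine->init_context = gen8_init_rcs_context;
engine->emit_flush = gen8_emit_flush_render;
engine->emit_request = gen8_emit_request_render;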
*/ + engine->buffer = NULL; + + fw_domains = intel_uncore_forcewake_for_reg(dev_priv, + RING_ELSP(engine), + FW_REG_WRITE); + + fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, + RING_CONTEXT_STATUS_PTR(engine), + FW_REG_READ | FW_REG_WRITE); + + fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, + RING_CONTEXT_STATUS_BUF_BASE(engine), + FW_REG_READ); + + engine->fw_domains = fw_domains; + + tasklet_init(&engine->irq_tasklet, + intel_lrc_irq_handler, (unsigned long)engine); + + logical_ring_init_platform_invariants(engine); + logical_ring_default_vfuncs(engine); + logical_ring_default_irqs(engine); +} + static int logical_ring_init(struct intel_engine_cs *engine) { struct i915_gem_context *dctx = engine->i915->kernel_context; int ret; - ret = intel_engine_init_breadcrumbs(engine); - if (ret) - goto error; - - ret = i915_cmd_parser_init_ring(engine); + ret = intel_engine_init_common(engine); if (ret) goto error; @@ -2044,11 +1803,13 @@ error: return ret; } -static int logical_render_ring_init(struct intel_engine_cs *engine) +int logical_render_ring_init(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; int ret; + logical_ring_setup(engine); + if (HAS_L3_DPF(dev_priv)) engine->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT; @@ -2058,11 +1819,10 @@ static int logical_render_ring_init(struct intel_engine_cs *engine) else engine->init_hw = gen8_init_render_ring; engine->init_context = gen8_init_rcs_context; - engine->cleanup = intel_fini_pipe_control; engine->emit_flush = gen8_emit_flush_render; engine->emit_request = gen8_emit_request_render; - ret = intel_init_pipe_control(engine, 4096); + ret = intel_engine_create_scratch(engine, 4096); if (ret) return ret; @@ -2085,160 +1845,11 @@ static int logical_render_ring_init(struct intel_engine_cs *engine) return ret; } -static const struct logical_ring_info { - const char *name; - unsigned exec_id; - unsigned guc_id; - u32 mmio_base; - unsigned irq_shift; - int (*init)(struct intel_engine_cs *engine); -} logical_rings[] = { - [RCS] = { - .name = "render ring", - .exec_id = I915_EXEC_RENDER, - .guc_id = GUC_RENDER_ENGINE, - .mmio_base = RENDER_RING_BASE, - .irq_shift = GEN8_RCS_IRQ_SHIFT, - .init = logical_render_ring_init, - }, - [BCS] = { - .name = "blitter ring", - .exec_id = I915_EXEC_BLT, - .guc_id = GUC_BLITTER_ENGINE, - .mmio_base = BLT_RING_BASE, - .irq_shift = GEN8_BCS_IRQ_SHIFT, - .init = logical_ring_init, - }, - [VCS] = { - .name = "bsd ring", - .exec_id = I915_EXEC_BSD, - .guc_id = GUC_VIDEO_ENGINE, - .mmio_base = GEN6_BSD_RING_BASE, - .irq_shift = GEN8_VCS1_IRQ_SHIFT, - .init = logical_ring_init, - }, - [VCS2] = { - .name = "bsd2 ring", - .exec_id = I915_EXEC_BSD, - .guc_id = GUC_VIDEO_ENGINE2, - .mmio_base = GEN8_BSD2_RING_BASE, - .irq_shift = GEN8_VCS2_IRQ_SHIFT, - .init = logical_ring_init, - }, - [VECS] = { - .name = "video enhancement ring", - .exec_id = I915_EXEC_VEBOX, - .guc_id = GUC_VIDEOENHANCE_ENGINE, - .mmio_base = VEBOX_RING_BASE, - .irq_shift = GEN8_VECS_IRQ_SHIFT, - .init = logical_ring_init, - }, -}; - -static struct intel_engine_cs * -logical_ring_setup(struct drm_i915_private *dev_priv, enum intel_engine_id id) -{ - const struct logical_ring_info *info = &logical_rings[id]; - struct intel_engine_cs *engine = &dev_priv->engine[id]; - enum forcewake_domains fw_domains; - - engine->id = id; - engine->name = info->name; - engine->exec_id = info->exec_id; - engine->guc_id = info->guc_id; - engine->mmio_base = info->mmio_base; - - engine->i915 = dev_priv; - - /* 
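
[Editor's aside -- a minimal sketch, not part of the patch. It illustrates
why logical_ring_setup() above ORs the ELSP and CSB forcewake requirements
into a single engine->fw_domains mask: a submitter can then take every
required power well in one get/put pair around its register traffic. The
function name here is hypothetical; intel_uncore_forcewake_get/put are
existing driver entry points.]

	static void example_csb_access(struct intel_engine_cs *engine)
	{
		struct drm_i915_private *dev_priv = engine->i915;

		/* take all domains computed at setup time in one call */
		intel_uncore_forcewake_get(dev_priv, engine->fw_domains);

		/* ... write RING_ELSP(engine) and read the context-status
		 * buffer without a per-register forcewake dance ... */

		intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
	}
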
Intentionally left blank. */ - engine->buffer = NULL; - - fw_domains = intel_uncore_forcewake_for_reg(dev_priv, - RING_ELSP(engine), - FW_REG_WRITE); - - fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, - RING_CONTEXT_STATUS_PTR(engine), - FW_REG_READ | FW_REG_WRITE); - - fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, - RING_CONTEXT_STATUS_BUF_BASE(engine), - FW_REG_READ); - - engine->fw_domains = fw_domains; - - INIT_LIST_HEAD(&engine->active_list); - INIT_LIST_HEAD(&engine->request_list); - INIT_LIST_HEAD(&engine->buffers); - INIT_LIST_HEAD(&engine->execlist_queue); - spin_lock_init(&engine->execlist_lock); - - tasklet_init(&engine->irq_tasklet, - intel_lrc_irq_handler, (unsigned long)engine); - - logical_ring_init_platform_invariants(engine); - logical_ring_default_vfuncs(engine); - logical_ring_default_irqs(engine, info->irq_shift); - - intel_engine_init_hangcheck(engine); - i915_gem_batch_pool_init(&dev_priv->drm, &engine->batch_pool); - - return engine; -} - -/** - * intel_logical_rings_init() - allocate, populate and init the Engine Command Streamers - * @dev: DRM device. - * - * This function inits the engines for an Execlists submission style (the - * equivalent in the legacy ringbuffer submission world would be - * i915_gem_init_engines). It does it only for those engines that are present in - * the hardware. - * - * Return: non-zero if the initialization failed. - */ -int intel_logical_rings_init(struct drm_device *dev) +int logical_xcs_ring_init(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = to_i915(dev); - unsigned int mask = 0; - unsigned int i; - int ret; - - WARN_ON(INTEL_INFO(dev_priv)->ring_mask & - GENMASK(sizeof(mask) * BITS_PER_BYTE - 1, I915_NUM_ENGINES)); - - for (i = 0; i < ARRAY_SIZE(logical_rings); i++) { - if (!HAS_ENGINE(dev_priv, i)) - continue; - - if (!logical_rings[i].init) - continue; + logical_ring_setup(engine); - ret = logical_rings[i].init(logical_ring_setup(dev_priv, i)); - if (ret) - goto cleanup; - - mask |= ENGINE_MASK(i); - } - - /* - * Catch failures to update logical_rings table when the new engines - * are added to the driver by a warning and disabling the forgotten - * engines. - */ - if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask)) { - struct intel_device_info *info = - (struct intel_device_info *)&dev_priv->info; - info->ring_mask = mask; - } - - return 0; - -cleanup: - for (i = 0; i < I915_NUM_ENGINES; i++) - intel_logical_ring_cleanup(&dev_priv->engine[i]); - - return ret; + return logical_ring_init(engine); } static u32 @@ -2259,24 +1870,24 @@ make_rpcs(struct drm_i915_private *dev_priv) * must make an explicit request through RPCS for full * enablement. 
*/ - if (INTEL_INFO(dev_priv)->has_slice_pg) { + if (INTEL_INFO(dev_priv)->sseu.has_slice_pg) { rpcs |= GEN8_RPCS_S_CNT_ENABLE; - rpcs |= INTEL_INFO(dev_priv)->slice_total << + rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.slice_mask) << GEN8_RPCS_S_CNT_SHIFT; rpcs |= GEN8_RPCS_ENABLE; } - if (INTEL_INFO(dev_priv)->has_subslice_pg) { + if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) { rpcs |= GEN8_RPCS_SS_CNT_ENABLE; - rpcs |= INTEL_INFO(dev_priv)->subslice_per_slice << + rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) << GEN8_RPCS_SS_CNT_SHIFT; rpcs |= GEN8_RPCS_ENABLE; } - if (INTEL_INFO(dev_priv)->has_eu_pg) { - rpcs |= INTEL_INFO(dev_priv)->eu_per_subslice << + if (INTEL_INFO(dev_priv)->sseu.has_eu_pg) { + rpcs |= INTEL_INFO(dev_priv)->sseu.eu_per_subslice << GEN8_RPCS_EU_MIN_SHIFT; - rpcs |= INTEL_INFO(dev_priv)->eu_per_subslice << + rpcs |= INTEL_INFO(dev_priv)->sseu.eu_per_subslice << GEN8_RPCS_EU_MAX_SHIFT; rpcs |= GEN8_RPCS_ENABLE; } @@ -2305,38 +1916,13 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) return indirect_ctx_offset; } -static int -populate_lr_context(struct i915_gem_context *ctx, - struct drm_i915_gem_object *ctx_obj, - struct intel_engine_cs *engine, - struct intel_ringbuffer *ringbuf) +static void execlists_init_reg_state(u32 *reg_state, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct intel_ring *ring) { - struct drm_i915_private *dev_priv = ctx->i915; - struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; - void *vaddr; - u32 *reg_state; - int ret; - - if (!ppgtt) - ppgtt = dev_priv->mm.aliasing_ppgtt; - - ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true); - if (ret) { - DRM_DEBUG_DRIVER("Could not set to CPU domain\n"); - return ret; - } - - vaddr = i915_gem_object_pin_map(ctx_obj); - if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret); - return ret; - } - ctx_obj->dirty = true; - - /* The second page of the context object contains some fields which must - * be set up prior to the first execution. */ - reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; + struct drm_i915_private *dev_priv = engine->i915; + struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: dev_priv->mm.aliasing_ppgtt; /* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM * commands followed by (reg, value) pairs. The values we are setting here are @@ -2350,19 +1936,16 @@ populate_lr_context(struct i915_gem_context *ctx, _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | (HAS_RESOURCE_STREAMER(dev_priv) ? - CTX_CTRL_RS_CTX_ENABLE : 0))); + CTX_CTRL_RS_CTX_ENABLE : 0))); ASSIGN_CTX_REG(reg_state, CTX_RING_HEAD, RING_HEAD(engine->mmio_base), 0); ASSIGN_CTX_REG(reg_state, CTX_RING_TAIL, RING_TAIL(engine->mmio_base), 0); - /* Ring buffer start address is not known until the buffer is pinned. 
- * It is written to the context image in execlists_update_context() - */ ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_START, RING_START(engine->mmio_base), 0); ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_CONTROL, RING_CTL(engine->mmio_base), - ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID); + ((ring->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID); ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_U, RING_BBADDR_UDW(engine->mmio_base), 0); ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_L, @@ -2383,9 +1966,9 @@ populate_lr_context(struct i915_gem_context *ctx, RING_INDIRECT_CTX(engine->mmio_base), 0); ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX_OFFSET, RING_INDIRECT_CTX_OFFSET(engine->mmio_base), 0); - if (engine->wa_ctx.obj) { + if (engine->wa_ctx.vma) { struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; - uint32_t ggtt_offset = i915_gem_obj_ggtt_offset(wa_ctx->obj); + u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); reg_state[CTX_RCS_INDIRECT_CTX+1] = (ggtt_offset + wa_ctx->indirect_ctx.offset * sizeof(uint32_t)) | @@ -2440,6 +2023,36 @@ populate_lr_context(struct i915_gem_context *ctx, ASSIGN_CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, make_rpcs(dev_priv)); } +} + +static int +populate_lr_context(struct i915_gem_context *ctx, + struct drm_i915_gem_object *ctx_obj, + struct intel_engine_cs *engine, + struct intel_ring *ring) +{ + void *vaddr; + int ret; + + ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true); + if (ret) { + DRM_DEBUG_DRIVER("Could not set to CPU domain\n"); + return ret; + } + + vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + ret = PTR_ERR(vaddr); + DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret); + return ret; + } + ctx_obj->dirty = true; + + /* The second page of the context object contains some fields which must + * be set up prior to the first execution. */ + + execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE, + ctx, engine, ring); i915_gem_object_unpin_map(ctx_obj); @@ -2484,26 +2097,14 @@ uint32_t intel_lr_context_size(struct intel_engine_cs *engine) return ret; } -/** - * execlists_context_deferred_alloc() - create the LRC specific bits of a context - * @ctx: LR context to create. - * @engine: engine to be used with the context. - * - * This function can be called more than once, with different engines, if we plan - * to use the context with them. The context backing objects and the ringbuffers - * (specially the ringbuffer backing objects) suck a lot of memory up, and that's why - * the creation is a deferred call: it's better to make sure first that we need to use - * a given ring with the context. - * - * Return: non-zero on error. 
- */ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { struct drm_i915_gem_object *ctx_obj; struct intel_context *ce = &ctx->engine[engine->id]; + struct i915_vma *vma; uint32_t context_size; - struct intel_ringbuffer *ringbuf; + struct intel_ring *ring; int ret; WARN_ON(ce->state); @@ -2519,60 +2120,63 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, return PTR_ERR(ctx_obj); } - ringbuf = intel_engine_create_ringbuffer(engine, ctx->ring_size); - if (IS_ERR(ringbuf)) { - ret = PTR_ERR(ringbuf); + vma = i915_vma_create(ctx_obj, &ctx->i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto error_deref_obj; + } + + ring = intel_engine_create_ring(engine, ctx->ring_size); + if (IS_ERR(ring)) { + ret = PTR_ERR(ring); goto error_deref_obj; } - ret = populate_lr_context(ctx, ctx_obj, engine, ringbuf); + ret = populate_lr_context(ctx, ctx_obj, engine, ring); if (ret) { DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret); - goto error_ringbuf; + goto error_ring_free; } - ce->ringbuf = ringbuf; - ce->state = ctx_obj; + ce->ring = ring; + ce->state = vma; ce->initialised = engine->init_context == NULL; return 0; -error_ringbuf: - intel_ringbuffer_free(ringbuf); +error_ring_free: + intel_ring_free(ring); error_deref_obj: - drm_gem_object_unreference(&ctx_obj->base); - ce->ringbuf = NULL; - ce->state = NULL; + i915_gem_object_put(ctx_obj); return ret; } -void intel_lr_context_reset(struct drm_i915_private *dev_priv, - struct i915_gem_context *ctx) +void intel_lr_context_resume(struct drm_i915_private *dev_priv) { + struct i915_gem_context *ctx = dev_priv->kernel_context; struct intel_engine_cs *engine; for_each_engine(engine, dev_priv) { struct intel_context *ce = &ctx->engine[engine->id]; - struct drm_i915_gem_object *ctx_obj = ce->state; void *vaddr; uint32_t *reg_state; - if (!ctx_obj) + if (!ce->state) continue; - vaddr = i915_gem_object_pin_map(ctx_obj); + vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB); if (WARN_ON(IS_ERR(vaddr))) continue; reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; - ctx_obj->dirty = true; reg_state[CTX_RING_HEAD+1] = 0; reg_state[CTX_RING_TAIL+1] = 0; - i915_gem_object_unpin_map(ctx_obj); + ce->state->obj->dirty = true; + i915_gem_object_unpin_map(ce->state->obj); - ce->ringbuf->head = 0; - ce->ringbuf->tail = 0; + ce->ring->head = 0; + ce->ring->tail = 0; } } diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 2b8255c19dcc..4fed8165f98a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -29,17 +29,17 @@ #define GEN8_LR_CONTEXT_ALIGN 4096 /* Execlists regs */ -#define RING_ELSP(ring) _MMIO((ring)->mmio_base + 0x230) -#define RING_EXECLIST_STATUS_LO(ring) _MMIO((ring)->mmio_base + 0x234) -#define RING_EXECLIST_STATUS_HI(ring) _MMIO((ring)->mmio_base + 0x234 + 4) -#define RING_CONTEXT_CONTROL(ring) _MMIO((ring)->mmio_base + 0x244) +#define RING_ELSP(engine) _MMIO((engine)->mmio_base + 0x230) +#define RING_EXECLIST_STATUS_LO(engine) _MMIO((engine)->mmio_base + 0x234) +#define RING_EXECLIST_STATUS_HI(engine) _MMIO((engine)->mmio_base + 0x234 + 4) +#define RING_CONTEXT_CONTROL(engine) _MMIO((engine)->mmio_base + 0x244) #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH (1 << 3) #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) #define CTX_CTRL_RS_CTX_ENABLE (1 << 1) -#define RING_CONTEXT_STATUS_BUF_BASE(ring) _MMIO((ring)->mmio_base + 0x370) -#define RING_CONTEXT_STATUS_BUF_LO(ring, i) 
_MMIO((ring)->mmio_base + 0x370 + (i) * 8) -#define RING_CONTEXT_STATUS_BUF_HI(ring, i) _MMIO((ring)->mmio_base + 0x370 + (i) * 8 + 4) -#define RING_CONTEXT_STATUS_PTR(ring) _MMIO((ring)->mmio_base + 0x3a0) +#define RING_CONTEXT_STATUS_BUF_BASE(engine) _MMIO((engine)->mmio_base + 0x370) +#define RING_CONTEXT_STATUS_BUF_LO(engine, i) _MMIO((engine)->mmio_base + 0x370 + (i) * 8) +#define RING_CONTEXT_STATUS_BUF_HI(engine, i) _MMIO((engine)->mmio_base + 0x370 + (i) * 8 + 4) +#define RING_CONTEXT_STATUS_PTR(engine) _MMIO((engine)->mmio_base + 0x3a0) /* The docs specify that the write pointer wraps around after 5h, "After status * is written out to the last available status QW at offset 5h, this pointer @@ -67,35 +67,10 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request int intel_logical_ring_reserve_space(struct drm_i915_gem_request *request); void intel_logical_ring_stop(struct intel_engine_cs *engine); void intel_logical_ring_cleanup(struct intel_engine_cs *engine); -int intel_logical_rings_init(struct drm_device *dev); +int logical_render_ring_init(struct intel_engine_cs *engine); +int logical_xcs_ring_init(struct intel_engine_cs *engine); -int logical_ring_flush_all_caches(struct drm_i915_gem_request *req); -/** - * intel_logical_ring_advance() - advance the ringbuffer tail - * @ringbuf: Ringbuffer to advance. - * - * The tail is only updated in our logical ringbuffer struct. - */ -static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf) -{ - ringbuf->tail &= ringbuf->size - 1; -} -/** - * intel_logical_ring_emit() - write a DWORD to the ringbuffer. - * @ringbuf: Ringbuffer to write to. - * @data: DWORD to write. - */ -static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf, - u32 data) -{ - iowrite32(data, ringbuf->virtual_start + ringbuf->tail); - ringbuf->tail += 4; -} -static inline void intel_logical_ring_emit_reg(struct intel_ringbuffer *ringbuf, - i915_reg_t reg) -{ - intel_logical_ring_emit(ringbuf, i915_mmio_reg_offset(reg)); -} +int intel_engines_init(struct drm_device *dev); /* Logical Ring Contexts */ @@ -112,19 +87,13 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx, struct drm_i915_private; -void intel_lr_context_reset(struct drm_i915_private *dev_priv, - struct i915_gem_context *ctx); +void intel_lr_context_resume(struct drm_i915_private *dev_priv); uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, struct intel_engine_cs *engine); /* Execlists */ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enable_execlists); -struct i915_execbuffer_params; -int intel_execlists_submission(struct i915_execbuffer_params *params, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas); - -void intel_execlists_cancel_requests(struct intel_engine_cs *engine); +void intel_execlists_enable_submission(struct drm_i915_private *dev_priv); #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 49550470483e..e1d47d51ea47 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -48,6 +48,20 @@ struct intel_lvds_connector { struct notifier_block lid_notifier; }; +struct intel_lvds_pps { + /* 100us units */ + int t1_t2; + int t3; + int t4; + int t5; + int tx; + + int divider; + + int port; + bool powerdown_on_reset; +}; + struct intel_lvds_encoder { struct intel_encoder base; @@ -55,6 +69,9 @@ struct intel_lvds_encoder { i915_reg_t reg; u32 a3_power; + struct 
intel_lvds_pps init_pps; + u32 init_lvds_val; + struct intel_lvds_connector *attached_connector; }; @@ -136,28 +153,108 @@ static void intel_lvds_get_config(struct intel_encoder *encoder, pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock; } -static void intel_pre_enable_lvds(struct intel_encoder *encoder) +static void intel_lvds_pps_get_hw_state(struct drm_i915_private *dev_priv, + struct intel_lvds_pps *pps) +{ + u32 val; + + pps->powerdown_on_reset = I915_READ(PP_CONTROL(0)) & PANEL_POWER_RESET; + + val = I915_READ(PP_ON_DELAYS(0)); + pps->port = (val & PANEL_PORT_SELECT_MASK) >> + PANEL_PORT_SELECT_SHIFT; + pps->t1_t2 = (val & PANEL_POWER_UP_DELAY_MASK) >> + PANEL_POWER_UP_DELAY_SHIFT; + pps->t5 = (val & PANEL_LIGHT_ON_DELAY_MASK) >> + PANEL_LIGHT_ON_DELAY_SHIFT; + + val = I915_READ(PP_OFF_DELAYS(0)); + pps->t3 = (val & PANEL_POWER_DOWN_DELAY_MASK) >> + PANEL_POWER_DOWN_DELAY_SHIFT; + pps->tx = (val & PANEL_LIGHT_OFF_DELAY_MASK) >> + PANEL_LIGHT_OFF_DELAY_SHIFT; + + val = I915_READ(PP_DIVISOR(0)); + pps->divider = (val & PP_REFERENCE_DIVIDER_MASK) >> + PP_REFERENCE_DIVIDER_SHIFT; + val = (val & PANEL_POWER_CYCLE_DELAY_MASK) >> + PANEL_POWER_CYCLE_DELAY_SHIFT; + /* + * Remove the BSpec specified +1 (100ms) offset that accounts for a + * too short power-cycle delay due to the asynchronous programming of + * the register. + */ + if (val) + val--; + /* Convert from 100ms to 100us units */ + pps->t4 = val * 1000; + + if (INTEL_INFO(dev_priv)->gen <= 4 && + pps->t1_t2 == 0 && pps->t5 == 0 && pps->t3 == 0 && pps->tx == 0) { + DRM_DEBUG_KMS("Panel power timings uninitialized, " + "setting defaults\n"); + /* Set T2 to 40ms and T5 to 200ms in 100 usec units */ + pps->t1_t2 = 40 * 10; + pps->t5 = 200 * 10; + /* Set T3 to 35ms and Tx to 200ms in 100 usec units */ + pps->t3 = 35 * 10; + pps->tx = 200 * 10; + } + + DRM_DEBUG_DRIVER("LVDS PPS:t1+t2 %d t3 %d t4 %d t5 %d tx %d " + "divider %d port %d powerdown_on_reset %d\n", + pps->t1_t2, pps->t3, pps->t4, pps->t5, pps->tx, + pps->divider, pps->port, pps->powerdown_on_reset); +} + +static void intel_lvds_pps_init_hw(struct drm_i915_private *dev_priv, + struct intel_lvds_pps *pps) +{ + u32 val; + + val = I915_READ(PP_CONTROL(0)); + WARN_ON((val & PANEL_UNLOCK_MASK) != PANEL_UNLOCK_REGS); + if (pps->powerdown_on_reset) + val |= PANEL_POWER_RESET; + I915_WRITE(PP_CONTROL(0), val); + + I915_WRITE(PP_ON_DELAYS(0), (pps->port << PANEL_PORT_SELECT_SHIFT) | + (pps->t1_t2 << PANEL_POWER_UP_DELAY_SHIFT) | + (pps->t5 << PANEL_LIGHT_ON_DELAY_SHIFT)); + I915_WRITE(PP_OFF_DELAYS(0), (pps->t3 << PANEL_POWER_DOWN_DELAY_SHIFT) | + (pps->tx << PANEL_LIGHT_OFF_DELAY_SHIFT)); + + val = pps->divider << PP_REFERENCE_DIVIDER_SHIFT; + val |= (DIV_ROUND_UP(pps->t4, 1000) + 1) << + PANEL_POWER_CYCLE_DELAY_SHIFT; + I915_WRITE(PP_DIVISOR(0), val); +} + +static void intel_pre_enable_lvds(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); - const struct drm_display_mode *adjusted_mode = &crtc->config->base.adjusted_mode; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); + const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; int pipe = crtc->pipe; u32 temp; - 
if (HAS_PCH_SPLIT(dev)) { + if (HAS_PCH_SPLIT(dev_priv)) { assert_fdi_rx_pll_disabled(dev_priv, pipe); assert_shared_dpll_disabled(dev_priv, - crtc->config->shared_dpll); + pipe_config->shared_dpll); } else { assert_pll_disabled(dev_priv, pipe); } - temp = I915_READ(lvds_encoder->reg); + intel_lvds_pps_init_hw(dev_priv, &lvds_encoder->init_pps); + + temp = lvds_encoder->init_lvds_val; temp |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP; - if (HAS_PCH_CPT(dev)) { + if (HAS_PCH_CPT(dev_priv)) { temp &= ~PORT_TRANS_SEL_MASK; temp |= PORT_TRANS_SEL_CPT(pipe); } else { @@ -170,7 +267,7 @@ static void intel_pre_enable_lvds(struct intel_encoder *encoder) /* set the corresponding LVDS_BORDER bit */ temp &= ~LVDS_BORDER_ENABLE; - temp |= crtc->config->gmch_pfit.lvds_border_bits; + temp |= pipe_config->gmch_pfit.lvds_border_bits; /* Set the B0-B3 data pairs corresponding to whether we're going to * set the DPLLs for dual-channel mode or not. */ @@ -193,7 +290,7 @@ static void intel_pre_enable_lvds(struct intel_encoder *encoder) if (IS_GEN4(dev_priv)) { /* Bspec wording suggests that LVDS port dithering only exists * for 18bpp panels. */ - if (crtc->config->dither && crtc->config->pipe_bpp == 18) + if (pipe_config->dither && pipe_config->pipe_bpp == 18) temp |= LVDS_ENABLE_DITHER; else temp &= ~LVDS_ENABLE_DITHER; @@ -210,57 +307,45 @@ static void intel_pre_enable_lvds(struct intel_encoder *encoder) /** * Sets the power state for the panel. */ -static void intel_enable_lvds(struct intel_encoder *encoder) +static void intel_enable_lvds(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct drm_device *dev = encoder->base.dev; struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); struct intel_connector *intel_connector = &lvds_encoder->attached_connector->base; struct drm_i915_private *dev_priv = to_i915(dev); - i915_reg_t ctl_reg, stat_reg; - - if (HAS_PCH_SPLIT(dev)) { - ctl_reg = PCH_PP_CONTROL; - stat_reg = PCH_PP_STATUS; - } else { - ctl_reg = PP_CONTROL; - stat_reg = PP_STATUS; - } I915_WRITE(lvds_encoder->reg, I915_READ(lvds_encoder->reg) | LVDS_PORT_EN); - I915_WRITE(ctl_reg, I915_READ(ctl_reg) | POWER_TARGET_ON); + I915_WRITE(PP_CONTROL(0), I915_READ(PP_CONTROL(0)) | PANEL_POWER_ON); POSTING_READ(lvds_encoder->reg); - if (intel_wait_for_register(dev_priv, stat_reg, PP_ON, PP_ON, 1000)) + if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, PP_ON, 1000)) DRM_ERROR("timed out waiting for panel to power on\n"); intel_panel_enable_backlight(intel_connector); } -static void intel_disable_lvds(struct intel_encoder *encoder) +static void intel_disable_lvds(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { - struct drm_device *dev = encoder->base.dev; struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); - struct drm_i915_private *dev_priv = to_i915(dev); - i915_reg_t ctl_reg, stat_reg; - - if (HAS_PCH_SPLIT(dev)) { - ctl_reg = PCH_PP_CONTROL; - stat_reg = PCH_PP_STATUS; - } else { - ctl_reg = PP_CONTROL; - stat_reg = PP_STATUS; - } + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - I915_WRITE(ctl_reg, I915_READ(ctl_reg) & ~POWER_TARGET_ON); - if (intel_wait_for_register(dev_priv, stat_reg, PP_ON, 0, 1000)) + I915_WRITE(PP_CONTROL(0), I915_READ(PP_CONTROL(0)) & ~PANEL_POWER_ON); + if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, 0, 1000)) DRM_ERROR("timed out waiting for panel to power off\n"); 
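
[Editor's note, not part of the patch: two details above are easy to miss.
First, the disable path mirrors the enable path in reverse -- panel power is
cut and PP_STATUS(0) polled until PP_ON clears before the LVDS port itself
is disabled below. Second, the power-cycle delay handled by
intel_lvds_pps_get_hw_state() and intel_lvds_pps_init_hw() round-trips
exactly: a raw PP_DIVISOR field of 5 (500 ms, including the BSpec +1 slop)
is read back as t4 = (5 - 1) * 1000 = 4000 in 100 us units (400 ms), and is
written out again as DIV_ROUND_UP(4000, 1000) + 1 = 5.]
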
I915_WRITE(lvds_encoder->reg, I915_READ(lvds_encoder->reg) & ~LVDS_PORT_EN); POSTING_READ(lvds_encoder->reg); } -static void gmch_disable_lvds(struct intel_encoder *encoder) +static void gmch_disable_lvds(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) + { struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); struct intel_connector *intel_connector = @@ -268,10 +353,12 @@ static void gmch_disable_lvds(struct intel_encoder *encoder) intel_panel_disable_backlight(intel_connector); - intel_disable_lvds(encoder); + intel_disable_lvds(encoder, old_crtc_state, old_conn_state); } -static void pch_disable_lvds(struct intel_encoder *encoder) +static void pch_disable_lvds(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); struct intel_connector *intel_connector = @@ -280,9 +367,11 @@ static void pch_disable_lvds(struct intel_encoder *encoder) intel_panel_disable_backlight(intel_connector); } -static void pch_post_disable_lvds(struct intel_encoder *encoder) +static void pch_post_disable_lvds(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { - intel_disable_lvds(encoder); + intel_disable_lvds(encoder, old_crtc_state, old_conn_state); } static enum drm_mode_status @@ -304,7 +393,8 @@ intel_lvds_mode_valid(struct drm_connector *connector, } static bool intel_lvds_compute_config(struct intel_encoder *intel_encoder, - struct intel_crtc_state *pipe_config) + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct drm_device *dev = intel_encoder->base.dev; struct intel_lvds_encoder *lvds_encoder = @@ -900,17 +990,6 @@ void intel_lvds_init(struct drm_device *dev) int pipe; u8 pin; - /* - * Unlock registers and just leave them unlocked. Do this before - * checking quirk lists to avoid bogus WARNINGs. - */ - if (HAS_PCH_SPLIT(dev)) { - I915_WRITE(PCH_PP_CONTROL, - I915_READ(PCH_PP_CONTROL) | PANEL_UNLOCK_REGS); - } else if (INTEL_INFO(dev_priv)->gen < 5) { - I915_WRITE(PP_CONTROL, - I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS); - } if (!intel_lvds_supported(dev)) return; @@ -943,18 +1022,6 @@ void intel_lvds_init(struct drm_device *dev) DRM_DEBUG_KMS("LVDS is not present in VBT, but enabled anyway\n"); } - /* Set the Panel Power On/Off timings if uninitialized. 
*/ - if (INTEL_INFO(dev_priv)->gen < 5 && - I915_READ(PP_ON_DELAYS) == 0 && I915_READ(PP_OFF_DELAYS) == 0) { - /* Set T2 to 40ms and T5 to 200ms */ - I915_WRITE(PP_ON_DELAYS, 0x019007d0); - - /* Set T3 to 35ms and Tx to 200ms */ - I915_WRITE(PP_OFF_DELAYS, 0x015e07d0); - - DRM_DEBUG_KMS("Panel power timings uninitialized, setting defaults\n"); - } - lvds_encoder = kzalloc(sizeof(*lvds_encoder), GFP_KERNEL); if (!lvds_encoder) return; @@ -1020,6 +1087,10 @@ void intel_lvds_init(struct drm_device *dev) dev->mode_config.scaling_mode_property, DRM_MODE_SCALE_ASPECT); intel_connector->panel.fitting_mode = DRM_MODE_SCALE_ASPECT; + + intel_lvds_pps_get_hw_state(dev_priv, &lvds_encoder->init_pps); + lvds_encoder->init_lvds_val = lvds; + /* * LVDS discovery: * 1) check for EDID on DDC @@ -1054,17 +1125,6 @@ void intel_lvds_init(struct drm_device *dev) } lvds_connector->base.edid = edid; - if (IS_ERR_OR_NULL(edid)) { - /* Didn't get an EDID, so - * Set wide sync ranges so we get all modes - * handed to valid_mode for checking - */ - connector->display_info.min_vfreq = 0; - connector->display_info.max_vfreq = 200; - connector->display_info.min_hfreq = 0; - connector->display_info.max_hfreq = 200; - } - list_for_each_entry(scan, &connector->probed_modes, head) { if (scan->type & DRM_MODE_TYPE_PREFERRED) { DRM_DEBUG_KMS("using preferred mode from EDID: "); diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 927825f5b284..80bb9247ce66 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -97,7 +97,8 @@ struct drm_i915_mocs_table { * end. */ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { - { /* 0x00000009 */ + [I915_MOCS_UNCACHED] = { + /* 0x00000009 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | @@ -106,7 +107,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* 0x0010 */ .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), }, - { + [I915_MOCS_PTE] = { /* 0x00000038 */ .control_value = LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -115,7 +116,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* 0x0030 */ .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), }, - { + [I915_MOCS_CACHED] = { /* 0x0000003b */ .control_value = LE_CACHEABILITY(LE_WB) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -128,7 +129,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* NOTE: the LE_TGT_CACHE is not used on Broxton */ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { - { + [I915_MOCS_UNCACHED] = { /* 0x00000009 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -138,7 +139,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { /* 0x0010 */ .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), }, - { + [I915_MOCS_PTE] = { /* 0x00000038 */ .control_value = LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -148,7 +149,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { /* 0x0030 */ .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), }, - { + [I915_MOCS_CACHED] = { /* 0x00000039 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -203,9 +204,9 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, return result; } -static i915_reg_t mocs_register(enum intel_engine_id ring, int index) +static i915_reg_t mocs_register(enum 
intel_engine_id engine_id, int index) { - switch (ring) { + switch (engine_id) { case RCS: return GEN9_GFX_MOCS(index); case VCS: @@ -217,7 +218,7 @@ static i915_reg_t mocs_register(enum intel_engine_id ring, int index) case VCS2: return GEN9_MFX1_MOCS(index); default: - MISSING_CASE(ring); + MISSING_CASE(engine_id); return INVALID_MMIO_REG; } } @@ -275,7 +276,7 @@ int intel_mocs_init_engine(struct intel_engine_cs *engine) static int emit_mocs_control_table(struct drm_i915_gem_request *req, const struct drm_i915_mocs_table *table) { - struct intel_ringbuffer *ringbuf = req->ringbuf; + struct intel_ring *ring = req->ring; enum intel_engine_id engine = req->engine->id; unsigned int index; int ret; @@ -287,14 +288,11 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req, if (ret) return ret; - intel_logical_ring_emit(ringbuf, - MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES)); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES)); for (index = 0; index < table->size; index++) { - intel_logical_ring_emit_reg(ringbuf, - mocs_register(engine, index)); - intel_logical_ring_emit(ringbuf, - table->table[index].control_value); + intel_ring_emit_reg(ring, mocs_register(engine, index)); + intel_ring_emit(ring, table->table[index].control_value); } /* @@ -306,14 +304,12 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req, * that value to all the used entries. */ for (; index < GEN9_NUM_MOCS_ENTRIES; index++) { - intel_logical_ring_emit_reg(ringbuf, - mocs_register(engine, index)); - intel_logical_ring_emit(ringbuf, - table->table[0].control_value); + intel_ring_emit_reg(ring, mocs_register(engine, index)); + intel_ring_emit(ring, table->table[0].control_value); } - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_advance(ringbuf); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -340,7 +336,7 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, const struct drm_i915_mocs_table *table) { - struct intel_ringbuffer *ringbuf = req->ringbuf; + struct intel_ring *ring = req->ring; unsigned int i; int ret; @@ -351,19 +347,18 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, if (ret) return ret; - intel_logical_ring_emit(ringbuf, + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2)); for (i = 0; i < table->size/2; i++) { - intel_logical_ring_emit_reg(ringbuf, GEN9_LNCFCMOCS(i)); - intel_logical_ring_emit(ringbuf, - l3cc_combine(table, 2*i, 2*i+1)); + intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i)); + intel_ring_emit(ring, l3cc_combine(table, 2*i, 2*i+1)); } if (table->size & 0x01) { /* Odd table size - 1 left over */ - intel_logical_ring_emit_reg(ringbuf, GEN9_LNCFCMOCS(i)); - intel_logical_ring_emit(ringbuf, l3cc_combine(table, 2*i, 0)); + intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i)); + intel_ring_emit(ring, l3cc_combine(table, 2*i, 0)); i++; } @@ -373,12 +368,12 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, * they are reserved by the hardware. 
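 * (Editor's note, assuming the usual l3cc_combine() packing: each
 * GEN9_LNCFCMOCS register carries two 16-bit L3CC entries, roughly
 * l3cc(2*i) | l3cc(2*i+1) << 16, which is why the loops above consume
 * two table entries per register and emit a lone low half-word when the
 * table size is odd.)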
*/ for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) { - intel_logical_ring_emit_reg(ringbuf, GEN9_LNCFCMOCS(i)); - intel_logical_ring_emit(ringbuf, l3cc_combine(table, 0, 0)); + intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i)); + intel_ring_emit(ring, l3cc_combine(table, 0, 0)); } - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_advance(ringbuf); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } diff --git a/drivers/gpu/drm/i915/intel_mocs.h b/drivers/gpu/drm/i915/intel_mocs.h index 4640299e04ec..a8bd9f7bfece 100644 --- a/drivers/gpu/drm/i915/intel_mocs.h +++ b/drivers/gpu/drm/i915/intel_mocs.h @@ -54,6 +54,6 @@ int intel_rcs_context_init_mocs(struct drm_i915_gem_request *req); void intel_mocs_init_l3cc_table(struct drm_device *dev); -int intel_mocs_init_engine(struct intel_engine_cs *ring); +int intel_mocs_init_engine(struct intel_engine_cs *engine); #endif diff --git a/drivers/gpu/drm/i915/intel_modes.c b/drivers/gpu/drm/i915/intel_modes.c index f2584d0a01ab..951e834dd274 100644 --- a/drivers/gpu/drm/i915/intel_modes.c +++ b/drivers/gpu/drm/i915/intel_modes.c @@ -25,7 +25,6 @@ #include <linux/slab.h> #include <linux/i2c.h> -#include <linux/fb.h> #include <drm/drm_edid.h> #include <drm/drmP.h> #include "intel_drv.h" diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index adca262d591a..7acbbbf97833 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -1047,6 +1047,23 @@ err_out: return err; } +static int intel_use_opregion_panel_type_callback(const struct dmi_system_id *id) +{ + DRM_INFO("Using panel type from OpRegion on %s\n", id->ident); + return 1; +} + +static const struct dmi_system_id intel_use_opregion_panel_type[] = { + { + .callback = intel_use_opregion_panel_type_callback, + .ident = "Conrac GmbH IX45GM2", + .matches = {DMI_MATCH(DMI_SYS_VENDOR, "Conrac GmbH"), + DMI_MATCH(DMI_PRODUCT_NAME, "IX45GM2"), + }, + }, + { } +}; + int intel_opregion_get_panel_type(struct drm_i915_private *dev_priv) { @@ -1073,6 +1090,16 @@ intel_opregion_get_panel_type(struct drm_i915_private *dev_priv) } /* + * So far we know that some machines must use it, others must not use it. + * There doesn't seem to be any way to determine which way to go, except + * via a quirk list :( + */ + if (!dmi_check_system(intel_use_opregion_panel_type)) { + DRM_DEBUG_KMS("Ignoring OpRegion panel type (%d)\n", ret - 1); + return -ENODEV; + } + + /* * FIXME On Dell XPS 13 9350 the OpRegion panel type (0) gives us * low vswing for eDP, whereas the VBT panel type (2) gives us normal * vswing instead. Low vswing results in some display flickers, so diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 3212d8806b5a..a24bc8c7889f 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -30,6 +30,7 @@ #include "i915_drv.h" #include "i915_reg.h" #include "intel_drv.h" +#include "intel_frontbuffer.h" /* Limits for overlay size. 
According to intel doc, the real limits are: * Y width: 4095, UV width (planar): 2047, Y height: 2047, @@ -170,8 +171,8 @@ struct overlay_registers { struct intel_overlay { struct drm_i915_private *i915; struct intel_crtc *crtc; - struct drm_i915_gem_object *vid_bo; - struct drm_i915_gem_object *old_vid_bo; + struct i915_vma *vma; + struct i915_vma *old_vma; bool active; bool pfit_active; u32 pfit_vscale_ratio; /* shifted-point number, (1<<12) == 1.0 */ @@ -183,8 +184,7 @@ struct intel_overlay { u32 flip_addr; struct drm_i915_gem_object *reg_bo; /* flip handling */ - struct drm_i915_gem_request *last_flip_req; - void (*flip_tail)(struct intel_overlay *); + struct i915_gem_active last_flip; }; static struct overlay_registers __iomem * @@ -196,7 +196,7 @@ intel_overlay_map_regs(struct intel_overlay *overlay) if (OVERLAY_NEEDS_PHYSICAL(dev_priv)) regs = (struct overlay_registers __iomem *)overlay->reg_bo->phys_handle->vaddr; else - regs = io_mapping_map_wc(dev_priv->ggtt.mappable, + regs = io_mapping_map_wc(&dev_priv->ggtt.mappable, overlay->flip_addr, PAGE_SIZE); @@ -210,37 +210,46 @@ static void intel_overlay_unmap_regs(struct intel_overlay *overlay, io_mapping_unmap(regs); } -static int intel_overlay_do_wait_request(struct intel_overlay *overlay, +static void intel_overlay_submit_request(struct intel_overlay *overlay, struct drm_i915_gem_request *req, - void (*tail)(struct intel_overlay *)) + i915_gem_retire_fn retire) { - int ret; - - WARN_ON(overlay->last_flip_req); - i915_gem_request_assign(&overlay->last_flip_req, req); + GEM_BUG_ON(i915_gem_active_peek(&overlay->last_flip, + &overlay->i915->drm.struct_mutex)); + overlay->last_flip.retire = retire; + i915_gem_active_set(&overlay->last_flip, req); i915_add_request(req); +} - overlay->flip_tail = tail; - ret = i915_wait_request(overlay->last_flip_req); - if (ret) - return ret; +static int intel_overlay_do_wait_request(struct intel_overlay *overlay, + struct drm_i915_gem_request *req, + i915_gem_retire_fn retire) +{ + intel_overlay_submit_request(overlay, req, retire); + return i915_gem_active_retire(&overlay->last_flip, + &overlay->i915->drm.struct_mutex); +} - i915_gem_request_assign(&overlay->last_flip_req, NULL); - return 0; +static struct drm_i915_gem_request *alloc_request(struct intel_overlay *overlay) +{ + struct drm_i915_private *dev_priv = overlay->i915; + struct intel_engine_cs *engine = &dev_priv->engine[RCS]; + + return i915_gem_request_alloc(engine, dev_priv->kernel_context); } /* overlay needs to be disable in OCMD reg */ static int intel_overlay_on(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; - struct intel_engine_cs *engine = &dev_priv->engine[RCS]; struct drm_i915_gem_request *req; + struct intel_ring *ring; int ret; WARN_ON(overlay->active); WARN_ON(IS_I830(dev_priv) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE)); - req = i915_gem_request_alloc(engine, NULL); + req = alloc_request(overlay); if (IS_ERR(req)) return PTR_ERR(req); @@ -252,11 +261,12 @@ static int intel_overlay_on(struct intel_overlay *overlay) overlay->active = true; - intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_ON); - intel_ring_emit(engine, overlay->flip_addr | OFC_UPDATE); - intel_ring_emit(engine, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + ring = req->ring; + intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON); + intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE); + intel_ring_emit(ring, MI_WAIT_FOR_EVENT | 
MI_WAIT_FOR_OVERLAY_FLIP); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return intel_overlay_do_wait_request(overlay, req, NULL); } @@ -266,8 +276,8 @@ static int intel_overlay_continue(struct intel_overlay *overlay, bool load_polyphase_filter) { struct drm_i915_private *dev_priv = overlay->i915; - struct intel_engine_cs *engine = &dev_priv->engine[RCS]; struct drm_i915_gem_request *req; + struct intel_ring *ring; u32 flip_addr = overlay->flip_addr; u32 tmp; int ret; @@ -282,7 +292,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay, if (tmp & (1 << 17)) DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp); - req = i915_gem_request_alloc(engine, NULL); + req = alloc_request(overlay); if (IS_ERR(req)) return PTR_ERR(req); @@ -292,38 +302,48 @@ static int intel_overlay_continue(struct intel_overlay *overlay, return ret; } - intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); - intel_ring_emit(engine, flip_addr); - intel_ring_advance(engine); + ring = req->ring; + intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); + intel_ring_emit(ring, flip_addr); + intel_ring_advance(ring); - WARN_ON(overlay->last_flip_req); - i915_gem_request_assign(&overlay->last_flip_req, req); - i915_add_request(req); + intel_overlay_submit_request(overlay, req, NULL); return 0; } -static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay) +static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active, + struct drm_i915_gem_request *req) { - struct drm_i915_gem_object *obj = overlay->old_vid_bo; + struct intel_overlay *overlay = + container_of(active, typeof(*overlay), last_flip); + struct i915_vma *vma; - i915_gem_object_ggtt_unpin(obj); - drm_gem_object_unreference(&obj->base); + vma = fetch_and_zero(&overlay->old_vma); + if (WARN_ON(!vma)) + return; - overlay->old_vid_bo = NULL; + i915_gem_track_fb(vma->obj, NULL, + INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe)); + + i915_gem_object_unpin_from_display_plane(vma); + i915_vma_put(vma); } -static void intel_overlay_off_tail(struct intel_overlay *overlay) +static void intel_overlay_off_tail(struct i915_gem_active *active, + struct drm_i915_gem_request *req) { - struct drm_i915_gem_object *obj = overlay->vid_bo; + struct intel_overlay *overlay = + container_of(active, typeof(*overlay), last_flip); + struct i915_vma *vma; /* never have the overlay hw on without showing a frame */ - if (WARN_ON(!obj)) + vma = fetch_and_zero(&overlay->vma); + if (WARN_ON(!vma)) return; - i915_gem_object_ggtt_unpin(obj); - drm_gem_object_unreference(&obj->base); - overlay->vid_bo = NULL; + i915_gem_object_unpin_from_display_plane(vma); + i915_vma_put(vma); overlay->crtc->overlay = NULL; overlay->crtc = NULL; @@ -334,8 +354,8 @@ static void intel_overlay_off_tail(struct intel_overlay *overlay) static int intel_overlay_off(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; - struct intel_engine_cs *engine = &dev_priv->engine[RCS]; struct drm_i915_gem_request *req; + struct intel_ring *ring; u32 flip_addr = overlay->flip_addr; int ret; @@ -347,7 +367,7 @@ static int intel_overlay_off(struct intel_overlay *overlay) * of the hw. 
Do it in both cases */ flip_addr |= OFC_UPDATE; - req = i915_gem_request_alloc(engine, NULL); + req = alloc_request(overlay); if (IS_ERR(req)) return PTR_ERR(req); @@ -357,46 +377,36 @@ static int intel_overlay_off(struct intel_overlay *overlay) return ret; } + ring = req->ring; /* wait for overlay to go idle */ - intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); - intel_ring_emit(engine, flip_addr); - intel_ring_emit(engine, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); + intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); + intel_ring_emit(ring, flip_addr); + intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); /* turn overlay off */ if (IS_I830(dev_priv)) { /* Workaround: Don't disable the overlay fully, since otherwise * it dies on the next OVERLAY_ON cmd. */ - intel_ring_emit(engine, MI_NOOP); - intel_ring_emit(engine, MI_NOOP); - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); } else { - intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_OFF); - intel_ring_emit(engine, flip_addr); - intel_ring_emit(engine, + intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_OFF); + intel_ring_emit(ring, flip_addr); + intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); } - intel_ring_advance(engine); + intel_ring_advance(ring); - return intel_overlay_do_wait_request(overlay, req, intel_overlay_off_tail); + return intel_overlay_do_wait_request(overlay, req, + intel_overlay_off_tail); } /* recover from an interruption due to a signal * We have to be careful not to repeat work forever and make forward progress. */ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay) { - int ret; - - if (overlay->last_flip_req == NULL) - return 0; - - ret = i915_wait_request(overlay->last_flip_req); - if (ret) - return ret; - - if (overlay->flip_tail) - overlay->flip_tail(overlay); - - i915_gem_request_assign(&overlay->last_flip_req, NULL); - return 0; + return i915_gem_active_retire(&overlay->last_flip, + &overlay->i915->drm.struct_mutex); } /* Wait for pending overlay flip and release old frame. @@ -406,7 +416,6 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay) static int intel_overlay_release_old_vid(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; - struct intel_engine_cs *engine = &dev_priv->engine[RCS]; int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); @@ -414,14 +423,15 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) /* Only wait if there is actually an old frame to release to * guarantee forward progress. 
*/ - if (!overlay->old_vid_bo) + if (!overlay->old_vma) return 0; if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) { /* synchronous slowpath */ struct drm_i915_gem_request *req; + struct intel_ring *ring; - req = i915_gem_request_alloc(engine, NULL); + req = alloc_request(overlay); if (IS_ERR(req)) return PTR_ERR(req); @@ -431,22 +441,19 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) return ret; } - intel_ring_emit(engine, + ring = req->ring; + intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); ret = intel_overlay_do_wait_request(overlay, req, intel_overlay_release_old_vid_tail); if (ret) return ret; - } + } else + intel_overlay_release_old_vid_tail(&overlay->last_flip, NULL); - intel_overlay_release_old_vid_tail(overlay); - - - i915_gem_track_fb(overlay->old_vid_bo, NULL, - INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe)); return 0; } @@ -459,7 +466,6 @@ void intel_overlay_reset(struct drm_i915_private *dev_priv) intel_overlay_release_old_vid(overlay); - overlay->last_flip_req = NULL; overlay->old_xscale = 0; overlay->old_yscale = 0; overlay->crtc = NULL; @@ -740,6 +746,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, struct drm_i915_private *dev_priv = overlay->i915; u32 swidth, swidthsw, sheight, ostride; enum pipe pipe = overlay->crtc->pipe; + struct i915_vma *vma; lockdep_assert_held(&dev_priv->drm.struct_mutex); WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); @@ -748,12 +755,12 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, if (ret != 0) return ret; - ret = i915_gem_object_pin_to_display_plane(new_bo, 0, + vma = i915_gem_object_pin_to_display_plane(new_bo, 0, &i915_ggtt_view_normal); - if (ret != 0) - return ret; + if (IS_ERR(vma)) + return PTR_ERR(vma); - ret = i915_gem_object_put_fence(new_bo); + ret = i915_vma_put_fence(vma); if (ret) goto out_unpin; @@ -794,7 +801,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, swidth = params->src_w; swidthsw = calc_swidthsw(dev_priv, params->offset_Y, tmp_width); sheight = params->src_h; - iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_Y, ®s->OBUF_0Y); + iowrite32(i915_ggtt_offset(vma) + params->offset_Y, ®s->OBUF_0Y); ostride = params->stride_Y; if (params->format & I915_OVERLAY_YUV_PLANAR) { @@ -808,8 +815,10 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, params->src_w/uv_hscale); swidthsw |= max_t(u32, tmp_U, tmp_V) << 16; sheight |= (params->src_h/uv_vscale) << 16; - iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_U, ®s->OBUF_0U); - iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_V, ®s->OBUF_0V); + iowrite32(i915_ggtt_offset(vma) + params->offset_U, + ®s->OBUF_0U); + iowrite32(i915_ggtt_offset(vma) + params->offset_V, + ®s->OBUF_0V); ostride |= params->stride_UV << 16; } @@ -830,19 +839,18 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, if (ret) goto out_unpin; - i915_gem_track_fb(overlay->vid_bo, new_bo, + i915_gem_track_fb(overlay->vma->obj, new_bo, INTEL_FRONTBUFFER_OVERLAY(pipe)); - overlay->old_vid_bo = overlay->vid_bo; - overlay->vid_bo = new_bo; + overlay->old_vma = overlay->vma; + overlay->vma = vma; - intel_frontbuffer_flip(&dev_priv->drm, - INTEL_FRONTBUFFER_OVERLAY(pipe)); + intel_frontbuffer_flip(dev_priv, INTEL_FRONTBUFFER_OVERLAY(pipe)); return 0; 
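
[Editor's aside -- an illustrative sketch, not part of the patch: the
i915_gem_active conversion replaces the old last_flip_req/flip_tail pair
with a retire callback hung off overlay->last_flip. Such callbacks recover
their enclosing object with container_of(); the shape, with a hypothetical
name, is:]

	static void example_retire(struct i915_gem_active *active,
				   struct drm_i915_gem_request *request)
	{
		struct intel_overlay *overlay =
			container_of(active, typeof(*overlay), last_flip);

		/* drop whatever the completed flip was holding */
	}

[intel_overlay_release_old_vid_tail() and intel_overlay_off_tail() above
follow exactly this shape.]
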
out_unpin: - i915_gem_object_ggtt_unpin(new_bo); + i915_gem_object_unpin_from_display_plane(vma); return ret; } @@ -870,12 +878,7 @@ int intel_overlay_switch_off(struct intel_overlay *overlay) iowrite32(0, ®s->OCMD); intel_overlay_unmap_regs(overlay, regs); - ret = intel_overlay_off(overlay); - if (ret != 0) - return ret; - - intel_overlay_off_tail(overlay); - return 0; + return intel_overlay_off(overlay); } static int check_overlay_possible_on_crtc(struct intel_overlay *overlay, @@ -1122,9 +1125,8 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, } crtc = to_intel_crtc(drmmode_crtc); - new_bo = to_intel_bo(drm_gem_object_lookup(file_priv, - put_image_rec->bo_handle)); - if (&new_bo->base == NULL) { + new_bo = i915_gem_object_lookup(file_priv, put_image_rec->bo_handle); + if (!new_bo) { ret = -ENOENT; goto out_free; } @@ -1132,7 +1134,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, drm_modeset_lock_all(dev); mutex_lock(&dev->struct_mutex); - if (new_bo->tiling_mode) { + if (i915_gem_object_is_tiled(new_bo)) { DRM_DEBUG_KMS("buffer used for overlay image can not be tiled\n"); ret = -EINVAL; goto out_unlock; @@ -1220,7 +1222,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, out_unlock: mutex_unlock(&dev->struct_mutex); drm_modeset_unlock_all(dev); - drm_gem_object_unreference_unlocked(&new_bo->base); + i915_gem_object_put_unlocked(new_bo); out_free: kfree(params); @@ -1371,6 +1373,7 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv) struct intel_overlay *overlay; struct drm_i915_gem_object *reg_bo; struct overlay_registers __iomem *regs; + struct i915_vma *vma = NULL; int ret; if (!HAS_OVERLAY(dev_priv)) @@ -1404,12 +1407,14 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv) } overlay->flip_addr = reg_bo->phys_handle->busaddr; } else { - ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, PIN_MAPPABLE); - if (ret) { + vma = i915_gem_object_ggtt_pin(reg_bo, NULL, + 0, PAGE_SIZE, PIN_MAPPABLE); + if (IS_ERR(vma)) { DRM_ERROR("failed to pin overlay register bo\n"); + ret = PTR_ERR(vma); goto out_free_bo; } - overlay->flip_addr = i915_gem_obj_ggtt_offset(reg_bo); + overlay->flip_addr = i915_ggtt_offset(vma); ret = i915_gem_object_set_to_gtt_domain(reg_bo, true); if (ret) { @@ -1441,10 +1446,10 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv) return; out_unpin_bo: - if (!OVERLAY_NEEDS_PHYSICAL(dev_priv)) - i915_gem_object_ggtt_unpin(reg_bo); + if (vma) + i915_vma_unpin(vma); out_free_bo: - drm_gem_object_unreference(®_bo->base); + i915_gem_object_put(reg_bo); out_free: mutex_unlock(&dev_priv->drm.struct_mutex); kfree(overlay); @@ -1461,7 +1466,7 @@ void intel_cleanup_overlay(struct drm_i915_private *dev_priv) * hardware should be off already */ WARN_ON(dev_priv->overlay->active); - drm_gem_object_unreference_unlocked(&dev_priv->overlay->reg_bo->base); + i915_gem_object_put_unlocked(dev_priv->overlay->reg_bo); kfree(dev_priv->overlay); } @@ -1484,7 +1489,7 @@ intel_overlay_map_regs_atomic(struct intel_overlay *overlay) regs = (struct overlay_registers __iomem *) overlay->reg_bo->phys_handle->vaddr; else - regs = io_mapping_map_atomic_wc(dev_priv->ggtt.mappable, + regs = io_mapping_map_atomic_wc(&dev_priv->ggtt.mappable, overlay->flip_addr); return regs; diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 96c65d77e886..be4b4d546fd9 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -841,7 +841,7 @@ static void 
lpt_enable_backlight(struct intel_connector *connector) { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; - u32 pch_ctl1, pch_ctl2; + u32 pch_ctl1, pch_ctl2, schicken; pch_ctl1 = I915_READ(BLC_PWM_PCH_CTL1); if (pch_ctl1 & BLM_PCH_PWM_ENABLE) { @@ -850,6 +850,22 @@ static void lpt_enable_backlight(struct intel_connector *connector) I915_WRITE(BLC_PWM_PCH_CTL1, pch_ctl1); } + if (HAS_PCH_LPT(dev_priv)) { + schicken = I915_READ(SOUTH_CHICKEN2); + if (panel->backlight.alternate_pwm_increment) + schicken |= LPT_PWM_GRANULARITY; + else + schicken &= ~LPT_PWM_GRANULARITY; + I915_WRITE(SOUTH_CHICKEN2, schicken); + } else { + schicken = I915_READ(SOUTH_CHICKEN1); + if (panel->backlight.alternate_pwm_increment) + schicken |= SPT_PWM_GRANULARITY; + else + schicken &= ~SPT_PWM_GRANULARITY; + I915_WRITE(SOUTH_CHICKEN1, schicken); + } + pch_ctl2 = panel->backlight.max << 16; I915_WRITE(BLC_PWM_PCH_CTL2, pch_ctl2); @@ -1242,10 +1258,10 @@ static u32 bxt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) */ static u32 spt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) { - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_panel *panel = &connector->panel; u32 mul; - if (I915_READ(SOUTH_CHICKEN1) & SPT_PWM_GRANULARITY) + if (panel->backlight.alternate_pwm_increment) mul = 128; else mul = 16; @@ -1261,9 +1277,10 @@ static u32 spt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) static u32 lpt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_panel *panel = &connector->panel; u32 mul, clock; - if (I915_READ(SOUTH_CHICKEN2) & LPT_PWM_GRANULARITY) + if (panel->backlight.alternate_pwm_increment) mul = 16; else mul = 128; @@ -1414,6 +1431,13 @@ static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unus struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; u32 pch_ctl1, pch_ctl2, val; + bool alt; + + if (HAS_PCH_LPT(dev_priv)) + alt = I915_READ(SOUTH_CHICKEN2) & LPT_PWM_GRANULARITY; + else + alt = I915_READ(SOUTH_CHICKEN1) & SPT_PWM_GRANULARITY; + panel->backlight.alternate_pwm_increment = alt; pch_ctl1 = I915_READ(BLC_PWM_PCH_CTL1); panel->backlight.active_low_pwm = pch_ctl1 & BLM_PCH_POLARITY; @@ -1430,10 +1454,11 @@ static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unus panel->backlight.min = get_backlight_min_vbt(connector); val = lpt_get_backlight(connector); - panel->backlight.level = intel_panel_compute_brightness(connector, val); + val = intel_panel_compute_brightness(connector, val); + panel->backlight.level = clamp(val, panel->backlight.min, + panel->backlight.max); - panel->backlight.enabled = (pch_ctl1 & BLM_PCH_PWM_ENABLE) && - panel->backlight.level != 0; + panel->backlight.enabled = pch_ctl1 & BLM_PCH_PWM_ENABLE; return 0; } @@ -1459,11 +1484,13 @@ static int pch_setup_backlight(struct intel_connector *connector, enum pipe unus panel->backlight.min = get_backlight_min_vbt(connector); val = pch_get_backlight(connector); - panel->backlight.level = intel_panel_compute_brightness(connector, val); + val = intel_panel_compute_brightness(connector, val); + panel->backlight.level = clamp(val, panel->backlight.min, + panel->backlight.max); cpu_ctl2 = I915_READ(BLC_PWM_CPU_CTL2); panel->backlight.enabled = (cpu_ctl2 & BLM_PWM_ENABLE) && - (pch_ctl1 & BLM_PCH_PWM_ENABLE) && 
panel->backlight.level != 0; + (pch_ctl1 & BLM_PCH_PWM_ENABLE); return 0; } @@ -1498,9 +1525,11 @@ static int i9xx_setup_backlight(struct intel_connector *connector, enum pipe unu panel->backlight.min = get_backlight_min_vbt(connector); val = i9xx_get_backlight(connector); - panel->backlight.level = intel_panel_compute_brightness(connector, val); + val = intel_panel_compute_brightness(connector, val); + panel->backlight.level = clamp(val, panel->backlight.min, + panel->backlight.max); - panel->backlight.enabled = panel->backlight.level != 0; + panel->backlight.enabled = val != 0; return 0; } @@ -1530,10 +1559,11 @@ static int i965_setup_backlight(struct intel_connector *connector, enum pipe unu panel->backlight.min = get_backlight_min_vbt(connector); val = i9xx_get_backlight(connector); - panel->backlight.level = intel_panel_compute_brightness(connector, val); + val = intel_panel_compute_brightness(connector, val); + panel->backlight.level = clamp(val, panel->backlight.min, + panel->backlight.max); - panel->backlight.enabled = (ctl2 & BLM_PWM_ENABLE) && - panel->backlight.level != 0; + panel->backlight.enabled = ctl2 & BLM_PWM_ENABLE; return 0; } @@ -1562,10 +1592,11 @@ static int vlv_setup_backlight(struct intel_connector *connector, enum pipe pipe panel->backlight.min = get_backlight_min_vbt(connector); val = _vlv_get_backlight(dev_priv, pipe); - panel->backlight.level = intel_panel_compute_brightness(connector, val); + val = intel_panel_compute_brightness(connector, val); + panel->backlight.level = clamp(val, panel->backlight.min, + panel->backlight.max); - panel->backlight.enabled = (ctl2 & BLM_PWM_ENABLE) && - panel->backlight.level != 0; + panel->backlight.enabled = ctl2 & BLM_PWM_ENABLE; return 0; } @@ -1607,10 +1638,11 @@ bxt_setup_backlight(struct intel_connector *connector, enum pipe unused) return -ENODEV; val = bxt_get_backlight(connector); - panel->backlight.level = intel_panel_compute_brightness(connector, val); + val = intel_panel_compute_brightness(connector, val); + panel->backlight.level = clamp(val, panel->backlight.min, + panel->backlight.max); - panel->backlight.enabled = (pwm_ctl & BXT_BLC_PWM_ENABLE) && - panel->backlight.level != 0; + panel->backlight.enabled = pwm_ctl & BXT_BLC_PWM_ENABLE; return 0; } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f4f3fcc8b3be..a2f751cd187a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -340,6 +340,11 @@ void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable) I915_WRITE(FW_BLC_SELF, val); POSTING_READ(FW_BLC_SELF); } else if (IS_I915GM(dev)) { + /* + * FIXME can't find a bit like this for 915G, + * and yet it does have the related watermark in + * FW_BLC_SELF. What's going on? + */ val = enable ?
_MASKED_BIT_ENABLE(INSTPM_SELF_EN) : _MASKED_BIT_DISABLE(INSTPM_SELF_EN); I915_WRITE(INSTPM, val); @@ -960,7 +965,7 @@ static uint16_t vlv_compute_wm_level(struct intel_plane *plane, if (dev_priv->wm.pri_latency[level] == 0) return USHRT_MAX; - if (!state->visible) + if (!state->base.visible) return 0; cpp = drm_format_plane_cpp(state->base.fb->pixel_format, 0); @@ -1002,7 +1007,7 @@ static void vlv_compute_fifo(struct intel_crtc *crtc) if (plane->base.type == DRM_PLANE_TYPE_CURSOR) continue; - if (state->visible) { + if (state->base.visible) { wm_state->num_active_planes++; total_rate += drm_format_plane_cpp(state->base.fb->pixel_format, 0); } @@ -1018,7 +1023,7 @@ static void vlv_compute_fifo(struct intel_crtc *crtc) continue; } - if (!state->visible) { + if (!state->base.visible) { plane->wm.fifo_size = 0; continue; } @@ -1118,7 +1123,7 @@ static void vlv_compute_wm(struct intel_crtc *crtc) struct intel_plane_state *state = to_intel_plane_state(plane->base.state); - if (!state->visible) + if (!state->base.visible) continue; /* normal watermarks */ @@ -1580,7 +1585,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc) obj = intel_fb_obj(enabled->primary->state->fb); /* self-refresh seems busted with untiled */ - if (obj->tiling_mode == I915_TILING_NONE) + if (!i915_gem_object_is_tiled(obj)) enabled = NULL; } @@ -1604,6 +1609,9 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc) unsigned long line_time_us; int entries; + if (IS_I915GM(dev) || IS_I945GM(dev)) + cpp = 4; + line_time_us = max(htotal * 1000 / clock, 1); /* Use ns/us then divide to preserve precision */ @@ -1618,7 +1626,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc) if (IS_I945G(dev) || IS_I945GM(dev)) I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_FIFO_MASK | (srwm & 0xff)); - else if (IS_I915GM(dev)) + else I915_WRITE(FW_BLC_SELF, srwm & 0x3f); } @@ -1767,7 +1775,7 @@ static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate, drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0; uint32_t method1, method2; - if (!cstate->base.active || !pstate->visible) + if (!cstate->base.active || !pstate->base.visible) return 0; method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value); @@ -1777,7 +1785,7 @@ static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate, method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate), cstate->base.adjusted_mode.crtc_htotal, - drm_rect_width(&pstate->dst), + drm_rect_width(&pstate->base.dst), cpp, mem_value); return min(method1, method2); @@ -1795,13 +1803,13 @@ static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate, drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0; uint32_t method1, method2; - if (!cstate->base.active || !pstate->visible) + if (!cstate->base.active || !pstate->base.visible) return 0; method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value); method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate), cstate->base.adjusted_mode.crtc_htotal, - drm_rect_width(&pstate->dst), + drm_rect_width(&pstate->base.dst), cpp, mem_value); return min(method1, method2); } @@ -1820,7 +1828,7 @@ static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate, * this is necessary to avoid flickering. */ int cpp = 4; - int width = pstate->visible ? pstate->base.crtc_w : 64; + int width = pstate->base.visible ? pstate->base.crtc_w : 64; if (!cstate->base.active) return 0; @@ -1838,10 +1846,10 @@ static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate, int cpp = pstate->base.fb ? 
drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0; - if (!cstate->base.active || !pstate->visible) + if (!cstate->base.active || !pstate->base.visible) return 0; - return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->dst), cpp); + return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->base.dst), cpp); } static unsigned int ilk_display_fifo_size(const struct drm_device *dev) @@ -2119,32 +2127,34 @@ static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[8]) GEN9_MEM_LATENCY_LEVEL_MASK; /* + * If a level n (n > 1) has a 0us latency, all levels m (m >= n) + * need to be disabled. We make sure to sanitize the values out + * of the punit to satisfy this requirement. + */ + for (level = 1; level <= max_level; level++) { + if (wm[level] == 0) { + for (i = level + 1; i <= max_level; i++) + wm[i] = 0; + break; + } + } + + /* * WaWmMemoryReadLatency:skl * * punit doesn't take into account the read latency so we need - * to add 2us to the various latency levels we retrieve from - * the punit. - * - W0 is a bit special in that it's the only level that - * can't be disabled if we want to have display working, so - * we always add 2us there. - * - For levels >=1, punit returns 0us latency when they are - * disabled, so we respect that and don't add 2us then - * - * Additionally, if a level n (n > 1) has a 0us latency, all - * levels m (m >= n) need to be disabled. We make sure to - * sanitize the values out of the punit to satisfy this - * requirement. + * to add 2us to the various latency levels we retrieve from the + * punit when the level 0 response data is 0us. */ - wm[0] += 2; - for (level = 1; level <= max_level; level++) - if (wm[level] != 0) + if (wm[0] == 0) { + wm[0] += 2; + for (level = 1; level <= max_level; level++) { + if (wm[level] == 0) + break; wm[level] += 2; - else { - for (i = level + 1; i <= max_level; i++) - wm[i] = 0; - - break; } + } + } + } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { uint64_t sskpd = I915_READ64(MCH_SSKPD); @@ -2358,10 +2368,10 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate) pipe_wm->pipe_enabled = cstate->base.active; if (sprstate) { - pipe_wm->sprites_enabled = sprstate->visible; - pipe_wm->sprites_scaled = sprstate->visible && - (drm_rect_width(&sprstate->dst) != drm_rect_width(&sprstate->src) >> 16 || - drm_rect_height(&sprstate->dst) != drm_rect_height(&sprstate->src) >> 16); + pipe_wm->sprites_enabled = sprstate->base.visible; + pipe_wm->sprites_scaled = sprstate->base.visible && + (drm_rect_width(&sprstate->base.dst) != drm_rect_width(&sprstate->base.src) >> 16 || + drm_rect_height(&sprstate->base.dst) != drm_rect_height(&sprstate->base.src) >> 16); } usable_level = max_level; @@ -2845,13 +2855,7 @@ bool ilk_disable_lp_wm(struct drm_device *dev) return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL); } -/* - * On gen9, we need to allocate Display Data Buffer (DDB) portions to the - * different active planes. - */ - -#define SKL_DDB_SIZE 896 /* in blocks */ -#define BXT_DDB_SIZE 512 +#define SKL_SAGV_BLOCK_TIME 30 /* µs */ /* * Return the index of a plane in the SKL DDB and wm result arrays.
Primary @@ -2875,6 +2879,173 @@ skl_wm_plane_id(const struct intel_plane *plane) } } +static bool +intel_has_sagv(struct drm_i915_private *dev_priv) +{ + if (IS_KABYLAKE(dev_priv)) + return true; + + if (IS_SKYLAKE(dev_priv) && + dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED) + return true; + + return false; +} + +/* + * SAGV dynamically adjusts the system agent voltage and clock frequencies + * depending on power and performance requirements. The display engine access + * to system memory is blocked during the adjustment time. Because of the + * blocking time, having this enabled can cause full system hangs and/or pipe + * underruns if we don't meet all of the following requirements: + * + * - <= 1 pipe enabled + * - All planes can enable watermarks for latencies >= SAGV engine block time + * - We're not using an interlaced display configuration + */ +int +intel_enable_sagv(struct drm_i915_private *dev_priv) +{ + int ret; + + if (!intel_has_sagv(dev_priv)) + return 0; + + if (dev_priv->sagv_status == I915_SAGV_ENABLED) + return 0; + + DRM_DEBUG_KMS("Enabling the SAGV\n"); + mutex_lock(&dev_priv->rps.hw_lock); + + ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL, + GEN9_SAGV_ENABLE); + + /* We don't need to wait for the SAGV when enabling */ + mutex_unlock(&dev_priv->rps.hw_lock); + + /* + * Some skl systems, pre-release machines in particular, + * don't actually have an SAGV. + */ + if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) { + DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); + dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; + return 0; + } else if (ret < 0) { + DRM_ERROR("Failed to enable the SAGV\n"); + return ret; + } + + dev_priv->sagv_status = I915_SAGV_ENABLED; + return 0; +} + +static int +intel_do_sagv_disable(struct drm_i915_private *dev_priv) +{ + int ret; + uint32_t temp = GEN9_SAGV_DISABLE; + + ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_SAGV_CONTROL, + &temp); + if (ret) + return ret; + else + return temp & GEN9_SAGV_IS_DISABLED; +} + +int +intel_disable_sagv(struct drm_i915_private *dev_priv) +{ + int ret, result; + + if (!intel_has_sagv(dev_priv)) + return 0; + + if (dev_priv->sagv_status == I915_SAGV_DISABLED) + return 0; + + DRM_DEBUG_KMS("Disabling the SAGV\n"); + mutex_lock(&dev_priv->rps.hw_lock); + + /* bspec says to keep retrying for at least 1 ms */ + ret = wait_for(result = intel_do_sagv_disable(dev_priv), 1); + mutex_unlock(&dev_priv->rps.hw_lock); + + if (ret == -ETIMEDOUT) { + DRM_ERROR("Request to disable SAGV timed out\n"); + return -ETIMEDOUT; + } + + /* + * Some skl systems, pre-release machines in particular, + * don't actually have an SAGV. 
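intel_disable_sagv() above leans on i915's wait_for() polling macro: wait_for(result = intel_do_sagv_disable(dev_priv), 1) re-evaluates the pcode handshake until it returns non-zero (GEN9_SAGV_IS_DISABLED, or a negative error) or roughly 1 ms has elapsed, matching the bspec retry requirement noted in the comment. A minimal open-coded sketch of that loop, assuming a hypothetical helper name (the real macro also sleeps between polls and re-checks once after timeout):

static int sagv_poll_disable(struct drm_i915_private *dev_priv, int *result)
{
	unsigned long timeout = jiffies + msecs_to_jiffies(1);

	for (;;) {
		/* Re-issue the pcode read until it reports disabled. */
		*result = intel_do_sagv_disable(dev_priv);
		if (*result)
			return 0;
		if (time_after(jiffies, timeout))
			return -ETIMEDOUT;
		cpu_relax();
	}
}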
+ */ + if (IS_SKYLAKE(dev_priv) && result == -ENXIO) { + DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); + dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; + return 0; + } else if (result < 0) { + DRM_ERROR("Failed to disable the SAGV\n"); + return result; + } + + dev_priv->sagv_status = I915_SAGV_DISABLED; + return 0; +} + +bool intel_can_enable_sagv(struct drm_atomic_state *state) +{ + struct drm_device *dev = state->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct drm_crtc *crtc; + enum pipe pipe; + int level, plane; + + if (!intel_has_sagv(dev_priv)) + return false; + + /* + * SKL workaround: bspec recommends we disable the SAGV when we have + * more than one pipe enabled + * + * If there are no active CRTCs, no additional checks need be performed + */ + if (hweight32(intel_state->active_crtcs) == 0) + return true; + else if (hweight32(intel_state->active_crtcs) > 1) + return false; + + /* Since we're now guaranteed to only have one active CRTC... */ + pipe = ffs(intel_state->active_crtcs) - 1; + crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + + if (crtc->state->mode.flags & DRM_MODE_FLAG_INTERLACE) + return false; + + for_each_plane(dev_priv, pipe, plane) { + /* Skip this plane if it's not enabled */ + if (intel_state->wm_results.plane[pipe][plane][0] == 0) + continue; + + /* Find the highest enabled wm level for this plane */ + for (level = ilk_wm_max_level(dev); + intel_state->wm_results.plane[pipe][plane][level] == 0; --level) + { } + + /* + * If any of the planes on this pipe don't enable wm levels + * that incur memory latencies higher than 30µs we can't enable + * the SAGV + */ + if (dev_priv->wm.skl_latency[level] < SKL_SAGV_BLOCK_TIME) + return false; + } + + return true; +} + static void skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, const struct intel_crtc_state *cstate, @@ -2901,10 +3072,8 @@ skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, else *num_active = hweight32(dev_priv->active_crtcs); - if (IS_BROXTON(dev)) - ddb_size = BXT_DDB_SIZE; - else - ddb_size = SKL_DDB_SIZE; + ddb_size = INTEL_INFO(dev_priv)->ddb_size; + WARN_ON(ddb_size == 0); ddb_size -= 4; /* 4 blocks for bypass path allocation */ @@ -2996,14 +3165,14 @@ skl_plane_downscale_amount(const struct intel_plane_state *pstate) uint32_t downscale_h, downscale_w; uint32_t src_w, src_h, dst_w, dst_h; - if (WARN_ON(!pstate->visible)) + if (WARN_ON(!pstate->base.visible)) return DRM_PLANE_HELPER_NO_SCALING; /* n.b., src is 16.16 fixed point, dst is whole integer */ - src_w = drm_rect_width(&pstate->src); - src_h = drm_rect_height(&pstate->src); - dst_w = drm_rect_width(&pstate->dst); - dst_h = drm_rect_height(&pstate->dst); + src_w = drm_rect_width(&pstate->base.src); + src_h = drm_rect_height(&pstate->base.src); + dst_w = drm_rect_width(&pstate->base.dst); + dst_h = drm_rect_height(&pstate->base.dst); if (intel_rotation_90_or_270(pstate->base.rotation)) swap(dst_w, dst_h); @@ -3025,15 +3194,15 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, uint32_t width = 0, height = 0; unsigned format = fb ?
fb->pixel_format : DRM_FORMAT_XRGB8888; - if (!intel_pstate->visible) + if (!intel_pstate->base.visible) return 0; if (pstate->plane->type == DRM_PLANE_TYPE_CURSOR) return 0; if (y && format != DRM_FORMAT_NV12) return 0; - width = drm_rect_width(&intel_pstate->src) >> 16; - height = drm_rect_height(&intel_pstate->src) >> 16; + width = drm_rect_width(&intel_pstate->base.src) >> 16; + height = drm_rect_height(&intel_pstate->base.src) >> 16; if (intel_rotation_90_or_270(pstate->rotation)) swap(width, height); @@ -3107,8 +3276,6 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate) total_data_rate += intel_cstate->wm.skl.plane_y_data_rate[id]; } - WARN_ON(cstate->plane_mask && total_data_rate == 0); - return total_data_rate; } @@ -3134,8 +3301,8 @@ skl_ddb_min_alloc(const struct drm_plane_state *pstate, fb->modifier[0] != I915_FORMAT_MOD_Yf_TILED) return 8; - src_w = drm_rect_width(&intel_pstate->src) >> 16; - src_h = drm_rect_height(&intel_pstate->src) >> 16; + src_w = drm_rect_width(&intel_pstate->base.src) >> 16; + src_h = drm_rect_height(&intel_pstate->base.src) >> 16; if (intel_rotation_90_or_270(pstate->rotation)) swap(src_w, src_h); @@ -3226,7 +3393,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, if (intel_plane->pipe != pipe) continue; - if (!to_intel_plane_state(pstate)->visible) { + if (!to_intel_plane_state(pstate)->base.visible) { minimum[id] = 0; y_minimum[id] = 0; continue; @@ -3327,27 +3494,14 @@ static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp, uint32_t latenc } static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal, - uint32_t horiz_pixels, uint8_t cpp, - uint64_t tiling, uint32_t latency) + uint32_t latency, uint32_t plane_blocks_per_line) { uint32_t ret; - uint32_t plane_bytes_per_line, plane_blocks_per_line; uint32_t wm_intermediate_val; if (latency == 0) return UINT_MAX; - plane_bytes_per_line = horiz_pixels * cpp; - - if (tiling == I915_FORMAT_MOD_Y_TILED || - tiling == I915_FORMAT_MOD_Yf_TILED) { - plane_bytes_per_line *= 4; - plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); - plane_blocks_per_line /= 4; - } else { - plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); - } - wm_intermediate_val = latency * pixel_rate; ret = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000) * plane_blocks_per_line; @@ -3363,7 +3517,7 @@ static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cst uint64_t pixel_rate; /* Shouldn't reach here on disabled planes... 
*/ - if (WARN_ON(!pstate->visible)) + if (WARN_ON(!pstate->base.visible)) return 0; /* @@ -3398,14 +3552,15 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, uint8_t cpp; uint32_t width = 0, height = 0; uint32_t plane_pixel_rate; + uint32_t y_tile_minimum, y_min_scanlines; - if (latency == 0 || !cstate->base.active || !intel_pstate->visible) { + if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) { *enabled = false; return 0; } - width = drm_rect_width(&intel_pstate->src) >> 16; - height = drm_rect_height(&intel_pstate->src) >> 16; + width = drm_rect_width(&intel_pstate->base.src) >> 16; + height = drm_rect_height(&intel_pstate->base.src) >> 16; if (intel_rotation_90_or_270(pstate->rotation)) swap(width, height); @@ -3413,38 +3568,51 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, cpp = drm_format_plane_cpp(fb->pixel_format, 0); plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate); + if (intel_rotation_90_or_270(pstate->rotation)) { + int cpp = (fb->pixel_format == DRM_FORMAT_NV12) ? + drm_format_plane_cpp(fb->pixel_format, 1) : + drm_format_plane_cpp(fb->pixel_format, 0); + + switch (cpp) { + case 1: + y_min_scanlines = 16; + break; + case 2: + y_min_scanlines = 8; + break; + default: + WARN(1, "Unsupported pixel depth for rotation"); + case 4: + y_min_scanlines = 4; + break; + } + } else { + y_min_scanlines = 4; + } + + plane_bytes_per_line = width * cpp; + if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || + fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { + plane_blocks_per_line = + DIV_ROUND_UP(plane_bytes_per_line * y_min_scanlines, 512); + plane_blocks_per_line /= y_min_scanlines; + } else if (fb->modifier[0] == DRM_FORMAT_MOD_NONE) { + plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512) + + 1; + } else { + plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); + } + method1 = skl_wm_method1(plane_pixel_rate, cpp, latency); method2 = skl_wm_method2(plane_pixel_rate, cstate->base.adjusted_mode.crtc_htotal, - width, - cpp, - fb->modifier[0], - latency); + latency, + plane_blocks_per_line); - plane_bytes_per_line = width * cpp; - plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); + y_tile_minimum = plane_blocks_per_line * y_min_scanlines; if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { - uint32_t min_scanlines = 4; - uint32_t y_tile_minimum; - if (intel_rotation_90_or_270(pstate->rotation)) { - int cpp = (fb->pixel_format == DRM_FORMAT_NV12) ? 
- drm_format_plane_cpp(fb->pixel_format, 1) : - drm_format_plane_cpp(fb->pixel_format, 0); - - switch (cpp) { - case 1: - min_scanlines = 16; - break; - case 2: - min_scanlines = 8; - break; - case 8: - WARN(1, "Unsupported pixel depth for rotation"); - } - } - y_tile_minimum = plane_blocks_per_line * min_scanlines; selected_result = max(method2, y_tile_minimum); } else { if ((ddb_allocation / plane_blocks_per_line) >= 1) @@ -3458,10 +3626,12 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if (level >= 1 && level <= 7) { if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || - fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) - res_lines += 4; - else + fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { + res_blocks += y_tile_minimum; + res_lines += y_min_scanlines; + } else { res_blocks++; + } } if (res_blocks >= ddb_allocation || res_lines > 31) { @@ -3680,183 +3850,82 @@ static void skl_ddb_entry_write(struct drm_i915_private *dev_priv, I915_WRITE(reg, 0); } -static void skl_write_wm_values(struct drm_i915_private *dev_priv, - const struct skl_wm_values *new) +void skl_write_plane_wm(struct intel_crtc *intel_crtc, + const struct skl_wm_values *wm, + int plane) { - struct drm_device *dev = &dev_priv->drm; - struct intel_crtc *crtc; - - for_each_intel_crtc(dev, crtc) { - int i, level, max_level = ilk_wm_max_level(dev); - enum pipe pipe = crtc->pipe; - - if ((new->dirty_pipes & drm_crtc_mask(&crtc->base)) == 0) - continue; - if (!crtc->active) - continue; - - I915_WRITE(PIPE_WM_LINETIME(pipe), new->wm_linetime[pipe]); - - for (level = 0; level <= max_level; level++) { - for (i = 0; i < intel_num_planes(crtc); i++) - I915_WRITE(PLANE_WM(pipe, i, level), - new->plane[pipe][i][level]); - I915_WRITE(CUR_WM(pipe, level), - new->plane[pipe][PLANE_CURSOR][level]); - } - for (i = 0; i < intel_num_planes(crtc); i++) - I915_WRITE(PLANE_WM_TRANS(pipe, i), - new->plane_trans[pipe][i]); - I915_WRITE(CUR_WM_TRANS(pipe), - new->plane_trans[pipe][PLANE_CURSOR]); - - for (i = 0; i < intel_num_planes(crtc); i++) { - skl_ddb_entry_write(dev_priv, - PLANE_BUF_CFG(pipe, i), - &new->ddb.plane[pipe][i]); - skl_ddb_entry_write(dev_priv, - PLANE_NV12_BUF_CFG(pipe, i), - &new->ddb.y_plane[pipe][i]); - } + struct drm_crtc *crtc = &intel_crtc->base; + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + int level, max_level = ilk_wm_max_level(dev); + enum pipe pipe = intel_crtc->pipe; - skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe), - &new->ddb.plane[pipe][PLANE_CURSOR]); + for (level = 0; level <= max_level; level++) { + I915_WRITE(PLANE_WM(pipe, plane, level), + wm->plane[pipe][plane][level]); } -} + I915_WRITE(PLANE_WM_TRANS(pipe, plane), wm->plane_trans[pipe][plane]); -/* - * When setting up a new DDB allocation arrangement, we need to correctly - * sequence the times at which the new allocations for the pipes are taken into - * account or we'll have pipes fetching from space previously allocated to - * another pipe. - * - * Roughly the sequence looks like: - * 1. re-allocate the pipe(s) with the allocation being reduced and not - * overlapping with a previous light-up pipe (another way to put it is: - * pipes with their new allocation strickly included into their old ones). - * 2. re-allocate the other pipes that get their allocation reduced - * 3. allocate the pipes having their allocation increased - * - * Steps 1. and 2. are here to take care of the following case: - * - Initially DDB looks like this: - * | B | C | - * - enable pipe A. 
- * - pipe B has a reduced DDB allocation that overlaps with the old pipe C - * allocation - * | A | B | C | - * - * We need to sequence the re-allocation: C, B, A (and not B, C, A). - */ + skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane), + &wm->ddb.plane[pipe][plane]); + skl_ddb_entry_write(dev_priv, PLANE_NV12_BUF_CFG(pipe, plane), + &wm->ddb.y_plane[pipe][plane]); +} -static void -skl_wm_flush_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, int pass) +void skl_write_cursor_wm(struct intel_crtc *intel_crtc, + const struct skl_wm_values *wm) { - int plane; - - DRM_DEBUG_KMS("flush pipe %c (pass %d)\n", pipe_name(pipe), pass); + struct drm_crtc *crtc = &intel_crtc->base; + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + int level, max_level = ilk_wm_max_level(dev); + enum pipe pipe = intel_crtc->pipe; - for_each_plane(dev_priv, pipe, plane) { - I915_WRITE(PLANE_SURF(pipe, plane), - I915_READ(PLANE_SURF(pipe, plane))); + for (level = 0; level <= max_level; level++) { + I915_WRITE(CUR_WM(pipe, level), + wm->plane[pipe][PLANE_CURSOR][level]); } - I915_WRITE(CURBASE(pipe), I915_READ(CURBASE(pipe))); + I915_WRITE(CUR_WM_TRANS(pipe), wm->plane_trans[pipe][PLANE_CURSOR]); + + skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe), + &wm->ddb.plane[pipe][PLANE_CURSOR]); } -static bool -skl_ddb_allocation_included(const struct skl_ddb_allocation *old, - const struct skl_ddb_allocation *new, - enum pipe pipe) +bool skl_ddb_allocation_equals(const struct skl_ddb_allocation *old, + const struct skl_ddb_allocation *new, + enum pipe pipe) { - uint16_t old_size, new_size; - - old_size = skl_ddb_entry_size(&old->pipe[pipe]); - new_size = skl_ddb_entry_size(&new->pipe[pipe]); - - return old_size != new_size && - new->pipe[pipe].start >= old->pipe[pipe].start && - new->pipe[pipe].end <= old->pipe[pipe].end; + return new->pipe[pipe].start == old->pipe[pipe].start && + new->pipe[pipe].end == old->pipe[pipe].end; } -static void skl_flush_wm_values(struct drm_i915_private *dev_priv, - struct skl_wm_values *new_values) +static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a, + const struct skl_ddb_entry *b) { - struct drm_device *dev = &dev_priv->drm; - struct skl_ddb_allocation *cur_ddb, *new_ddb; - bool reallocated[I915_MAX_PIPES] = {}; - struct intel_crtc *crtc; - enum pipe pipe; - - new_ddb = &new_values->ddb; - cur_ddb = &dev_priv->wm.skl_hw.ddb; - - /* - * First pass: flush the pipes with the new allocation contained into - * the old space. - * - * We'll wait for the vblank on those pipes to ensure we can safely - * re-allocate the freed space without this pipe fetching from it. - */ - for_each_intel_crtc(dev, crtc) { - if (!crtc->active) - continue; - - pipe = crtc->pipe; - - if (!skl_ddb_allocation_included(cur_ddb, new_ddb, pipe)) - continue; - - skl_wm_flush_pipe(dev_priv, pipe, 1); - intel_wait_for_vblank(dev, pipe); - - reallocated[pipe] = true; - } - + return a->start < b->end && b->start < a->end; +} - /* - * Second pass: flush the pipes that are having their allocation - * reduced, but overlapping with a previous allocation. - * - * Here as well we need to wait for the vblank to make sure the freed - * space is not used anymore. 
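The multi-pass flush sequencing being deleted here is superseded by a simple interval test: two DDB entries conflict exactly when each one starts before the other ends, treating [start, end) as half-open. The predicate (completed just below in skl_ddb_entries_overlap()) is worth a worked illustration; this is a standalone copy for clarity, not part of the patch:

struct ddb_entry { unsigned short start, end; };	/* end is exclusive */

static bool entries_overlap(const struct ddb_entry *a,
			    const struct ddb_entry *b)
{
	return a->start < b->end && b->start < a->end;
}

/*
 * {0, 256} vs {256, 512} -> false: they only touch at block 256.
 * {0, 300} vs {256, 512} -> true:  blocks 256..299 are claimed twice,
 * so the pipes must not switch to the new allocation concurrently.
 */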
- */ - for_each_intel_crtc(dev, crtc) { - if (!crtc->active) - continue; + return a->start < b->end && b->start < a->end; +} - pipe = crtc->pipe; +bool skl_ddb_allocation_overlaps(struct drm_atomic_state *state, + const struct skl_ddb_allocation *old, + const struct skl_ddb_allocation *new, + enum pipe pipe) +{ + struct drm_device *dev = state->dev; + struct intel_crtc *intel_crtc; + enum pipe otherp; - pipe = crtc->pipe; + for_each_intel_crtc(dev, intel_crtc) { + otherp = intel_crtc->pipe; - if (reallocated[pipe]) + if (otherp == pipe) continue; - if (skl_ddb_entry_size(&new_ddb->pipe[pipe]) < - skl_ddb_entry_size(&cur_ddb->pipe[pipe])) { - skl_wm_flush_pipe(dev_priv, pipe, 2); - intel_wait_for_vblank(dev, pipe); - reallocated[pipe] = true; - } + if (skl_ddb_entries_overlap(&new->pipe[pipe], + &old->pipe[otherp])) + return true; } - /* - * Third pass: flush the pipes that got more space allocated. - * - * We don't need to actively wait for the update here, next vblank - * will just get more DDB space with the correct WM values. - */ - for_each_intel_crtc(dev, crtc) { - if (!crtc->active) - continue; - - pipe = crtc->pipe; - - /* - * At this point, only the pipes more space than before are - * left to re-allocate. - */ - if (reallocated[pipe]) - continue; - - skl_wm_flush_pipe(dev_priv, pipe, 3); - } + return false; } static int skl_update_pipe_wm(struct drm_crtc_state *cstate, @@ -3893,6 +3962,41 @@ pipes_modified(struct drm_atomic_state *state) return ret; } +int +skl_ddb_add_affected_planes(struct intel_crtc_state *cstate) +{ + struct drm_atomic_state *state = cstate->base.state; + struct drm_device *dev = state->dev; + struct drm_crtc *crtc = cstate->base.crtc; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb; + struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb; + struct drm_plane_state *plane_state; + struct drm_plane *plane; + enum pipe pipe = intel_crtc->pipe; + int id; + + WARN_ON(!drm_atomic_get_existing_crtc_state(state, crtc)); + + drm_for_each_plane_mask(plane, dev, crtc->state->plane_mask) { + id = skl_wm_plane_id(to_intel_plane(plane)); + + if (skl_ddb_entry_equal(&cur_ddb->plane[pipe][id], + &new_ddb->plane[pipe][id]) && + skl_ddb_entry_equal(&cur_ddb->y_plane[pipe][id], + &new_ddb->y_plane[pipe][id])) + continue; + + plane_state = drm_atomic_get_plane_state(state, plane); + if (IS_ERR(plane_state)) + return PTR_ERR(plane_state); + } + + return 0; +} + static int skl_compute_ddb(struct drm_atomic_state *state) { @@ -3910,9 +4014,24 @@ skl_compute_ddb(struct drm_atomic_state *state) * pretend that all pipes switched active status so that we'll * ensure a full DDB recompute. */ - if (dev_priv->wm.distrust_bios_wm) + if (dev_priv->wm.distrust_bios_wm) { + ret = drm_modeset_lock(&dev->mode_config.connection_mutex, + state->acquire_ctx); + if (ret) + return ret; + intel_state->active_pipe_changes = ~0; + /* + * We usually only initialize intel_state->active_crtcs if + * we're doing a modeset; make sure this field is always + * initialized during the sanitization process that happens + * on the first commit too.
+ */ + if (!intel_state->modeset) + intel_state->active_crtcs = dev_priv->active_crtcs; + } + /* * If the modeset changes which CRTC's are active, we need to * recompute the DDB allocation for *all* active pipes, even @@ -3941,11 +4060,33 @@ skl_compute_ddb(struct drm_atomic_state *state) ret = skl_allocate_pipe_ddb(cstate, ddb); if (ret) return ret; + + ret = skl_ddb_add_affected_planes(cstate); + if (ret) + return ret; } return 0; } +static void +skl_copy_wm_for_pipe(struct skl_wm_values *dst, + struct skl_wm_values *src, + enum pipe pipe) +{ + dst->wm_linetime[pipe] = src->wm_linetime[pipe]; + memcpy(dst->plane[pipe], src->plane[pipe], + sizeof(dst->plane[pipe])); + memcpy(dst->plane_trans[pipe], src->plane_trans[pipe], + sizeof(dst->plane_trans[pipe])); + + dst->ddb.pipe[pipe] = src->ddb.pipe[pipe]; + memcpy(dst->ddb.y_plane[pipe], src->ddb.y_plane[pipe], + sizeof(dst->ddb.y_plane[pipe])); + memcpy(dst->ddb.plane[pipe], src->ddb.plane[pipe], + sizeof(dst->ddb.plane[pipe])); +} + static int skl_compute_wm(struct drm_atomic_state *state) { @@ -4018,8 +4159,10 @@ static void skl_update_wm(struct drm_crtc *crtc) struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct skl_wm_values *results = &dev_priv->wm.skl_results; + struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw; struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state); struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal; + enum pipe pipe = intel_crtc->pipe; if ((results->dirty_pipes & drm_crtc_mask(crtc)) == 0) return; @@ -4028,11 +4171,22 @@ static void skl_update_wm(struct drm_crtc *crtc) mutex_lock(&dev_priv->wm.wm_mutex); - skl_write_wm_values(dev_priv, results); - skl_flush_wm_values(dev_priv, results); + /* + * If this pipe isn't active already, we're going to be enabling it + * very soon. Since it's safe to update a pipe's ddb allocation while + * the pipe's shut off, just do so here. Already active pipes will have + * their watermarks updated once we update their planes. 
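skl_write_plane_wm() and skl_write_cursor_wm() are split out of the old monolithic skl_write_wm_values() so that watermark and DDB registers can be programmed one plane at a time, as skl_update_wm() now does for freshly enabled pipes. A hedged sketch of how a per-plane update path might invoke them (hypothetical caller, assuming it lives where the static skl_wm_plane_id() is visible):

static void update_plane_wm(struct intel_crtc *crtc,
			    struct intel_plane *plane,
			    const struct skl_wm_values *wm)
{
	/* Cursor has its own register block; everything else goes by id. */
	if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
		skl_write_cursor_wm(crtc, wm);
	else
		skl_write_plane_wm(crtc, wm, skl_wm_plane_id(plane));
}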
+ */ + if (crtc->state->active_changed) { + int plane; - /* store the new configuration */ - dev_priv->wm.skl_hw = *results; + for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) + skl_write_plane_wm(intel_crtc, results, plane); + + skl_write_cursor_wm(intel_crtc, results); + } + + skl_copy_wm_for_pipe(hw_vals, results, pipe); mutex_unlock(&dev_priv->wm.wm_mutex); } @@ -4892,7 +5046,8 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) else gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); dev_priv->rps.last_adj = 0; - I915_WRITE(GEN6_PMINTRMSK, 0xffffffff); + I915_WRITE(GEN6_PMINTRMSK, + gen6_sanitize_rps_pm_mask(dev_priv, ~0)); } mutex_unlock(&dev_priv->rps.hw_lock); @@ -4911,7 +5066,7 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv, */ if (!(dev_priv->gt.awake && dev_priv->rps.enabled && - dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)) + dev_priv->rps.cur_freq < dev_priv->rps.boost_freq)) return; /* Force a RPS boost (and don't count it against the client) if @@ -5102,35 +5257,31 @@ int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6) static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) { - uint32_t rp_state_cap; - u32 ddcc_status = 0; - int ret; - /* All of these values are in units of 50MHz */ - dev_priv->rps.cur_freq = 0; + /* static values from HW: RP0 > RP1 > RPn (min_freq) */ if (IS_BROXTON(dev_priv)) { - rp_state_cap = I915_READ(BXT_RP_STATE_CAP); + u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP); dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff; dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; dev_priv->rps.min_freq = (rp_state_cap >> 0) & 0xff; } else { - rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); + u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff; dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff; } - /* hw_max = RP0 until we check for overclocking */ - dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; + dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - ret = sandybridge_pcode_read(dev_priv, - HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, - &ddcc_status); - if (0 == ret) + u32 ddcc_status = 0; + + if (sandybridge_pcode_read(dev_priv, + HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, + &ddcc_status) == 0) dev_priv->rps.efficient_freq = clamp_t(u8, ((ddcc_status >> 8) & 0xff), @@ -5140,29 +5291,26 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { /* Store the frequency values in 16.66 MHZ units, which is - the natural hardware unit for SKL */ + * the natural hardware unit for SKL + */ dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER; dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER; dev_priv->rps.min_freq *= GEN9_FREQ_SCALER; dev_priv->rps.max_freq *= GEN9_FREQ_SCALER; dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER; } +} - dev_priv->rps.idle_freq = dev_priv->rps.min_freq; +static void reset_rps(struct drm_i915_private *dev_priv, + void (*set)(struct drm_i915_private *, u8)) +{ + u8 freq = dev_priv->rps.cur_freq; - /* Preserve min/max settings in case of re-init */ - if (dev_priv->rps.max_freq_softlimit == 0) - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; + /* force a reset */ + dev_priv->rps.power = -1; + dev_priv->rps.cur_freq = -1; - if 
(dev_priv->rps.min_freq_softlimit == 0) { - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - dev_priv->rps.min_freq_softlimit = - max_t(int, dev_priv->rps.efficient_freq, - intel_freq_opcode(dev_priv, 450)); - else - dev_priv->rps.min_freq_softlimit = - dev_priv->rps.min_freq; - } + set(dev_priv, freq); } /* See the Gen9_GT_PM_Programming_Guide doc for the below */ @@ -5170,8 +5318,6 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv) { intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - gen6_init_rps_frequencies(dev_priv); - /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { /* @@ -5201,8 +5347,7 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv) /* Leaning on the below call to gen6_set_rps to program/setup the * Up/Down EI & threshold registers, as well as the RP_CONTROL, * RP_INTERRUPT_LIMITS & RPNSWREQ registers */ - dev_priv->rps.power = HIGH_POWER; /* force a reset */ - gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); + reset_rps(dev_priv, gen6_set_rps); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } @@ -5289,9 +5434,6 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) /* 2a: Disable RC states. */ I915_WRITE(GEN6_RC_CONTROL, 0); - /* Initialize rps frequencies */ - gen6_init_rps_frequencies(dev_priv); - /* 2b: Program RC6 thresholds.*/ I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ @@ -5348,8 +5490,7 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) /* 6: Ring frequency + overclocking (our driver does this later */ - dev_priv->rps.power = HIGH_POWER; /* force a reset */ - gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); + reset_rps(dev_priv, gen6_set_rps); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } @@ -5357,7 +5498,7 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) static void gen6_enable_rps(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; - u32 rc6vids, pcu_mbox = 0, rc6_mask = 0; + u32 rc6vids, rc6_mask = 0; u32 gtfifodbg; int rc6_mode; int ret; @@ -5381,9 +5522,6 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - /* Initialize rps frequencies */ - gen6_init_rps_frequencies(dev_priv); - /* disable the counters and set deterministic thresholds */ I915_WRITE(GEN6_RC_CONTROL, 0); @@ -5434,16 +5572,7 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) if (ret) DRM_DEBUG_DRIVER("Failed to set the min frequency\n"); - ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox); - if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */ - DRM_DEBUG_DRIVER("Overclocking supported. 
Max: %dMHz, Overclock max: %dMHz\n", - (dev_priv->rps.max_freq_softlimit & 0xff) * 50, - (pcu_mbox & 0xff) * 50); - dev_priv->rps.max_freq = pcu_mbox & 0xff; - } - - dev_priv->rps.power = HIGH_POWER; /* force a reset */ - gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); + reset_rps(dev_priv, gen6_set_rps); rc6vids = 0; ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); @@ -5462,7 +5591,7 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static void __gen6_update_ring_freq(struct drm_i915_private *dev_priv) +static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) { int min_freq = 15; unsigned int gpu_freq; @@ -5546,23 +5675,13 @@ static void __gen6_update_ring_freq(struct drm_i915_private *dev_priv) } } -void gen6_update_ring_freq(struct drm_i915_private *dev_priv) -{ - if (!HAS_CORE_RING_FREQ(dev_priv)) - return; - - mutex_lock(&dev_priv->rps.hw_lock); - __gen6_update_ring_freq(dev_priv); - mutex_unlock(&dev_priv->rps.hw_lock); -} - static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv) { u32 val, rp0; val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE); - switch (INTEL_INFO(dev_priv)->eu_total) { + switch (INTEL_INFO(dev_priv)->sseu.eu_total) { case 8: /* (2 * 4) config */ rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT); @@ -5700,8 +5819,6 @@ static void valleyview_setup_pctx(struct drm_i915_private *dev_priv) u32 pcbr; int pctx_size = 24*1024; - mutex_lock(&dev_priv->drm.struct_mutex); - pcbr = I915_READ(VLV_PCBR); if (pcbr) { /* BIOS set it up already, grab the pre-alloc'd space */ @@ -5737,7 +5854,6 @@ static void valleyview_setup_pctx(struct drm_i915_private *dev_priv) out: DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); dev_priv->vlv_pctx = pctx; - mutex_unlock(&dev_priv->drm.struct_mutex); } static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv) @@ -5745,7 +5861,7 @@ static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv) if (WARN_ON(!dev_priv->vlv_pctx)) return; - drm_gem_object_unreference_unlocked(&dev_priv->vlv_pctx->base); + i915_gem_object_put_unlocked(dev_priv->vlv_pctx); dev_priv->vlv_pctx = NULL; } @@ -5768,8 +5884,6 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) vlv_init_gpll_ref_freq(dev_priv); - mutex_lock(&dev_priv->rps.hw_lock); - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); switch ((val >> 6) & 3) { case 0: @@ -5805,17 +5919,6 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), dev_priv->rps.min_freq); - - dev_priv->rps.idle_freq = dev_priv->rps.min_freq; - - /* Preserve min/max settings in case of re-init */ - if (dev_priv->rps.max_freq_softlimit == 0) - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; - - if (dev_priv->rps.min_freq_softlimit == 0) - dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; - - mutex_unlock(&dev_priv->rps.hw_lock); } static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) @@ -5826,8 +5929,6 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) vlv_init_gpll_ref_freq(dev_priv); - mutex_lock(&dev_priv->rps.hw_lock); - mutex_lock(&dev_priv->sb_lock); val = vlv_cck_read(dev_priv, CCK_FUSE_REG); mutex_unlock(&dev_priv->sb_lock); @@ -5869,17 +5970,6 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) dev_priv->rps.rp1_freq 
| dev_priv->rps.min_freq) & 1, "Odd GPU freq values\n"); - - dev_priv->rps.idle_freq = dev_priv->rps.min_freq; - - /* Preserve min/max settings in case of re-init */ - if (dev_priv->rps.max_freq_softlimit == 0) - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; - - if (dev_priv->rps.min_freq_softlimit == 0) - dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; - - mutex_unlock(&dev_priv->rps.hw_lock); } static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv) @@ -5970,16 +6060,7 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); - dev_priv->rps.cur_freq = (val >> 8) & 0xff; - DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq), - dev_priv->rps.cur_freq); - - DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq), - dev_priv->rps.idle_freq); - - valleyview_set_rps(dev_priv, dev_priv->rps.idle_freq); + reset_rps(dev_priv, valleyview_set_rps); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } @@ -6059,16 +6140,7 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); - dev_priv->rps.cur_freq = (val >> 8) & 0xff; - DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq), - dev_priv->rps.cur_freq); - - DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq), - dev_priv->rps.idle_freq); - - valleyview_set_rps(dev_priv, dev_priv->rps.idle_freq); + reset_rps(dev_priv, valleyview_set_rps); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } @@ -6397,19 +6469,11 @@ EXPORT_SYMBOL_GPL(i915_gpu_lower); */ bool i915_gpu_busy(void) { - struct drm_i915_private *dev_priv; - struct intel_engine_cs *engine; bool ret = false; spin_lock_irq(&mchdev_lock); - if (!i915_mch_dev) - goto out_unlock; - dev_priv = i915_mch_dev; - - for_each_engine(engine, dev_priv) - ret |= !list_empty(&engine->request_list); - -out_unlock: + if (i915_mch_dev) + ret = i915_mch_dev->gt.awake; spin_unlock_irq(&mchdev_lock); return ret; @@ -6565,30 +6629,62 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) intel_runtime_pm_get(dev_priv); } + mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&dev_priv->rps.hw_lock); + + /* Initialize RPS limits (for userspace) */ if (IS_CHERRYVIEW(dev_priv)) cherryview_init_gt_powersave(dev_priv); else if (IS_VALLEYVIEW(dev_priv)) valleyview_init_gt_powersave(dev_priv); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_init_rps_frequencies(dev_priv); + + /* Derive initial user preferences/limits from the hardware limits */ + dev_priv->rps.idle_freq = dev_priv->rps.min_freq; + dev_priv->rps.cur_freq = dev_priv->rps.idle_freq; + + dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; + dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; + + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + dev_priv->rps.min_freq_softlimit = + max_t(int, + dev_priv->rps.efficient_freq, + intel_freq_opcode(dev_priv, 450)); + + /* After setting max-softlimit, find the overclock max freq */ + if (IS_GEN6(dev_priv) || + IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) { + u32 params = 0; + + sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params); + if (params & BIT(31)) { /* OC supported */ + 
DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n", + (dev_priv->rps.max_freq & 0xff) * 50, + (params & 0xff) * 50); + dev_priv->rps.max_freq = params & 0xff; + } + } + + /* Finally allow us to boost to max by default */ + dev_priv->rps.boost_freq = dev_priv->rps.max_freq; + + mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->drm.struct_mutex); + + intel_autoenable_gt_powersave(dev_priv); } void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) { - if (IS_CHERRYVIEW(dev_priv)) - return; - else if (IS_VALLEYVIEW(dev_priv)) + if (IS_VALLEYVIEW(dev_priv)) valleyview_cleanup_gt_powersave(dev_priv); if (!i915.enable_rc6) intel_runtime_pm_put(dev_priv); } -static void gen6_suspend_rps(struct drm_i915_private *dev_priv) -{ - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - - gen6_disable_rps_interrupts(dev_priv); -} - /** * intel_suspend_gt_powersave - suspend PM work and helper threads * @dev_priv: i915 device @@ -6602,60 +6698,76 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) < 6) return; - gen6_suspend_rps(dev_priv); + if (cancel_delayed_work_sync(&dev_priv->rps.autoenable_work)) + intel_runtime_pm_put(dev_priv); + + /* gen6_rps_idle() will be called later to disable interrupts */ +} + +void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) +{ + dev_priv->rps.enabled = true; /* force disabling */ + intel_disable_gt_powersave(dev_priv); - /* Force GPU to min freq during suspend */ - gen6_rps_idle(dev_priv); + gen6_reset_rps_interrupts(dev_priv); } void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) { - if (IS_IRONLAKE_M(dev_priv)) { - ironlake_disable_drps(dev_priv); - } else if (INTEL_INFO(dev_priv)->gen >= 6) { - intel_suspend_gt_powersave(dev_priv); + if (!READ_ONCE(dev_priv->rps.enabled)) + return; - mutex_lock(&dev_priv->rps.hw_lock); - if (INTEL_INFO(dev_priv)->gen >= 9) { - gen9_disable_rc6(dev_priv); - gen9_disable_rps(dev_priv); - } else if (IS_CHERRYVIEW(dev_priv)) - cherryview_disable_rps(dev_priv); - else if (IS_VALLEYVIEW(dev_priv)) - valleyview_disable_rps(dev_priv); - else - gen6_disable_rps(dev_priv); + mutex_lock(&dev_priv->rps.hw_lock); - dev_priv->rps.enabled = false; - mutex_unlock(&dev_priv->rps.hw_lock); + if (INTEL_GEN(dev_priv) >= 9) { + gen9_disable_rc6(dev_priv); + gen9_disable_rps(dev_priv); + } else if (IS_CHERRYVIEW(dev_priv)) { + cherryview_disable_rps(dev_priv); + } else if (IS_VALLEYVIEW(dev_priv)) { + valleyview_disable_rps(dev_priv); + } else if (INTEL_GEN(dev_priv) >= 6) { + gen6_disable_rps(dev_priv); + } else if (IS_IRONLAKE_M(dev_priv)) { + ironlake_disable_drps(dev_priv); } + + dev_priv->rps.enabled = false; + mutex_unlock(&dev_priv->rps.hw_lock); } -static void intel_gen6_powersave_work(struct work_struct *work) +void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, - rps.delayed_resume_work.work); + /* We shouldn't be disabling as we submit, so this should be less + * racy than it appears! 
+ */ + if (READ_ONCE(dev_priv->rps.enabled)) + return; - mutex_lock(&dev_priv->rps.hw_lock); + /* Powersaving is controlled by the host when inside a VM */ + if (intel_vgpu_active(dev_priv)) + return; - gen6_reset_rps_interrupts(dev_priv); + mutex_lock(&dev_priv->rps.hw_lock); if (IS_CHERRYVIEW(dev_priv)) { cherryview_enable_rps(dev_priv); } else if (IS_VALLEYVIEW(dev_priv)) { valleyview_enable_rps(dev_priv); - } else if (INTEL_INFO(dev_priv)->gen >= 9) { + } else if (INTEL_GEN(dev_priv) >= 9) { gen9_enable_rc6(dev_priv); gen9_enable_rps(dev_priv); if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) - __gen6_update_ring_freq(dev_priv); + gen6_update_ring_freq(dev_priv); } else if (IS_BROADWELL(dev_priv)) { gen8_enable_rps(dev_priv); - __gen6_update_ring_freq(dev_priv); - } else { + gen6_update_ring_freq(dev_priv); + } else if (INTEL_GEN(dev_priv) >= 6) { gen6_enable_rps(dev_priv); - __gen6_update_ring_freq(dev_priv); + gen6_update_ring_freq(dev_priv); + } else if (IS_IRONLAKE_M(dev_priv)) { + ironlake_enable_drps(dev_priv); + intel_init_emon(dev_priv); } WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq); @@ -6665,25 +6777,52 @@ static void intel_gen6_powersave_work(struct work_struct *work) WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq); dev_priv->rps.enabled = true; + mutex_unlock(&dev_priv->rps.hw_lock); +} - gen6_enable_rps_interrupts(dev_priv); +static void __intel_autoenable_gt_powersave(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), rps.autoenable_work.work); + struct intel_engine_cs *rcs; + struct drm_i915_gem_request *req; - mutex_unlock(&dev_priv->rps.hw_lock); + if (READ_ONCE(dev_priv->rps.enabled)) + goto out; + + rcs = &dev_priv->engine[RCS]; + if (rcs->last_context) + goto out; + + if (!rcs->init_context) + goto out; + + mutex_lock(&dev_priv->drm.struct_mutex); + + req = i915_gem_request_alloc(rcs, dev_priv->kernel_context); + if (IS_ERR(req)) + goto unlock; + if (!i915.enable_execlists && i915_switch_context(req) == 0) + rcs->init_context(req); + + /* Mark the device busy, calling intel_enable_gt_powersave() */ + i915_add_request_no_flush(req); + +unlock: + mutex_unlock(&dev_priv->drm.struct_mutex); +out: intel_runtime_pm_put(dev_priv); } -void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) +void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv) { - /* Powersaving is controlled by the host when inside a VM */ - if (intel_vgpu_active(dev_priv)) + if (READ_ONCE(dev_priv->rps.enabled)) return; if (IS_IRONLAKE_M(dev_priv)) { ironlake_enable_drps(dev_priv); - mutex_lock(&dev_priv->drm.struct_mutex); intel_init_emon(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); } else if (INTEL_INFO(dev_priv)->gen >= 6) { /* * PCU communication is slow and this doesn't need to be @@ -6697,21 +6836,13 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) * paths, so the _noresume version is enough (and in case of * runtime resume it's necessary). 
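A detail worth calling out in this autoenable rework: the runtime-PM wakeref is tied to the lifetime of the pending delayed work. Both halves of the pattern appear in this patch; condensed side by side here for reference (both fragments are taken from the queue and cancel sites in this diff, nothing is new):

/* Queue side: take a wakeref only if the work was actually queued. */
if (queue_delayed_work(dev_priv->wq,
		       &dev_priv->rps.autoenable_work,
		       round_jiffies_up_relative(HZ)))
	intel_runtime_pm_get_noresume(dev_priv);

/*
 * Cancel side: drop the wakeref only if a pending work was cancelled;
 * if the work already ran, __intel_autoenable_gt_powersave() dropped it.
 */
if (cancel_delayed_work_sync(&dev_priv->rps.autoenable_work))
	intel_runtime_pm_put(dev_priv);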
*/ - if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work, - round_jiffies_up_relative(HZ))) + if (queue_delayed_work(dev_priv->wq, + &dev_priv->rps.autoenable_work, + round_jiffies_up_relative(HZ))) intel_runtime_pm_get_noresume(dev_priv); } } -void intel_reset_gt_powersave(struct drm_i915_private *dev_priv) -{ - if (INTEL_INFO(dev_priv)->gen < 6) - return; - - gen6_suspend_rps(dev_priv); - dev_priv->rps.enabled = false; -} - static void ibx_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -7657,8 +7788,54 @@ void intel_init_pm(struct drm_device *dev) } } +static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv) +{ + uint32_t flags = + I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK; + + switch (flags) { + case GEN6_PCODE_SUCCESS: + return 0; + case GEN6_PCODE_UNIMPLEMENTED_CMD: + case GEN6_PCODE_ILLEGAL_CMD: + return -ENXIO; + case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: + case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: + return -EOVERFLOW; + case GEN6_PCODE_TIMEOUT: + return -ETIMEDOUT; + default: + MISSING_CASE(flags); + return 0; + } +} + +static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv) +{ + uint32_t flags = + I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK; + + switch (flags) { + case GEN6_PCODE_SUCCESS: + return 0; + case GEN6_PCODE_ILLEGAL_CMD: + return -ENXIO; + case GEN7_PCODE_TIMEOUT: + return -ETIMEDOUT; + case GEN7_PCODE_ILLEGAL_DATA: + return -EINVAL; + case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: + return -EOVERFLOW; + default: + MISSING_CASE(flags); + return 0; + } +} + int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val) { + int status; + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); /* GEN6_PCODE_* are outside of the forcewake domain, we can @@ -7685,12 +7862,25 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val *val = I915_READ_FW(GEN6_PCODE_DATA); I915_WRITE_FW(GEN6_PCODE_DATA, 0); + if (INTEL_GEN(dev_priv) > 6) + status = gen7_check_mailbox_status(dev_priv); + else + status = gen6_check_mailbox_status(dev_priv); + + if (status) { + DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed: %d\n", + status); + return status; + } + return 0; } int sandybridge_pcode_write(struct drm_i915_private *dev_priv, - u32 mbox, u32 val) + u32 mbox, u32 val) { + int status; + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); /* GEN6_PCODE_* are outside of the forcewake domain, we can @@ -7715,6 +7905,17 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, I915_WRITE_FW(GEN6_PCODE_DATA, 0); + if (INTEL_GEN(dev_priv) > 6) + status = gen7_check_mailbox_status(dev_priv); + else + status = gen6_check_mailbox_status(dev_priv); + + if (status) { + DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed: %d\n", + status); + return status; + } + return 0; } @@ -7786,7 +7987,7 @@ static void __intel_rps_boost_work(struct work_struct *work) if (!i915_gem_request_completed(req)) gen6_rps_boost(req->i915, NULL, req->emitted_jiffies); - i915_gem_request_unreference(req); + i915_gem_request_put(req); kfree(boost); } @@ -7804,8 +8005,7 @@ void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req) if (boost == NULL) return; - i915_gem_request_reference(req); - boost->req = req; + boost->req = i915_gem_request_get(req); INIT_WORK(&boost->work, __intel_rps_boost_work); queue_work(req->i915->wq, &boost->work); @@ -7818,11 +8018,9 @@ void
intel_pm_setup(struct drm_device *dev) mutex_init(&dev_priv->rps.hw_lock); spin_lock_init(&dev_priv->rps.client_lock); - INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work, - intel_gen6_powersave_work); + INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work, + __intel_autoenable_gt_powersave); INIT_LIST_HEAD(&dev_priv->rps.clients); - INIT_LIST_HEAD(&dev_priv->rps.semaphores.link); - INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link); dev_priv->pm.suspended = false; atomic_set(&dev_priv->pm.wakeref_count, 0); diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 2b0d1baf15b3..108ba1e5d658 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -255,14 +255,14 @@ static void hsw_psr_enable_source(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = to_i915(dev); uint32_t max_sleep_time = 0x1f; - /* Lately it was identified that depending on panel idle frame count - * calculated at HW can be off by 1. So let's use what came - * from VBT + 1. - * There are also other cases where panel demands at least 4 - * but VBT is not being set. To cover these 2 cases lets use - * at least 5 when VBT isn't set to be on the safest side. + /* + * Let's respect VBT in case VBT asks a higher idle_frame value. + * Let's use 6 as the minimum to cover all known cases including + * the off-by-one issue that HW has in some cases. Also there are + * cases where sink should be able to train + * with the 5 or 6 idle patterns. */ - uint32_t idle_frames = dev_priv->vbt.psr.idle_frames + 1; + uint32_t idle_frames = max(6, dev_priv->vbt.psr.idle_frames); uint32_t val = EDP_PSR_ENABLE; val |= max_sleep_time << EDP_PSR_MAX_SLEEP_TIME_SHIFT; @@ -645,9 +645,8 @@ unlock: mutex_unlock(&dev_priv->psr.lock); } -static void intel_psr_exit(struct drm_device *dev) +static void intel_psr_exit(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_dp *intel_dp = dev_priv->psr.enabled; struct drm_crtc *crtc = dp_to_dig_port(intel_dp)->base.base.crtc; enum pipe pipe = to_intel_crtc(crtc)->pipe; @@ -656,7 +655,7 @@ static void intel_psr_exit(struct drm_device *dev) if (!dev_priv->psr.active) return; - if (HAS_DDI(dev)) { + if (HAS_DDI(dev_priv)) { val = I915_READ(EDP_PSR_CTL); WARN_ON(!(val & EDP_PSR_ENABLE)); @@ -691,7 +690,7 @@ static void intel_psr_exit(struct drm_device *dev) /** * intel_psr_single_frame_update - Single Frame Update - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * Some platforms support a single frame update feature that is used to @@ -699,10 +698,9 @@ static void intel_psr_exit(struct drm_device *dev) * So far it is only implemented for Valleyview and Cherryview because * hardware requires this to be done before a page flip. */ -void intel_psr_single_frame_update(struct drm_device *dev, +void intel_psr_single_frame_update(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits) { - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; enum pipe pipe; u32 val; @@ -711,7 +709,7 @@ void intel_psr_single_frame_update(struct drm_device *dev, * Single frame update is already supported on BDW+ but it requires * many W/A and it isn't really needed. 
*/ - if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev)) + if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) return; mutex_lock(&dev_priv->psr.lock); @@ -737,7 +735,7 @@ void intel_psr_single_frame_update(struct drm_device *dev, /** * intel_psr_invalidate - Invalidade PSR - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * Since the hardware frontbuffer tracking has gaps we need to integrate @@ -747,10 +745,9 @@ void intel_psr_single_frame_update(struct drm_device *dev, * * Dirty frontbuffers relevant to PSR are tracked in busy_frontbuffer_bits." */ -void intel_psr_invalidate(struct drm_device *dev, +void intel_psr_invalidate(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits) { - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; enum pipe pipe; @@ -767,14 +764,14 @@ void intel_psr_invalidate(struct drm_device *dev, dev_priv->psr.busy_frontbuffer_bits |= frontbuffer_bits; if (frontbuffer_bits) - intel_psr_exit(dev); + intel_psr_exit(dev_priv); mutex_unlock(&dev_priv->psr.lock); } /** * intel_psr_flush - Flush PSR - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * @origin: which operation caused the flush * @@ -785,10 +782,9 @@ void intel_psr_invalidate(struct drm_device *dev, * * Dirty frontbuffers relevant to PSR are tracked in busy_frontbuffer_bits. */ -void intel_psr_flush(struct drm_device *dev, +void intel_psr_flush(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits, enum fb_op_origin origin) { - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; enum pipe pipe; @@ -806,7 +802,7 @@ void intel_psr_flush(struct drm_device *dev, /* By definition flush = invalidate + flush */ if (frontbuffer_bits) - intel_psr_exit(dev); + intel_psr_exit(dev_priv); if (!dev_priv->psr.active && !dev_priv->psr.busy_frontbuffer_bits) if (!work_busy(&dev_priv->psr.work.work)) diff --git a/drivers/gpu/drm/i915/intel_renderstate.h b/drivers/gpu/drm/i915/intel_renderstate.h index 5bd69852752c..08f6fea05a2c 100644 --- a/drivers/gpu/drm/i915/intel_renderstate.h +++ b/drivers/gpu/drm/i915/intel_renderstate.h @@ -24,12 +24,13 @@ #ifndef _INTEL_RENDERSTATE_H #define _INTEL_RENDERSTATE_H -#include "i915_drv.h" +#include <linux/types.h> -extern const struct intel_renderstate_rodata gen6_null_state; -extern const struct intel_renderstate_rodata gen7_null_state; -extern const struct intel_renderstate_rodata gen8_null_state; -extern const struct intel_renderstate_rodata gen9_null_state; +struct intel_renderstate_rodata { + const u32 *reloc; + const u32 *batch; + const u32 batch_items; +}; #define RO_RENDERSTATE(_g) \ const struct intel_renderstate_rodata gen ## _g ## _null_state = { \ @@ -38,4 +39,9 @@ extern const struct intel_renderstate_rodata gen9_null_state; .batch_items = sizeof(gen ## _g ## _null_state_batch)/4, \ } +extern const struct intel_renderstate_rodata gen6_null_state; +extern const struct intel_renderstate_rodata gen7_null_state; +extern const struct intel_renderstate_rodata gen8_null_state; +extern const struct intel_renderstate_rodata gen9_null_state; + #endif /* INTEL_RENDERSTATE_H */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index cca7792f26d5..ed9955dce156 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -47,57 +47,44 @@ int __intel_ring_space(int head, int tail, int size) return space - I915_RING_FREE_SPACE; } -void 
intel_ring_update_space(struct intel_ringbuffer *ringbuf) +void intel_ring_update_space(struct intel_ring *ring) { - if (ringbuf->last_retired_head != -1) { - ringbuf->head = ringbuf->last_retired_head; - ringbuf->last_retired_head = -1; + if (ring->last_retired_head != -1) { + ring->head = ring->last_retired_head; + ring->last_retired_head = -1; } - ringbuf->space = __intel_ring_space(ringbuf->head & HEAD_ADDR, - ringbuf->tail, ringbuf->size); -} - -static void __intel_ring_advance(struct intel_engine_cs *engine) -{ - struct intel_ringbuffer *ringbuf = engine->buffer; - ringbuf->tail &= ringbuf->size - 1; - engine->write_tail(engine, ringbuf->tail); + ring->space = __intel_ring_space(ring->head & HEAD_ADDR, + ring->tail, ring->size); } static int -gen2_render_ring_flush(struct drm_i915_gem_request *req, - u32 invalidate_domains, - u32 flush_domains) +gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; u32 cmd; int ret; cmd = MI_FLUSH; - if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0) - cmd |= MI_NO_WRITE_FLUSH; - if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) + if (mode & EMIT_INVALIDATE) cmd |= MI_READ_FLUSH; ret = intel_ring_begin(req, 2); if (ret) return ret; - intel_ring_emit(engine, cmd); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, cmd); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } static int -gen4_render_ring_flush(struct drm_i915_gem_request *req, - u32 invalidate_domains, - u32 flush_domains) +gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; u32 cmd; int ret; @@ -129,23 +116,20 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, * are flushed at any MI_FLUSH. 
*/ - cmd = MI_FLUSH | MI_NO_WRITE_FLUSH; - if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) - cmd &= ~MI_NO_WRITE_FLUSH; - if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION) + cmd = MI_FLUSH; + if (mode & EMIT_INVALIDATE) { cmd |= MI_EXE_FLUSH; - - if (invalidate_domains & I915_GEM_DOMAIN_COMMAND && - (IS_G4X(req->i915) || IS_GEN5(req->i915))) - cmd |= MI_INVALIDATE_ISP; + if (IS_G4X(req->i915) || IS_GEN5(req->i915)) + cmd |= MI_INVALIDATE_ISP; + } ret = intel_ring_begin(req, 2); if (ret) return ret; - intel_ring_emit(engine, cmd); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, cmd); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -190,45 +174,46 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, static int intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = req->engine; - u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; + struct intel_ring *ring = req->ring; + u32 scratch_addr = + i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; int ret; ret = intel_ring_begin(req, 6); if (ret) return ret; - intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(5)); - intel_ring_emit(engine, PIPE_CONTROL_CS_STALL | + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5)); + intel_ring_emit(ring, PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD); - intel_ring_emit(engine, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */ - intel_ring_emit(engine, 0); /* low dword */ - intel_ring_emit(engine, 0); /* high dword */ - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); + intel_ring_emit(ring, 0); /* low dword */ + intel_ring_emit(ring, 0); /* high dword */ + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); ret = intel_ring_begin(req, 6); if (ret) return ret; - intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(5)); - intel_ring_emit(engine, PIPE_CONTROL_QW_WRITE); - intel_ring_emit(engine, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */ - intel_ring_emit(engine, 0); - intel_ring_emit(engine, 0); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5)); + intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE); + intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } static int -gen6_render_ring_flush(struct drm_i915_gem_request *req, - u32 invalidate_domains, u32 flush_domains) +gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; + u32 scratch_addr = + i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; u32 flags = 0; - u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; int ret; /* Force SNB workarounds for PIPE_CONTROL flushes */ @@ -240,7 +225,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, * number of bits based on the write domains has little performance * impact. 
*/ - if (flush_domains) { + if (mode & EMIT_FLUSH) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; /* @@ -249,7 +234,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, */ flags |= PIPE_CONTROL_CS_STALL; } - if (invalidate_domains) { + if (mode & EMIT_INVALIDATE) { flags |= PIPE_CONTROL_TLB_INVALIDATE; flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; @@ -266,11 +251,11 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, if (ret) return ret; - intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(4)); - intel_ring_emit(engine, flags); - intel_ring_emit(engine, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(engine, 0); - intel_ring_advance(engine); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); + intel_ring_emit(ring, flags); + intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); + intel_ring_emit(ring, 0); + intel_ring_advance(ring); return 0; } @@ -278,30 +263,31 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, static int gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; int ret; ret = intel_ring_begin(req, 4); if (ret) return ret; - intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(4)); - intel_ring_emit(engine, PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD); - intel_ring_emit(engine, 0); - intel_ring_emit(engine, 0); - intel_ring_advance(engine); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); + intel_ring_emit(ring, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_advance(ring); return 0; } static int -gen7_render_ring_flush(struct drm_i915_gem_request *req, - u32 invalidate_domains, u32 flush_domains) +gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; + u32 scratch_addr = + i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; u32 flags = 0; - u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; int ret; /* @@ -318,13 +304,13 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, * number of bits based on the write domains has little performance * impact. 
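For reference while reading these conversions: the old (invalidate_domains, flush_domains) pair collapses into a single mode bitmask. A sketch of the flag definitions assumed throughout; they are introduced in intel_ringbuffer.h in this series, and the exact bit assignment here is an assumption:

        #define EMIT_INVALIDATE BIT(0)  /* assumed bit layout */
        #define EMIT_FLUSH      BIT(1)
        #define EMIT_BARRIER    (EMIT_INVALIDATE | EMIT_FLUSH)

A full barrier then reads engine->emit_flush(req, EMIT_BARRIER), as the workaround-emission hunk further below uses.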
*/ - if (flush_domains) { + if (mode & EMIT_FLUSH) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; } - if (invalidate_domains) { + if (mode & EMIT_INVALIDATE) { flags |= PIPE_CONTROL_TLB_INVALIDATE; flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; @@ -350,11 +336,11 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, if (ret) return ret; - intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(4)); - intel_ring_emit(engine, flags); - intel_ring_emit(engine, scratch_addr); - intel_ring_emit(engine, 0); - intel_ring_advance(engine); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); + intel_ring_emit(ring, flags); + intel_ring_emit(ring, scratch_addr); + intel_ring_emit(ring, 0); + intel_ring_advance(ring); return 0; } @@ -363,41 +349,41 @@ static int gen8_emit_pipe_control(struct drm_i915_gem_request *req, u32 flags, u32 scratch_addr) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; int ret; ret = intel_ring_begin(req, 6); if (ret) return ret; - intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(engine, flags); - intel_ring_emit(engine, scratch_addr); - intel_ring_emit(engine, 0); - intel_ring_emit(engine, 0); - intel_ring_emit(engine, 0); - intel_ring_advance(engine); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, flags); + intel_ring_emit(ring, scratch_addr); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_advance(ring); return 0; } static int -gen8_render_ring_flush(struct drm_i915_gem_request *req, - u32 invalidate_domains, u32 flush_domains) +gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { + u32 scratch_addr = + i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; u32 flags = 0; - u32 scratch_addr = req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; int ret; flags |= PIPE_CONTROL_CS_STALL; - if (flush_domains) { + if (mode & EMIT_FLUSH) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; } - if (invalidate_domains) { + if (mode & EMIT_INVALIDATE) { flags |= PIPE_CONTROL_TLB_INVALIDATE; flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; @@ -419,14 +405,7 @@ gen8_render_ring_flush(struct drm_i915_gem_request *req, return gen8_emit_pipe_control(req, flags, scratch_addr); } -static void ring_write_tail(struct intel_engine_cs *engine, - u32 value) -{ - struct drm_i915_private *dev_priv = engine->i915; - I915_WRITE_TAIL(engine, value); -} - -u64 intel_ring_get_active_head(struct intel_engine_cs *engine) +u64 intel_engine_get_active_head(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; u64 acthd; @@ -488,7 +467,7 @@ static void intel_ring_setup_status_page(struct intel_engine_cs *engine) mmio = RING_HWS_PGA(engine->mmio_base); } - I915_WRITE(mmio, (u32)engine->status_page.gfx_addr); + I915_WRITE(mmio, engine->status_page.ggtt_offset); POSTING_READ(mmio); /* @@ -519,7 +498,7 @@ static bool stop_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - if (!IS_GEN2(dev_priv)) { + if (INTEL_GEN(dev_priv) > 2) { I915_WRITE_MODE(engine, _MASKED_BIT_ENABLE(STOP_RING)); if (intel_wait_for_register(dev_priv, RING_MI_MODE(engine->mmio_base), @@ 
-539,9 +518,9 @@ static bool stop_ring(struct intel_engine_cs *engine) I915_WRITE_CTL(engine, 0); I915_WRITE_HEAD(engine, 0); - engine->write_tail(engine, 0); + I915_WRITE_TAIL(engine, 0); - if (!IS_GEN2(dev_priv)) { + if (INTEL_GEN(dev_priv) > 2) { (void)I915_READ_CTL(engine); I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING)); } @@ -549,16 +528,10 @@ static bool stop_ring(struct intel_engine_cs *engine) return (I915_READ_HEAD(engine) & HEAD_ADDR) == 0; } -void intel_engine_init_hangcheck(struct intel_engine_cs *engine) -{ - memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); -} - static int init_ring_common(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - struct intel_ringbuffer *ringbuf = engine->buffer; - struct drm_i915_gem_object *obj = ringbuf->obj; + struct intel_ring *ring = engine->buffer; int ret = 0; intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -586,10 +559,12 @@ static int init_ring_common(struct intel_engine_cs *engine) } } - if (I915_NEED_GFX_HWS(dev_priv)) - intel_ring_setup_status_page(engine); - else + if (HWS_NEEDS_PHYSICAL(dev_priv)) ring_setup_phys_status_page(engine); + else + intel_ring_setup_status_page(engine); + + intel_engine_reset_breadcrumbs(engine); /* Enforce ordering by reading HEAD register back */ I915_READ_HEAD(engine); @@ -598,40 +573,39 @@ static int init_ring_common(struct intel_engine_cs *engine) * registers with the above sequence (the readback of the HEAD registers * also enforces ordering), otherwise the hw might lose the new ring * register values. */ - I915_WRITE_START(engine, i915_gem_obj_ggtt_offset(obj)); + I915_WRITE_START(engine, i915_ggtt_offset(ring->vma)); /* WaClearRingBufHeadRegAtInit:ctg,elk */ if (I915_READ_HEAD(engine)) DRM_DEBUG("%s initialization failed [head=%08x], fudging\n", engine->name, I915_READ_HEAD(engine)); - I915_WRITE_HEAD(engine, 0); - (void)I915_READ_HEAD(engine); + + intel_ring_update_space(ring); + I915_WRITE_HEAD(engine, ring->head); + I915_WRITE_TAIL(engine, ring->tail); + (void)I915_READ_TAIL(engine); I915_WRITE_CTL(engine, - ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) + ((ring->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID); /* If the head is still not zero, the ring is dead */ - if (wait_for((I915_READ_CTL(engine) & RING_VALID) != 0 && - I915_READ_START(engine) == i915_gem_obj_ggtt_offset(obj) && - (I915_READ_HEAD(engine) & HEAD_ADDR) == 0, 50)) { + if (intel_wait_for_register_fw(dev_priv, RING_CTL(engine->mmio_base), + RING_VALID, RING_VALID, + 50)) { DRM_ERROR("%s initialization failed " - "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n", + "ctl %08x (valid? 
%d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n", engine->name, I915_READ_CTL(engine), I915_READ_CTL(engine) & RING_VALID, - I915_READ_HEAD(engine), I915_READ_TAIL(engine), + I915_READ_HEAD(engine), ring->head, + I915_READ_TAIL(engine), ring->tail, I915_READ_START(engine), - (unsigned long)i915_gem_obj_ggtt_offset(obj)); + i915_ggtt_offset(ring->vma)); ret = -EIO; goto out; } - ringbuf->last_retired_head = -1; - ringbuf->head = I915_READ_HEAD(engine); - ringbuf->tail = I915_READ_TAIL(engine) & TAIL_ADDR; - intel_ring_update_space(ringbuf); - intel_engine_init_hangcheck(engine); out: @@ -640,59 +614,25 @@ out: return ret; } -void intel_fini_pipe_control(struct intel_engine_cs *engine) -{ - if (engine->scratch.obj == NULL) - return; - - i915_gem_object_ggtt_unpin(engine->scratch.obj); - drm_gem_object_unreference(&engine->scratch.obj->base); - engine->scratch.obj = NULL; -} - -int intel_init_pipe_control(struct intel_engine_cs *engine, int size) +static void reset_ring_common(struct intel_engine_cs *engine, + struct drm_i915_gem_request *request) { - struct drm_i915_gem_object *obj; - int ret; - - WARN_ON(engine->scratch.obj); + struct intel_ring *ring = request->ring; - obj = i915_gem_object_create_stolen(&engine->i915->drm, size); - if (!obj) - obj = i915_gem_object_create(&engine->i915->drm, size); - if (IS_ERR(obj)) { - DRM_ERROR("Failed to allocate scratch page\n"); - ret = PTR_ERR(obj); - goto err; - } - - ret = i915_gem_obj_ggtt_pin(obj, 4096, PIN_HIGH); - if (ret) - goto err_unref; - - engine->scratch.obj = obj; - engine->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj); - DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n", - engine->name, engine->scratch.gtt_offset); - return 0; - -err_unref: - drm_gem_object_unreference(&engine->scratch.obj->base); -err: - return ret; + ring->head = request->postfix; + ring->last_retired_head = -1; } static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; struct i915_workarounds *w = &req->i915->workarounds; int ret, i; if (w->count == 0) return 0; - engine->gpu_caches_dirty = true; - ret = intel_ring_flush_all_caches(req); + ret = req->engine->emit_flush(req, EMIT_BARRIER); if (ret) return ret; @@ -700,17 +640,16 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) if (ret) return ret; - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(w->count)); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count)); for (i = 0; i < w->count; i++) { - intel_ring_emit_reg(engine, w->reg[i].addr); - intel_ring_emit(engine, w->reg[i].value); + intel_ring_emit_reg(ring, w->reg[i].addr); + intel_ring_emit(ring, w->reg[i].value); } - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(engine); + intel_ring_advance(ring); - engine->gpu_caches_dirty = true; - ret = intel_ring_flush_all_caches(req); + ret = req->engine->emit_flush(req, EMIT_BARRIER); if (ret) return ret; @@ -1022,7 +961,7 @@ static int skl_tune_iz_hashing(struct intel_engine_cs *engine) * Only consider slices where one, and only one, subslice has 7 * EUs */ - if (!is_power_of_2(dev_priv->info.subslice_7eu[i])) + if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i])) continue; /* @@ -1031,7 +970,7 @@ static int skl_tune_iz_hashing(struct intel_engine_cs *engine) * * -> 0 <= ss <= 3; */ - ss = ffs(dev_priv->info.subslice_7eu[i]) - 1; + ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1; vals[i] = 3 - ss; } @@ 
-1178,8 +1117,8 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine) I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2)); - /* WaInsertDummyPushConstPs:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) + /* WaToEnableHwFixForPushConstHWBug:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); @@ -1222,8 +1161,8 @@ static int kbl_init_workarounds(struct intel_engine_cs *engine) I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_RO_PERF_DIS); - /* WaInsertDummyPushConstPs:kbl */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) + /* WaToEnableHwFixForPushConstHWBug:kbl */ + if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); @@ -1329,191 +1268,194 @@ static void render_ring_cleanup(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - if (dev_priv->semaphore_obj) { - i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj); - drm_gem_object_unreference(&dev_priv->semaphore_obj->base); - dev_priv->semaphore_obj = NULL; - } - - intel_fini_pipe_control(engine); + i915_vma_unpin_and_release(&dev_priv->semaphore); } -static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req, - unsigned int num_dwords) +static int gen8_rcs_signal(struct drm_i915_gem_request *req) { -#define MBOX_UPDATE_DWORDS 8 - struct intel_engine_cs *signaller = signaller_req->engine; - struct drm_i915_private *dev_priv = signaller_req->i915; + struct intel_ring *ring = req->ring; + struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *waiter; enum intel_engine_id id; int ret, num_rings; - num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask); - num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS; -#undef MBOX_UPDATE_DWORDS - - ret = intel_ring_begin(signaller_req, num_dwords); + num_rings = INTEL_INFO(dev_priv)->num_rings; + ret = intel_ring_begin(req, (num_rings-1) * 8); if (ret) return ret; for_each_engine_id(waiter, dev_priv, id) { - u64 gtt_offset = signaller->semaphore.signal_ggtt[id]; + u64 gtt_offset = req->engine->semaphore.signal_ggtt[id]; if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_CS_STALL); - intel_ring_emit(signaller, lower_32_bits(gtt_offset)); - intel_ring_emit(signaller, upper_32_bits(gtt_offset)); - intel_ring_emit(signaller, signaller_req->seqno); - intel_ring_emit(signaller, 0); - intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL | - MI_SEMAPHORE_TARGET(waiter->hw_id)); - intel_ring_emit(signaller, 0); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_CS_STALL); + intel_ring_emit(ring, lower_32_bits(gtt_offset)); + intel_ring_emit(ring, upper_32_bits(gtt_offset)); + intel_ring_emit(ring, req->fence.seqno); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, + MI_SEMAPHORE_SIGNAL | + MI_SEMAPHORE_TARGET(waiter->hw_id)); + intel_ring_emit(ring, 0); } + intel_ring_advance(ring); return 0; } -static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req, - unsigned int num_dwords) +static int gen8_xcs_signal(struct drm_i915_gem_request *req) { -#define MBOX_UPDATE_DWORDS 6 - struct intel_engine_cs *signaller = signaller_req->engine; - struct drm_i915_private 
*dev_priv = signaller_req->i915; + struct intel_ring *ring = req->ring; + struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *waiter; enum intel_engine_id id; int ret, num_rings; - num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask); - num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS; -#undef MBOX_UPDATE_DWORDS - - ret = intel_ring_begin(signaller_req, num_dwords); + num_rings = INTEL_INFO(dev_priv)->num_rings; + ret = intel_ring_begin(req, (num_rings-1) * 6); if (ret) return ret; for_each_engine_id(waiter, dev_priv, id) { - u64 gtt_offset = signaller->semaphore.signal_ggtt[id]; + u64 gtt_offset = req->engine->semaphore.signal_ggtt[id]; if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - intel_ring_emit(signaller, (MI_FLUSH_DW + 1) | - MI_FLUSH_DW_OP_STOREDW); - intel_ring_emit(signaller, lower_32_bits(gtt_offset) | - MI_FLUSH_DW_USE_GTT); - intel_ring_emit(signaller, upper_32_bits(gtt_offset)); - intel_ring_emit(signaller, signaller_req->seqno); - intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL | - MI_SEMAPHORE_TARGET(waiter->hw_id)); - intel_ring_emit(signaller, 0); + intel_ring_emit(ring, + (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW); + intel_ring_emit(ring, + lower_32_bits(gtt_offset) | + MI_FLUSH_DW_USE_GTT); + intel_ring_emit(ring, upper_32_bits(gtt_offset)); + intel_ring_emit(ring, req->fence.seqno); + intel_ring_emit(ring, + MI_SEMAPHORE_SIGNAL | + MI_SEMAPHORE_TARGET(waiter->hw_id)); + intel_ring_emit(ring, 0); } + intel_ring_advance(ring); return 0; } -static int gen6_signal(struct drm_i915_gem_request *signaller_req, - unsigned int num_dwords) +static int gen6_signal(struct drm_i915_gem_request *req) { - struct intel_engine_cs *signaller = signaller_req->engine; - struct drm_i915_private *dev_priv = signaller_req->i915; - struct intel_engine_cs *useless; - enum intel_engine_id id; + struct intel_ring *ring = req->ring; + struct drm_i915_private *dev_priv = req->i915; + struct intel_engine_cs *engine; int ret, num_rings; -#define MBOX_UPDATE_DWORDS 3 - num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask); - num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2); -#undef MBOX_UPDATE_DWORDS - - ret = intel_ring_begin(signaller_req, num_dwords); + num_rings = INTEL_INFO(dev_priv)->num_rings; + ret = intel_ring_begin(req, round_up((num_rings-1) * 3, 2)); if (ret) return ret; - for_each_engine_id(useless, dev_priv, id) { - i915_reg_t mbox_reg = signaller->semaphore.mbox.signal[id]; + for_each_engine(engine, dev_priv) { + i915_reg_t mbox_reg; + + if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK)) + continue; + mbox_reg = req->engine->semaphore.mbox.signal[engine->hw_id]; if (i915_mmio_reg_valid(mbox_reg)) { - intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(signaller, mbox_reg); - intel_ring_emit(signaller, signaller_req->seqno); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit_reg(ring, mbox_reg); + intel_ring_emit(ring, req->fence.seqno); } } /* If num_dwords was rounded, make sure the tail pointer is correct */ if (num_rings % 2 == 0) - intel_ring_emit(signaller, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); + + return 0; +} + +static void i9xx_submit_request(struct drm_i915_gem_request *request) +{ + struct drm_i915_private *dev_priv = request->i915; + + I915_WRITE_TAIL(request->engine, + intel_ring_offset(request->ring, request->tail)); +} + +static int i9xx_emit_request(struct drm_i915_gem_request *req) +{ + struct intel_ring *ring = req->ring; + int ret; + + ret = 
intel_ring_begin(req, 4); + if (ret) + return ret; + + intel_ring_emit(ring, MI_STORE_DWORD_INDEX); + intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); + intel_ring_emit(ring, req->fence.seqno); + intel_ring_emit(ring, MI_USER_INTERRUPT); + intel_ring_advance(ring); + + req->tail = ring->tail; return 0; } /** - * gen6_add_request - Update the semaphore mailbox registers + * gen6_sema_emit_request - Update the semaphore mailbox registers * * @request - request to write to the ring * * Update the mailbox registers in the *other* rings with the current seqno. * This acts like a signal in the canonical semaphore. */ -static int -gen6_add_request(struct drm_i915_gem_request *req) +static int gen6_sema_emit_request(struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = req->engine; int ret; - if (engine->semaphore.signal) - ret = engine->semaphore.signal(req, 4); - else - ret = intel_ring_begin(req, 4); - + ret = req->engine->semaphore.signal(req); if (ret) return ret; - intel_ring_emit(engine, MI_STORE_DWORD_INDEX); - intel_ring_emit(engine, - I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(engine, req->seqno); - intel_ring_emit(engine, MI_USER_INTERRUPT); - __intel_ring_advance(engine); - - return 0; + return i9xx_emit_request(req); } -static int -gen8_render_add_request(struct drm_i915_gem_request *req) +static int gen8_render_emit_request(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; int ret; - if (engine->semaphore.signal) - ret = engine->semaphore.signal(req, 8); - else - ret = intel_ring_begin(req, 8); + if (engine->semaphore.signal) { + ret = engine->semaphore.signal(req); + if (ret) + return ret; + } + + ret = intel_ring_begin(req, 8); if (ret) return ret; - intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(engine, (PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE)); - intel_ring_emit(engine, intel_hws_seqno_address(req->engine)); - intel_ring_emit(engine, 0); - intel_ring_emit(engine, i915_gem_request_get_seqno(req)); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, (PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE)); + intel_ring_emit(ring, intel_hws_seqno_address(engine)); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, i915_gem_request_get_seqno(req)); /* We're thrashing one dword of HWS. 
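i9xx_emit_request() above writes req->fence.seqno into the status page at I915_GEM_HWS_INDEX and raises MI_USER_INTERRUPT. A hedged sketch of the CPU-side completion check this breadcrumb pairs with (the helper name is hypothetical; intel_read_status_page() is the driver's existing accessor):

        /* Complete once the breadcrumb reaches or passes the request's
         * seqno; signed subtraction keeps the comparison wrap-safe.
         */
        static bool sketch_request_completed(struct drm_i915_gem_request *req)
        {
                u32 hws = intel_read_status_page(req->engine,
                                                 I915_GEM_HWS_INDEX);

                return (s32)(hws - req->fence.seqno) >= 0;
        }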
*/ - intel_ring_emit(engine, 0); - intel_ring_emit(engine, MI_USER_INTERRUPT); - intel_ring_emit(engine, MI_NOOP); - __intel_ring_advance(engine); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_USER_INTERRUPT); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); - return 0; -} + req->tail = ring->tail; -static inline bool i915_gem_has_seqno_wrapped(struct drm_i915_private *dev_priv, - u32 seqno) -{ - return dev_priv->last_seqno < seqno; + return 0; } /** @@ -1525,82 +1467,71 @@ static inline bool i915_gem_has_seqno_wrapped(struct drm_i915_private *dev_priv, */ static int -gen8_ring_sync(struct drm_i915_gem_request *waiter_req, - struct intel_engine_cs *signaller, - u32 seqno) +gen8_ring_sync_to(struct drm_i915_gem_request *req, + struct drm_i915_gem_request *signal) { - struct intel_engine_cs *waiter = waiter_req->engine; - struct drm_i915_private *dev_priv = waiter_req->i915; - u64 offset = GEN8_WAIT_OFFSET(waiter, signaller->id); + struct intel_ring *ring = req->ring; + struct drm_i915_private *dev_priv = req->i915; + u64 offset = GEN8_WAIT_OFFSET(req->engine, signal->engine->id); struct i915_hw_ppgtt *ppgtt; int ret; - ret = intel_ring_begin(waiter_req, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; - intel_ring_emit(waiter, MI_SEMAPHORE_WAIT | - MI_SEMAPHORE_GLOBAL_GTT | - MI_SEMAPHORE_SAD_GTE_SDD); - intel_ring_emit(waiter, seqno); - intel_ring_emit(waiter, lower_32_bits(offset)); - intel_ring_emit(waiter, upper_32_bits(offset)); - intel_ring_advance(waiter); + intel_ring_emit(ring, + MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_SAD_GTE_SDD); + intel_ring_emit(ring, signal->fence.seqno); + intel_ring_emit(ring, lower_32_bits(offset)); + intel_ring_emit(ring, upper_32_bits(offset)); + intel_ring_advance(ring); /* When the !RCS engines idle waiting upon a semaphore, they lose their * pagetables and we must reload them before executing the batch. * We do this on the i915_switch_context() following the wait and * before the dispatch. */ - ppgtt = waiter_req->ctx->ppgtt; - if (ppgtt && waiter_req->engine->id != RCS) - ppgtt->pd_dirty_rings |= intel_engine_flag(waiter_req->engine); + ppgtt = req->ctx->ppgtt; + if (ppgtt && req->engine->id != RCS) + ppgtt->pd_dirty_rings |= intel_engine_flag(req->engine); return 0; } static int -gen6_ring_sync(struct drm_i915_gem_request *waiter_req, - struct intel_engine_cs *signaller, - u32 seqno) +gen6_ring_sync_to(struct drm_i915_gem_request *req, + struct drm_i915_gem_request *signal) { - struct intel_engine_cs *waiter = waiter_req->engine; + struct intel_ring *ring = req->ring; u32 dw1 = MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | MI_SEMAPHORE_REGISTER; - u32 wait_mbox = signaller->semaphore.mbox.wait[waiter->id]; + u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->hw_id]; int ret; - /* Throughout all of the GEM code, seqno passed implies our current - * seqno is >= the last seqno executed. However for hardware the - * comparison is strictly greater than. 
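A worked example of the strictly-greater-than semantics the comment above describes (values illustrative):

        u32 target  = 100;              /* seqno we must wait for */
        u32 operand = target - 1;       /* what gen6_ring_sync_to() emits */
        /* mailbox ==  99:  99 > 99 is false -> keep waiting
         * mailbox == 100: 100 > 99 is true  -> the wait completes exactly
         * when the signalling engine has executed the target request
         */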
- */ - seqno -= 1; - WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); - ret = intel_ring_begin(waiter_req, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; - /* If seqno wrap happened, omit the wait with no-ops */ - if (likely(!i915_gem_has_seqno_wrapped(waiter_req->i915, seqno))) { - intel_ring_emit(waiter, dw1 | wait_mbox); - intel_ring_emit(waiter, seqno); - intel_ring_emit(waiter, 0); - intel_ring_emit(waiter, MI_NOOP); - } else { - intel_ring_emit(waiter, MI_NOOP); - intel_ring_emit(waiter, MI_NOOP); - intel_ring_emit(waiter, MI_NOOP); - intel_ring_emit(waiter, MI_NOOP); - } - intel_ring_advance(waiter); + intel_ring_emit(ring, dw1 | wait_mbox); + /* Throughout all of the GEM code, seqno passed implies our current + * seqno is >= the last seqno executed. However for hardware the + * comparison is strictly greater than. + */ + intel_ring_emit(ring, signal->fence.seqno - 1); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } static void -gen5_seqno_barrier(struct intel_engine_cs *ring) +gen5_seqno_barrier(struct intel_engine_cs *engine) { /* MI_STORE are internally buffered by the GPU and not flushed * either by MI_FLUSH or SyncFlush or any other combination of @@ -1693,40 +1624,18 @@ i8xx_irq_disable(struct intel_engine_cs *engine) } static int -bsd_ring_flush(struct drm_i915_gem_request *req, - u32 invalidate_domains, - u32 flush_domains) +bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; int ret; ret = intel_ring_begin(req, 2); if (ret) return ret; - intel_ring_emit(engine, MI_FLUSH); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); - return 0; -} - -static int -i9xx_add_request(struct drm_i915_gem_request *req) -{ - struct intel_engine_cs *engine = req->engine; - int ret; - - ret = intel_ring_begin(req, 4); - if (ret) - return ret; - - intel_ring_emit(engine, MI_STORE_DWORD_INDEX); - intel_ring_emit(engine, - I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(engine, req->seqno); - intel_ring_emit(engine, MI_USER_INTERRUPT); - __intel_ring_advance(engine); - + intel_ring_emit(ring, MI_FLUSH); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -1788,24 +1697,24 @@ gen8_irq_disable(struct intel_engine_cs *engine) } static int -i965_dispatch_execbuffer(struct drm_i915_gem_request *req, - u64 offset, u32 length, - unsigned dispatch_flags) +i965_emit_bb_start(struct drm_i915_gem_request *req, + u64 offset, u32 length, + unsigned int dispatch_flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; int ret; ret = intel_ring_begin(req, 2); if (ret) return ret; - intel_ring_emit(engine, + intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags & I915_DISPATCH_SECURE ? 
0 : MI_BATCH_NON_SECURE_I965)); - intel_ring_emit(engine, offset); - intel_ring_advance(engine); + intel_ring_emit(ring, offset); + intel_ring_advance(ring); return 0; } @@ -1815,12 +1724,12 @@ i965_dispatch_execbuffer(struct drm_i915_gem_request *req, #define I830_TLB_ENTRIES (2) #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT) static int -i830_dispatch_execbuffer(struct drm_i915_gem_request *req, - u64 offset, u32 len, - unsigned dispatch_flags) +i830_emit_bb_start(struct drm_i915_gem_request *req, + u64 offset, u32 len, + unsigned int dispatch_flags) { - struct intel_engine_cs *engine = req->engine; - u32 cs_offset = engine->scratch.gtt_offset; + struct intel_ring *ring = req->ring; + u32 cs_offset = i915_ggtt_offset(req->engine->scratch); int ret; ret = intel_ring_begin(req, 6); @@ -1828,13 +1737,13 @@ i830_dispatch_execbuffer(struct drm_i915_gem_request *req, return ret; /* Evict the invalid PTE TLBs */ - intel_ring_emit(engine, COLOR_BLT_CMD | BLT_WRITE_RGBA); - intel_ring_emit(engine, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096); - intel_ring_emit(engine, I830_TLB_ENTRIES << 16 | 4); /* load each page */ - intel_ring_emit(engine, cs_offset); - intel_ring_emit(engine, 0xdeadbeef); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA); + intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096); + intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */ + intel_ring_emit(ring, cs_offset); + intel_ring_emit(ring, 0xdeadbeef); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) { if (len > I830_BATCH_LIMIT) @@ -1848,17 +1757,17 @@ i830_dispatch_execbuffer(struct drm_i915_gem_request *req, * stable batch scratch bo area (so that the CS never * stumbles over its tlb invalidation bug) ... */ - intel_ring_emit(engine, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA); - intel_ring_emit(engine, + intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA); + intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096); - intel_ring_emit(engine, DIV_ROUND_UP(len, 4096) << 16 | 4096); - intel_ring_emit(engine, cs_offset); - intel_ring_emit(engine, 4096); - intel_ring_emit(engine, offset); + intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 4096); + intel_ring_emit(ring, cs_offset); + intel_ring_emit(ring, 4096); + intel_ring_emit(ring, offset); - intel_ring_emit(engine, MI_FLUSH); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_FLUSH); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); /* ... and execute it. */ offset = cs_offset; @@ -1868,30 +1777,30 @@ i830_dispatch_execbuffer(struct drm_i915_gem_request *req, if (ret) return ret; - intel_ring_emit(engine, MI_BATCH_BUFFER_START | MI_BATCH_GTT); - intel_ring_emit(engine, offset | (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE)); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT); + intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ? 
+ 0 : MI_BATCH_NON_SECURE)); + intel_ring_advance(ring); return 0; } static int -i915_dispatch_execbuffer(struct drm_i915_gem_request *req, - u64 offset, u32 len, - unsigned dispatch_flags) +i915_emit_bb_start(struct drm_i915_gem_request *req, + u64 offset, u32 len, + unsigned int dispatch_flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; int ret; ret = intel_ring_begin(req, 2); if (ret) return ret; - intel_ring_emit(engine, MI_BATCH_BUFFER_START | MI_BATCH_GTT); - intel_ring_emit(engine, offset | (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE)); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT); + intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ? + 0 : MI_BATCH_NON_SECURE)); + intel_ring_advance(ring); return 0; } @@ -1909,79 +1818,79 @@ static void cleanup_phys_status_page(struct intel_engine_cs *engine) static void cleanup_status_page(struct intel_engine_cs *engine) { - struct drm_i915_gem_object *obj; + struct i915_vma *vma; - obj = engine->status_page.obj; - if (obj == NULL) + vma = fetch_and_zero(&engine->status_page.vma); + if (!vma) return; - kunmap(sg_page(obj->pages->sgl)); - i915_gem_object_ggtt_unpin(obj); - drm_gem_object_unreference(&obj->base); - engine->status_page.obj = NULL; + i915_vma_unpin(vma); + i915_gem_object_unpin_map(vma->obj); + i915_vma_put(vma); } static int init_status_page(struct intel_engine_cs *engine) { - struct drm_i915_gem_object *obj = engine->status_page.obj; - - if (obj == NULL) { - unsigned flags; - int ret; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned int flags; + int ret; - obj = i915_gem_object_create(&engine->i915->drm, 4096); - if (IS_ERR(obj)) { - DRM_ERROR("Failed to allocate status page\n"); - return PTR_ERR(obj); - } + obj = i915_gem_object_create(&engine->i915->drm, 4096); + if (IS_ERR(obj)) { + DRM_ERROR("Failed to allocate status page\n"); + return PTR_ERR(obj); + } - ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); - if (ret) - goto err_unref; - - flags = 0; - if (!HAS_LLC(engine->i915)) - /* On g33, we cannot place HWS above 256MiB, so - * restrict its pinning to the low mappable arena. - * Though this restriction is not documented for - * gen4, gen5, or byt, they also behave similarly - * and hang if the HWS is placed at the top of the - * GTT. To generalise, it appears that all !llc - * platforms have issues with us placing the HWS - * above the mappable region (even though we never - * actualy map it). - */ - flags |= PIN_MAPPABLE; - ret = i915_gem_obj_ggtt_pin(obj, 4096, flags); - if (ret) { -err_unref: - drm_gem_object_unreference(&obj->base); - return ret; - } + ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); + if (ret) + goto err; - engine->status_page.obj = obj; + vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err; } - engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj); - engine->status_page.page_addr = kmap(sg_page(obj->pages->sgl)); - memset(engine->status_page.page_addr, 0, PAGE_SIZE); + flags = PIN_GLOBAL; + if (!HAS_LLC(engine->i915)) + /* On g33, we cannot place HWS above 256MiB, so + * restrict its pinning to the low mappable arena. + * Though this restriction is not documented for + * gen4, gen5, or byt, they also behave similarly + * and hang if the HWS is placed at the top of the + * GTT. 
To generalise, it appears that all !llc + * platforms have issues with us placing the HWS + * above the mappable region (even though we never + * actualy map it). + */ + flags |= PIN_MAPPABLE; + ret = i915_vma_pin(vma, 0, 4096, flags); + if (ret) + goto err; - DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n", - engine->name, engine->status_page.gfx_addr); + engine->status_page.vma = vma; + engine->status_page.ggtt_offset = i915_ggtt_offset(vma); + engine->status_page.page_addr = + i915_gem_object_pin_map(obj, I915_MAP_WB); + DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n", + engine->name, i915_ggtt_offset(vma)); return 0; + +err: + i915_gem_object_put(obj); + return ret; } static int init_phys_status_page(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - if (!dev_priv->status_page_dmah) { - dev_priv->status_page_dmah = - drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE); - if (!dev_priv->status_page_dmah) - return -ENOMEM; - } + dev_priv->status_page_dmah = + drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE); + if (!dev_priv->status_page_dmah) + return -ENOMEM; engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr; memset(engine->status_page.page_addr, 0, PAGE_SIZE); @@ -1989,115 +1898,105 @@ static int init_phys_status_page(struct intel_engine_cs *engine) return 0; } -void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf) -{ - GEM_BUG_ON(ringbuf->vma == NULL); - GEM_BUG_ON(ringbuf->virtual_start == NULL); - - if (HAS_LLC(ringbuf->obj->base.dev) && !ringbuf->obj->stolen) - i915_gem_object_unpin_map(ringbuf->obj); - else - i915_vma_unpin_iomap(ringbuf->vma); - ringbuf->virtual_start = NULL; - - i915_gem_object_ggtt_unpin(ringbuf->obj); - ringbuf->vma = NULL; -} - -int intel_pin_and_map_ringbuffer_obj(struct drm_i915_private *dev_priv, - struct intel_ringbuffer *ringbuf) +int intel_ring_pin(struct intel_ring *ring) { - struct drm_i915_gem_object *obj = ringbuf->obj; /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ - unsigned flags = PIN_OFFSET_BIAS | 4096; + unsigned int flags = PIN_GLOBAL | PIN_OFFSET_BIAS | 4096; + enum i915_map_type map; + struct i915_vma *vma = ring->vma; void *addr; int ret; - if (HAS_LLC(dev_priv) && !obj->stolen) { - ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, flags); - if (ret) - return ret; + GEM_BUG_ON(ring->vaddr); - ret = i915_gem_object_set_to_cpu_domain(obj, true); - if (ret) - goto err_unpin; + map = HAS_LLC(ring->engine->i915) ? I915_MAP_WB : I915_MAP_WC; - addr = i915_gem_object_pin_map(obj); - if (IS_ERR(addr)) { - ret = PTR_ERR(addr); - goto err_unpin; - } - } else { - ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, - flags | PIN_MAPPABLE); - if (ret) - return ret; + if (vma->obj->stolen) + flags |= PIN_MAPPABLE; - ret = i915_gem_object_set_to_gtt_domain(obj, true); - if (ret) - goto err_unpin; + if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { + if (flags & PIN_MAPPABLE || map == I915_MAP_WC) + ret = i915_gem_object_set_to_gtt_domain(vma->obj, true); + else + ret = i915_gem_object_set_to_cpu_domain(vma->obj, true); + if (unlikely(ret)) + return ret; + } - /* Access through the GTT requires the device to be awake. 
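The conversions in this region all follow the same VMA-centric shape; a condensed sketch, simplified from init_status_page() above and intel_ring_pin() here, with error unwinding elided (not a drop-in replacement for either function):

        static void *sketch_pin_and_map(struct intel_engine_cs *engine,
                                        struct drm_i915_gem_object *obj)
        {
                enum i915_map_type map =
                        HAS_LLC(engine->i915) ? I915_MAP_WB : I915_MAP_WC;
                struct i915_vma *vma;
                int ret;

                vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL);
                if (IS_ERR(vma))
                        return vma;     /* ERR_PTR passes through as void * */

                ret = i915_vma_pin(vma, 0, PAGE_SIZE, PIN_GLOBAL);
                if (ret)
                        return ERR_PTR(ret);

                return i915_gem_object_pin_map(obj, map);       /* CPU view */
        }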
*/ - assert_rpm_wakelock_held(dev_priv); + ret = i915_vma_pin(vma, 0, PAGE_SIZE, flags); + if (unlikely(ret)) + return ret; - addr = i915_vma_pin_iomap(i915_gem_obj_to_ggtt(obj)); - if (IS_ERR(addr)) { - ret = PTR_ERR(addr); - goto err_unpin; - } - } + if (i915_vma_is_map_and_fenceable(vma)) + addr = (void __force *)i915_vma_pin_iomap(vma); + else + addr = i915_gem_object_pin_map(vma->obj, map); + if (IS_ERR(addr)) + goto err; - ringbuf->virtual_start = addr; - ringbuf->vma = i915_gem_obj_to_ggtt(obj); + ring->vaddr = addr; return 0; -err_unpin: - i915_gem_object_ggtt_unpin(obj); - return ret; +err: + i915_vma_unpin(vma); + return PTR_ERR(addr); } -static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) +void intel_ring_unpin(struct intel_ring *ring) { - drm_gem_object_unreference(&ringbuf->obj->base); - ringbuf->obj = NULL; + GEM_BUG_ON(!ring->vma); + GEM_BUG_ON(!ring->vaddr); + + if (i915_vma_is_map_and_fenceable(ring->vma)) + i915_vma_unpin_iomap(ring->vma); + else + i915_gem_object_unpin_map(ring->vma->obj); + ring->vaddr = NULL; + + i915_vma_unpin(ring->vma); } -static int intel_alloc_ringbuffer_obj(struct drm_device *dev, - struct intel_ringbuffer *ringbuf) +static struct i915_vma * +intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) { struct drm_i915_gem_object *obj; + struct i915_vma *vma; - obj = NULL; - if (!HAS_LLC(dev)) - obj = i915_gem_object_create_stolen(dev, ringbuf->size); - if (obj == NULL) - obj = i915_gem_object_create(dev, ringbuf->size); + obj = i915_gem_object_create_stolen(&dev_priv->drm, size); + if (!obj) + obj = i915_gem_object_create(&dev_priv->drm, size); if (IS_ERR(obj)) - return PTR_ERR(obj); + return ERR_CAST(obj); /* mark ring buffers as read-only from GPU side by default */ obj->gt_ro = 1; - ringbuf->obj = obj; + vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL); + if (IS_ERR(vma)) + goto err; - return 0; + return vma; + +err: + i915_gem_object_put(obj); + return vma; } -struct intel_ringbuffer * -intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size) +struct intel_ring * +intel_engine_create_ring(struct intel_engine_cs *engine, int size) { - struct intel_ringbuffer *ring; - int ret; + struct intel_ring *ring; + struct i915_vma *vma; + + GEM_BUG_ON(!is_power_of_2(size)); ring = kzalloc(sizeof(*ring), GFP_KERNEL); - if (ring == NULL) { - DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n", - engine->name); + if (!ring) return ERR_PTR(-ENOMEM); - } ring->engine = engine; - list_add(&ring->link, &engine->buffers); + + INIT_LIST_HEAD(&ring->request_list); ring->size = size; /* Workaround an erratum on the i830 which causes a hang if @@ -2111,23 +2010,20 @@ intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size) ring->last_retired_head = -1; intel_ring_update_space(ring); - ret = intel_alloc_ringbuffer_obj(&engine->i915->drm, ring); - if (ret) { - DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s: %d\n", - engine->name, ret); - list_del(&ring->link); + vma = intel_ring_create_vma(engine->i915, size); + if (IS_ERR(vma)) { kfree(ring); - return ERR_PTR(ret); + return ERR_CAST(vma); } + ring->vma = vma; return ring; } void -intel_ringbuffer_free(struct intel_ringbuffer *ring) +intel_ring_free(struct intel_ring *ring) { - intel_destroy_ringbuffer_obj(ring); - list_del(&ring->link); + i915_vma_put(ring->vma); kfree(ring); } @@ -2143,7 +2039,12 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx, return 0; if (ce->state) { - ret = i915_gem_obj_ggtt_pin(ce->state, 
ctx->ggtt_alignment, 0); + ret = i915_gem_object_set_to_gtt_domain(ce->state->obj, false); + if (ret) + goto error; + + ret = i915_vma_pin(ce->state, 0, ctx->ggtt_alignment, + PIN_GLOBAL | PIN_HIGH); if (ret) goto error; } @@ -2158,7 +2059,7 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx, if (ctx == ctx->i915->kernel_context) ce->initialised = true; - i915_gem_context_reference(ctx); + i915_gem_context_get(ctx); return 0; error: @@ -2177,30 +2078,25 @@ static void intel_ring_context_unpin(struct i915_gem_context *ctx, return; if (ce->state) - i915_gem_object_ggtt_unpin(ce->state); + i915_vma_unpin(ce->state); - i915_gem_context_unreference(ctx); + i915_gem_context_put(ctx); } -static int intel_init_ring_buffer(struct drm_device *dev, - struct intel_engine_cs *engine) +static int intel_init_ring_buffer(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_ringbuffer *ringbuf; + struct drm_i915_private *dev_priv = engine->i915; + struct intel_ring *ring; int ret; WARN_ON(engine->buffer); - engine->i915 = dev_priv; - INIT_LIST_HEAD(&engine->active_list); - INIT_LIST_HEAD(&engine->request_list); - INIT_LIST_HEAD(&engine->execlist_queue); - INIT_LIST_HEAD(&engine->buffers); - i915_gem_batch_pool_init(dev, &engine->batch_pool); + intel_engine_setup_common(engine); + memset(engine->semaphore.sync_seqno, 0, sizeof(engine->semaphore.sync_seqno)); - ret = intel_engine_init_breadcrumbs(engine); + ret = intel_engine_init_common(engine); if (ret) goto error; @@ -2215,44 +2111,38 @@ static int intel_init_ring_buffer(struct drm_device *dev, if (ret) goto error; - ringbuf = intel_engine_create_ringbuffer(engine, 32 * PAGE_SIZE); - if (IS_ERR(ringbuf)) { - ret = PTR_ERR(ringbuf); + ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE); + if (IS_ERR(ring)) { + ret = PTR_ERR(ring); goto error; } - engine->buffer = ringbuf; - if (I915_NEED_GFX_HWS(dev_priv)) { - ret = init_status_page(engine); + if (HWS_NEEDS_PHYSICAL(dev_priv)) { + WARN_ON(engine->id != RCS); + ret = init_phys_status_page(engine); if (ret) goto error; } else { - WARN_ON(engine->id != RCS); - ret = init_phys_status_page(engine); + ret = init_status_page(engine); if (ret) goto error; } - ret = intel_pin_and_map_ringbuffer_obj(dev_priv, ringbuf); + ret = intel_ring_pin(ring); if (ret) { - DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n", - engine->name, ret); - intel_destroy_ringbuffer_obj(ringbuf); + intel_ring_free(ring); goto error; } - - ret = i915_cmd_parser_init_ring(engine); - if (ret) - goto error; + engine->buffer = ring; return 0; error: - intel_cleanup_engine(engine); + intel_engine_cleanup(engine); return ret; } -void intel_cleanup_engine(struct intel_engine_cs *engine) +void intel_engine_cleanup(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv; @@ -2262,49 +2152,39 @@ void intel_cleanup_engine(struct intel_engine_cs *engine) dev_priv = engine->i915; if (engine->buffer) { - intel_stop_engine(engine); - WARN_ON(!IS_GEN2(dev_priv) && (I915_READ_MODE(engine) & MODE_IDLE) == 0); + WARN_ON(INTEL_GEN(dev_priv) > 2 && + (I915_READ_MODE(engine) & MODE_IDLE) == 0); - intel_unpin_ringbuffer_obj(engine->buffer); - intel_ringbuffer_free(engine->buffer); + intel_ring_unpin(engine->buffer); + intel_ring_free(engine->buffer); engine->buffer = NULL; } if (engine->cleanup) engine->cleanup(engine); - if (I915_NEED_GFX_HWS(dev_priv)) { - cleanup_status_page(engine); - } else { + if (HWS_NEEDS_PHYSICAL(dev_priv)) { WARN_ON(engine->id != RCS); 
cleanup_phys_status_page(engine); + } else { + cleanup_status_page(engine); } - i915_cmd_parser_fini_ring(engine); - i915_gem_batch_pool_fini(&engine->batch_pool); - intel_engine_fini_breadcrumbs(engine); + intel_engine_cleanup_common(engine); intel_ring_context_unpin(dev_priv->kernel_context, engine); engine->i915 = NULL; } -int intel_engine_idle(struct intel_engine_cs *engine) +void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) { - struct drm_i915_gem_request *req; - - /* Wait upon the last request to be completed */ - if (list_empty(&engine->request_list)) - return 0; + struct intel_engine_cs *engine; - req = list_entry(engine->request_list.prev, - struct drm_i915_gem_request, - list); - - /* Make sure we do not trigger any retires */ - return __i915_wait_request(req, - req->i915->mm.interruptible, - NULL, NULL); + for_each_engine(engine, dev_priv) { + engine->buffer->head = engine->buffer->tail; + engine->buffer->last_retired_head = -1; + } } int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request) @@ -2317,7 +2197,7 @@ int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request) */ request->reserved_space += LEGACY_REQUEST_SIZE; - request->ringbuf = request->engine->buffer; + request->ring = request->engine->buffer; ret = intel_ring_begin(request, 0); if (ret) @@ -2329,12 +2209,12 @@ int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request) static int wait_for_space(struct drm_i915_gem_request *req, int bytes) { - struct intel_ringbuffer *ringbuf = req->ringbuf; - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; struct drm_i915_gem_request *target; + int ret; - intel_ring_update_space(ringbuf); - if (ringbuf->space >= bytes) + intel_ring_update_space(ring); + if (ring->space >= bytes) return 0; /* @@ -2348,35 +2228,37 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) */ GEM_BUG_ON(!req->reserved_space); - list_for_each_entry(target, &engine->request_list, list) { + list_for_each_entry(target, &ring->request_list, ring_link) { unsigned space; - /* - * The request queue is per-engine, so can contain requests - * from multiple ringbuffers. Here, we must ignore any that - * aren't from the ringbuffer we're considering. - */ - if (target->ringbuf != ringbuf) - continue; - /* Would completion of this request free enough space? 
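A worked example of the space question just posed, following __intel_ring_space() at the top of this file (numbers illustrative):

        int head = 4096;        /* target->postfix: where the request ends */
        int tail = 30720;       /* current ring tail */
        int size = 32768;       /* 32 KiB legacy ring */

        int space = head - tail;        /* -26624: tail wrapped past head */
        if (space <= 0)
                space += size;          /* 6144 bytes would come free ... */
        space -= I915_RING_FREE_SPACE;  /* ... minus the mandatory slack */
        /* retiring up to this request frees ~6 KiB for intel_ring_begin() */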
*/ - space = __intel_ring_space(target->postfix, ringbuf->tail, - ringbuf->size); + space = __intel_ring_space(target->postfix, ring->tail, + ring->size); if (space >= bytes) break; } - if (WARN_ON(&target->list == &engine->request_list)) + if (WARN_ON(&target->ring_link == &ring->request_list)) return -ENOSPC; - return i915_wait_request(target); + ret = i915_wait_request(target, + I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, + NULL, NO_WAITBOOST); + if (ret) + return ret; + + i915_gem_request_retire_upto(target); + + intel_ring_update_space(ring); + GEM_BUG_ON(ring->space < bytes); + return 0; } int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) { - struct intel_ringbuffer *ringbuf = req->ringbuf; - int remain_actual = ringbuf->size - ringbuf->tail; - int remain_usable = ringbuf->effective_size - ringbuf->tail; + struct intel_ring *ring = req->ring; + int remain_actual = ring->size - ring->tail; + int remain_usable = ring->effective_size - ring->tail; int bytes = num_dwords * sizeof(u32); int total_bytes, wait_bytes; bool need_wrap = false; @@ -2403,37 +2285,33 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) wait_bytes = total_bytes; } - if (wait_bytes > ringbuf->space) { + if (wait_bytes > ring->space) { int ret = wait_for_space(req, wait_bytes); if (unlikely(ret)) return ret; - - intel_ring_update_space(ringbuf); - if (unlikely(ringbuf->space < wait_bytes)) - return -EAGAIN; } if (unlikely(need_wrap)) { - GEM_BUG_ON(remain_actual > ringbuf->space); - GEM_BUG_ON(ringbuf->tail + remain_actual > ringbuf->size); + GEM_BUG_ON(remain_actual > ring->space); + GEM_BUG_ON(ring->tail + remain_actual > ring->size); /* Fill the tail with MI_NOOP */ - memset(ringbuf->virtual_start + ringbuf->tail, - 0, remain_actual); - ringbuf->tail = 0; - ringbuf->space -= remain_actual; + memset(ring->vaddr + ring->tail, 0, remain_actual); + ring->tail = 0; + ring->space -= remain_actual; } - ringbuf->space -= bytes; - GEM_BUG_ON(ringbuf->space < 0); + ring->space -= bytes; + GEM_BUG_ON(ring->space < 0); return 0; } /* Align the ring tail to a cacheline boundary */ int intel_ring_cacheline_align(struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = req->engine; - int num_dwords = (engine->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); + struct intel_ring *ring = req->ring; + int num_dwords = + (ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); int ret; if (num_dwords == 0) @@ -2445,61 +2323,16 @@ int intel_ring_cacheline_align(struct drm_i915_gem_request *req) return ret; while (num_dwords--) - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(engine); + intel_ring_advance(ring); return 0; } -void intel_ring_init_seqno(struct intel_engine_cs *engine, u32 seqno) -{ - struct drm_i915_private *dev_priv = engine->i915; - - /* Our semaphore implementation is strictly monotonic (i.e. we proceed - * so long as the semaphore value in the register/page is greater - * than the sync value), so whenever we reset the seqno, - * so long as we reset the tracking semaphore value to 0, it will - * always be before the next request's seqno. If we don't reset - * the semaphore value, then when the seqno moves backwards all - * future waits will complete instantly (causing rendering corruption). 
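A worked illustration of the failure mode the deleted comment describes (values illustrative):

        u32 stale_mailbox = 0xfffff000; /* semaphore value from before reset */
        u32 new_operand   = 0x00000020; /* an early post-reset wait */
        /* The hardware predicate is "mailbox >= operand" (gen8 SAD_GTE_SDD;
         * strictly ">" on gen6), so stale_mailbox satisfies new_operand
         * immediately: the waiter runs before the signaller has executed,
         * i.e. the "rendering corruption" noted above. Hence the register
         * writes and memset() of the semaphore page to 0 on seqno reset.
         */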
- */ - if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) { - I915_WRITE(RING_SYNC_0(engine->mmio_base), 0); - I915_WRITE(RING_SYNC_1(engine->mmio_base), 0); - if (HAS_VEBOX(dev_priv)) - I915_WRITE(RING_SYNC_2(engine->mmio_base), 0); - } - if (dev_priv->semaphore_obj) { - struct drm_i915_gem_object *obj = dev_priv->semaphore_obj; - struct page *page = i915_gem_object_get_dirty_page(obj, 0); - void *semaphores = kmap(page); - memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0), - 0, I915_NUM_ENGINES * gen8_semaphore_seqno_size); - kunmap(page); - } - memset(engine->semaphore.sync_seqno, 0, - sizeof(engine->semaphore.sync_seqno)); - - intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); - if (engine->irq_seqno_barrier) - engine->irq_seqno_barrier(engine); - engine->last_submitted_seqno = seqno; - - engine->hangcheck.seqno = seqno; - - /* After manually advancing the seqno, fake the interrupt in case - * there are any waiters for that seqno. - */ - rcu_read_lock(); - intel_engine_wakeup(engine); - rcu_read_unlock(); -} - -static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine, - u32 value) +static void gen6_bsd_submit_request(struct drm_i915_gem_request *request) { - struct drm_i915_private *dev_priv = engine->i915; + struct drm_i915_private *dev_priv = request->i915; intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -2523,8 +2356,7 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine, DRM_ERROR("timed out waiting for the BSD ring to wake up\n"); /* Now that the ring is fully powered up, update the tail */ - I915_WRITE_FW(RING_TAIL(engine->mmio_base), value); - POSTING_READ_FW(RING_TAIL(engine->mmio_base)); + i9xx_submit_request(request); /* Let the ring send IDLE messages to the GT again, * and so let it sleep to conserve power when idle. @@ -2535,10 +2367,9 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine, intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, - u32 invalidate, u32 flush) +static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; uint32_t cmd; int ret; @@ -2563,30 +2394,29 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, * operation is complete. This bit is only valid when the * Post-Sync Operation field is a value of 1h or 3h." 
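gen6_bsd_submit_request() above replaces the old write_tail hook because on gen6 the BSD ring may be asleep when the tail write arrives; the write must happen under forcewake with the ring provably awake. A condensed sketch of that workaround (the PSMI poll is elided here since the hunk shows it in full):

        /* Sketch of the gen6 BSD submit path: tail write under forcewake. */
        static void bsd_submit_sketch(struct drm_i915_gem_request *request)
        {
                struct drm_i915_private *dev_priv = request->i915;

                intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
                /* block IDLE messaging, then poll GEN6_BSD_SLEEP_PSMI_CONTROL
                 * until the ring reports awake (timeout logs a DRM_ERROR) */
                i9xx_submit_request(request);   /* the actual RING_TAIL write */
                /* let the ring send IDLE messages to the GT again */
                intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
        }

Expressing this as a submit_request implementation, rather than a dedicated write_tail vfunc, keeps the workaround in the same per-engine submission slot that the other backends use.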
*/ - if (invalidate & I915_GEM_GPU_DOMAINS) + if (mode & EMIT_INVALIDATE) cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD; - intel_ring_emit(engine, cmd); - intel_ring_emit(engine, - I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); + intel_ring_emit(ring, cmd); + intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); if (INTEL_GEN(req->i915) >= 8) { - intel_ring_emit(engine, 0); /* upper addr */ - intel_ring_emit(engine, 0); /* value */ + intel_ring_emit(ring, 0); /* upper addr */ + intel_ring_emit(ring, 0); /* value */ } else { - intel_ring_emit(engine, 0); - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_NOOP); } - intel_ring_advance(engine); + intel_ring_advance(ring); return 0; } static int -gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, - u64 offset, u32 len, - unsigned dispatch_flags) +gen8_emit_bb_start(struct drm_i915_gem_request *req, + u64 offset, u32 len, + unsigned int dispatch_flags) { - struct intel_engine_cs *engine = req->engine; - bool ppgtt = USES_PPGTT(engine->dev) && + struct intel_ring *ring = req->ring; + bool ppgtt = USES_PPGTT(req->i915) && !(dispatch_flags & I915_DISPATCH_SECURE); int ret; @@ -2595,71 +2425,70 @@ gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, return ret; /* FIXME(BDW): Address space and security selectors. */ - intel_ring_emit(engine, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | + intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | (dispatch_flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0)); - intel_ring_emit(engine, lower_32_bits(offset)); - intel_ring_emit(engine, upper_32_bits(offset)); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, lower_32_bits(offset)); + intel_ring_emit(ring, upper_32_bits(offset)); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } static int -hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, - u64 offset, u32 len, - unsigned dispatch_flags) +hsw_emit_bb_start(struct drm_i915_gem_request *req, + u64 offset, u32 len, + unsigned int dispatch_flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; int ret; ret = intel_ring_begin(req, 2); if (ret) return ret; - intel_ring_emit(engine, + intel_ring_emit(ring, MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | (dispatch_flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0)); /* bit0-7 is the length on GEN6+ */ - intel_ring_emit(engine, offset); - intel_ring_advance(engine); + intel_ring_emit(ring, offset); + intel_ring_advance(ring); return 0; } static int -gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, - u64 offset, u32 len, - unsigned dispatch_flags) +gen6_emit_bb_start(struct drm_i915_gem_request *req, + u64 offset, u32 len, + unsigned int dispatch_flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; int ret; ret = intel_ring_begin(req, 2); if (ret) return ret; - intel_ring_emit(engine, + intel_ring_emit(ring, MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? 
0 : MI_BATCH_NON_SECURE_I965)); /* bit0-7 is the length on GEN6+ */ - intel_ring_emit(engine, offset); - intel_ring_advance(engine); + intel_ring_emit(ring, offset); + intel_ring_advance(ring); return 0; } /* Blitter support (SandyBridge+) */ -static int gen6_ring_flush(struct drm_i915_gem_request *req, - u32 invalidate, u32 flush) +static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; uint32_t cmd; int ret; @@ -2684,19 +2513,19 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, * operation is complete. This bit is only valid when the * Post-Sync Operation field is a value of 1h or 3h." */ - if (invalidate & I915_GEM_DOMAIN_RENDER) + if (mode & EMIT_INVALIDATE) cmd |= MI_INVALIDATE_TLB; - intel_ring_emit(engine, cmd); - intel_ring_emit(engine, + intel_ring_emit(ring, cmd); + intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); if (INTEL_GEN(req->i915) >= 8) { - intel_ring_emit(engine, 0); /* upper addr */ - intel_ring_emit(engine, 0); /* value */ + intel_ring_emit(ring, 0); /* upper addr */ + intel_ring_emit(ring, 0); /* value */ } else { - intel_ring_emit(engine, 0); - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_NOOP); } - intel_ring_advance(engine); + intel_ring_advance(ring); return 0; } @@ -2707,38 +2536,39 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, struct drm_i915_gem_object *obj; int ret, i; - if (!i915_semaphore_is_enabled(dev_priv)) + if (!i915.semaphores) return; - if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore_obj) { + if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore) { + struct i915_vma *vma; + obj = i915_gem_object_create(&dev_priv->drm, 4096); - if (IS_ERR(obj)) { - DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n"); - i915.semaphores = 0; - } else { - i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); - ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK); - if (ret != 0) { - drm_gem_object_unreference(&obj->base); - DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n"); - i915.semaphores = 0; - } else { - dev_priv->semaphore_obj = obj; - } - } - } + if (IS_ERR(obj)) + goto err; - if (!i915_semaphore_is_enabled(dev_priv)) - return; + vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL); + if (IS_ERR(vma)) + goto err_obj; + + ret = i915_gem_object_set_to_gtt_domain(obj, false); + if (ret) + goto err_obj; + + ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (ret) + goto err_obj; + + dev_priv->semaphore = vma; + } if (INTEL_GEN(dev_priv) >= 8) { - u64 offset = i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj); + u32 offset = i915_ggtt_offset(dev_priv->semaphore); - engine->semaphore.sync_to = gen8_ring_sync; + engine->semaphore.sync_to = gen8_ring_sync_to; engine->semaphore.signal = gen8_xcs_signal; for (i = 0; i < I915_NUM_ENGINES; i++) { - u64 ring_offset; + u32 ring_offset; if (i != engine->id) ring_offset = offset + GEN8_SEMAPHORE_OFFSET(engine->id, i); @@ -2748,7 +2578,7 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, engine->semaphore.signal_ggtt[i] = ring_offset; } } else if (INTEL_GEN(dev_priv) >= 6) { - engine->semaphore.sync_to = gen6_ring_sync; + engine->semaphore.sync_to = gen6_ring_sync_to; engine->semaphore.signal = gen6_signal; /* @@ -2758,52 +2588,62 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, * initialized as INVALID. 
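For gen8 the semaphore storage above becomes a single pinned-high GGTT page, indexed by (signaller, waiter) pairs. A sketch of the addressing, matching the GEN8_SEMAPHORE_OFFSET macro in intel_ringbuffer.h; the 8-byte slot reflects the qword-aligned MI_FLUSH_DW writes, and the function name is illustrative:

        /* Sketch: slot that engine 'from' signals and engine 'to' waits on. */
        static u32 gen8_sema_offset_sketch(u32 base, int from, int to)
        {
                const u32 slot_size = 8;        /* gen8_semaphore_seqno_size */

                return base + (from * I915_NUM_ENGINES + to) * slot_size;
        }

With I915_NUM_ENGINES = 5 the full table is 5 * 5 * 8 = 200 bytes, comfortably inside the 4096-byte object allocated above.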
Gen8 will initialize the * sema between VCS2 and RCS later. */ - for (i = 0; i < I915_NUM_ENGINES; i++) { + for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) { static const struct { u32 wait_mbox; i915_reg_t mbox_reg; - } sem_data[I915_NUM_ENGINES][I915_NUM_ENGINES] = { - [RCS] = { - [VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC }, - [BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC }, - [VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC }, + } sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = { + [RCS_HW] = { + [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC }, + [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC }, + [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC }, }, - [VCS] = { - [RCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC }, - [BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC }, - [VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC }, + [VCS_HW] = { + [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC }, + [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC }, + [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC }, }, - [BCS] = { - [RCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC }, - [VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC }, - [VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC }, + [BCS_HW] = { + [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC }, + [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC }, + [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC }, }, - [VECS] = { - [RCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC }, - [VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC }, - [BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC }, + [VECS_HW] = { + [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC }, + [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC }, + [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC }, }, }; u32 wait_mbox; i915_reg_t mbox_reg; - if (i == engine->id || i == VCS2) { + if (i == engine->hw_id) { wait_mbox = MI_SEMAPHORE_SYNC_INVALID; mbox_reg = GEN6_NOSYNC; } else { - wait_mbox = sem_data[engine->id][i].wait_mbox; - mbox_reg = sem_data[engine->id][i].mbox_reg; + wait_mbox = sem_data[engine->hw_id][i].wait_mbox; + mbox_reg = sem_data[engine->hw_id][i].mbox_reg; } engine->semaphore.mbox.wait[i] = wait_mbox; engine->semaphore.mbox.signal[i] = mbox_reg; } } + + return; + +err_obj: + i915_gem_object_put(obj); +err: + DRM_DEBUG_DRIVER("Failed to allocate space for semaphores, disabling\n"); + i915.semaphores = 0; } static void intel_ring_init_irq(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { + engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << engine->irq_shift; + if (INTEL_GEN(dev_priv) >= 8) { engine->irq_enable = gen8_irq_enable; engine->irq_disable = gen8_irq_disable; @@ -2828,83 +2668,76 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv, static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { + intel_ring_init_irq(dev_priv, engine); + intel_ring_init_semaphores(dev_priv, engine); + engine->init_hw = init_ring_common; - engine->write_tail = 
ring_write_tail; + engine->reset_hw = reset_ring_common; - engine->add_request = i9xx_add_request; - if (INTEL_GEN(dev_priv) >= 6) - engine->add_request = gen6_add_request; + engine->emit_request = i9xx_emit_request; + if (i915.semaphores) + engine->emit_request = gen6_sema_emit_request; + engine->submit_request = i9xx_submit_request; if (INTEL_GEN(dev_priv) >= 8) - engine->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; + engine->emit_bb_start = gen8_emit_bb_start; else if (INTEL_GEN(dev_priv) >= 6) - engine->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; + engine->emit_bb_start = gen6_emit_bb_start; else if (INTEL_GEN(dev_priv) >= 4) - engine->dispatch_execbuffer = i965_dispatch_execbuffer; + engine->emit_bb_start = i965_emit_bb_start; else if (IS_I830(dev_priv) || IS_845G(dev_priv)) - engine->dispatch_execbuffer = i830_dispatch_execbuffer; + engine->emit_bb_start = i830_emit_bb_start; else - engine->dispatch_execbuffer = i915_dispatch_execbuffer; - - intel_ring_init_irq(dev_priv, engine); - intel_ring_init_semaphores(dev_priv, engine); + engine->emit_bb_start = i915_emit_bb_start; } -int intel_init_render_ring_buffer(struct drm_device *dev) +int intel_init_render_ring_buffer(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_engine_cs *engine = &dev_priv->engine[RCS]; + struct drm_i915_private *dev_priv = engine->i915; int ret; - engine->name = "render ring"; - engine->id = RCS; - engine->exec_id = I915_EXEC_RENDER; - engine->hw_id = 0; - engine->mmio_base = RENDER_RING_BASE; - intel_ring_default_vfuncs(dev_priv, engine); - engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; if (HAS_L3_DPF(dev_priv)) engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT; if (INTEL_GEN(dev_priv) >= 8) { engine->init_context = intel_rcs_ctx_init; - engine->add_request = gen8_render_add_request; - engine->flush = gen8_render_ring_flush; - if (i915_semaphore_is_enabled(dev_priv)) + engine->emit_request = gen8_render_emit_request; + engine->emit_flush = gen8_render_ring_flush; + if (i915.semaphores) engine->semaphore.signal = gen8_rcs_signal; } else if (INTEL_GEN(dev_priv) >= 6) { engine->init_context = intel_rcs_ctx_init; - engine->flush = gen7_render_ring_flush; + engine->emit_flush = gen7_render_ring_flush; if (IS_GEN6(dev_priv)) - engine->flush = gen6_render_ring_flush; + engine->emit_flush = gen6_render_ring_flush; } else if (IS_GEN5(dev_priv)) { - engine->flush = gen4_render_ring_flush; + engine->emit_flush = gen4_render_ring_flush; } else { if (INTEL_GEN(dev_priv) < 4) - engine->flush = gen2_render_ring_flush; + engine->emit_flush = gen2_render_ring_flush; else - engine->flush = gen4_render_ring_flush; + engine->emit_flush = gen4_render_ring_flush; engine->irq_enable_mask = I915_USER_INTERRUPT; } if (IS_HASWELL(dev_priv)) - engine->dispatch_execbuffer = hsw_ring_dispatch_execbuffer; + engine->emit_bb_start = hsw_emit_bb_start; engine->init_hw = init_render_ring; engine->cleanup = render_ring_cleanup; - ret = intel_init_ring_buffer(dev, engine); + ret = intel_init_ring_buffer(engine); if (ret) return ret; if (INTEL_GEN(dev_priv) >= 6) { - ret = intel_init_pipe_control(engine, 4096); + ret = intel_engine_create_scratch(engine, 4096); if (ret) return ret; } else if (HAS_BROKEN_CS_TLB(dev_priv)) { - ret = intel_init_pipe_control(engine, I830_WA_SIZE); + ret = intel_engine_create_scratch(engine, I830_WA_SIZE); if (ret) return ret; } @@ -2912,166 +2745,71 @@ int intel_init_render_ring_buffer(struct drm_device *dev) return 0; } -int 
intel_init_bsd_ring_buffer(struct drm_device *dev) +int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_engine_cs *engine = &dev_priv->engine[VCS]; - - engine->name = "bsd ring"; - engine->id = VCS; - engine->exec_id = I915_EXEC_BSD; - engine->hw_id = 1; + struct drm_i915_private *dev_priv = engine->i915; intel_ring_default_vfuncs(dev_priv, engine); if (INTEL_GEN(dev_priv) >= 6) { - engine->mmio_base = GEN6_BSD_RING_BASE; /* gen6 bsd needs a special wa for tail updates */ if (IS_GEN6(dev_priv)) - engine->write_tail = gen6_bsd_ring_write_tail; - engine->flush = gen6_bsd_ring_flush; - if (INTEL_GEN(dev_priv) >= 8) - engine->irq_enable_mask = - GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; - else + engine->submit_request = gen6_bsd_submit_request; + engine->emit_flush = gen6_bsd_ring_flush; + if (INTEL_GEN(dev_priv) < 8) engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; } else { engine->mmio_base = BSD_RING_BASE; - engine->flush = bsd_ring_flush; + engine->emit_flush = bsd_ring_flush; if (IS_GEN5(dev_priv)) engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT; else engine->irq_enable_mask = I915_BSD_USER_INTERRUPT; } - return intel_init_ring_buffer(dev, engine); + return intel_init_ring_buffer(engine); } /** * Initialize the second BSD ring (eg. Broadwell GT3, Skylake GT3) */ -int intel_init_bsd2_ring_buffer(struct drm_device *dev) +int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_engine_cs *engine = &dev_priv->engine[VCS2]; - - engine->name = "bsd2 ring"; - engine->id = VCS2; - engine->exec_id = I915_EXEC_BSD; - engine->hw_id = 4; - engine->mmio_base = GEN8_BSD2_RING_BASE; + struct drm_i915_private *dev_priv = engine->i915; intel_ring_default_vfuncs(dev_priv, engine); - engine->flush = gen6_bsd_ring_flush; - engine->irq_enable_mask = - GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; + engine->emit_flush = gen6_bsd_ring_flush; - return intel_init_ring_buffer(dev, engine); + return intel_init_ring_buffer(engine); } -int intel_init_blt_ring_buffer(struct drm_device *dev) +int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_engine_cs *engine = &dev_priv->engine[BCS]; - - engine->name = "blitter ring"; - engine->id = BCS; - engine->exec_id = I915_EXEC_BLT; - engine->hw_id = 2; - engine->mmio_base = BLT_RING_BASE; + struct drm_i915_private *dev_priv = engine->i915; intel_ring_default_vfuncs(dev_priv, engine); - engine->flush = gen6_ring_flush; - if (INTEL_GEN(dev_priv) >= 8) - engine->irq_enable_mask = - GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; - else + engine->emit_flush = gen6_ring_flush; + if (INTEL_GEN(dev_priv) < 8) engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; - return intel_init_ring_buffer(dev, engine); + return intel_init_ring_buffer(engine); } -int intel_init_vebox_ring_buffer(struct drm_device *dev) +int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_engine_cs *engine = &dev_priv->engine[VECS]; - - engine->name = "video enhancement ring"; - engine->id = VECS; - engine->exec_id = I915_EXEC_VEBOX; - engine->hw_id = 3; - engine->mmio_base = VEBOX_RING_BASE; + struct drm_i915_private *dev_priv = engine->i915; intel_ring_default_vfuncs(dev_priv, engine); - engine->flush = gen6_ring_flush; + engine->emit_flush = gen6_ring_flush; - if (INTEL_GEN(dev_priv) >= 8) { 
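The gen8 irq-mask special cases being removed around this point are all redundant once intel_ring_init_irq() derives the mask from a per-engine shift, as added earlier in this patch. Illustratively, assuming irq_shift is populated during common engine setup (e.g. with GEN8_VCS2_IRQ_SHIFT for the second BSD engine), one generic expression reproduces every removed constant:

        /* One generic expression replaces the per-engine gen8 branches. */
        engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << engine->irq_shift;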
- engine->irq_enable_mask = - GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT; - } else { + if (INTEL_GEN(dev_priv) < 8) { engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; engine->irq_enable = hsw_vebox_irq_enable; engine->irq_disable = hsw_vebox_irq_disable; } - return intel_init_ring_buffer(dev, engine); -} - -int -intel_ring_flush_all_caches(struct drm_i915_gem_request *req) -{ - struct intel_engine_cs *engine = req->engine; - int ret; - - if (!engine->gpu_caches_dirty) - return 0; - - ret = engine->flush(req, 0, I915_GEM_GPU_DOMAINS); - if (ret) - return ret; - - trace_i915_gem_ring_flush(req, 0, I915_GEM_GPU_DOMAINS); - - engine->gpu_caches_dirty = false; - return 0; -} - -int -intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req) -{ - struct intel_engine_cs *engine = req->engine; - uint32_t flush_domains; - int ret; - - flush_domains = 0; - if (engine->gpu_caches_dirty) - flush_domains = I915_GEM_GPU_DOMAINS; - - ret = engine->flush(req, I915_GEM_GPU_DOMAINS, flush_domains); - if (ret) - return ret; - - trace_i915_gem_ring_flush(req, I915_GEM_GPU_DOMAINS, flush_domains); - - engine->gpu_caches_dirty = false; - return 0; -} - -void -intel_stop_engine(struct intel_engine_cs *engine) -{ - int ret; - - if (!intel_engine_initialized(engine)) - return; - - ret = intel_engine_idle(engine); - if (ret) - DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n", - engine->name, ret); - - stop_ring(engine); + return intel_init_ring_buffer(engine); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 12cb7ed90014..ec0b4a0c605d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -3,6 +3,7 @@ #include <linux/hashtable.h> #include "i915_gem_batch_pool.h" +#include "i915_gem_request.h" #define I915_CMD_HASH_ORDER 9 @@ -25,29 +26,29 @@ */ #define I915_RING_FREE_SPACE 64 -struct intel_hw_status_page { - u32 *page_addr; - unsigned int gfx_addr; - struct drm_i915_gem_object *obj; +struct intel_hw_status_page { + struct i915_vma *vma; + u32 *page_addr; + u32 ggtt_offset; }; -#define I915_READ_TAIL(ring) I915_READ(RING_TAIL((ring)->mmio_base)) -#define I915_WRITE_TAIL(ring, val) I915_WRITE(RING_TAIL((ring)->mmio_base), val) +#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base)) +#define I915_WRITE_TAIL(engine, val) I915_WRITE(RING_TAIL((engine)->mmio_base), val) -#define I915_READ_START(ring) I915_READ(RING_START((ring)->mmio_base)) -#define I915_WRITE_START(ring, val) I915_WRITE(RING_START((ring)->mmio_base), val) +#define I915_READ_START(engine) I915_READ(RING_START((engine)->mmio_base)) +#define I915_WRITE_START(engine, val) I915_WRITE(RING_START((engine)->mmio_base), val) -#define I915_READ_HEAD(ring) I915_READ(RING_HEAD((ring)->mmio_base)) -#define I915_WRITE_HEAD(ring, val) I915_WRITE(RING_HEAD((ring)->mmio_base), val) +#define I915_READ_HEAD(engine) I915_READ(RING_HEAD((engine)->mmio_base)) +#define I915_WRITE_HEAD(engine, val) I915_WRITE(RING_HEAD((engine)->mmio_base), val) -#define I915_READ_CTL(ring) I915_READ(RING_CTL((ring)->mmio_base)) -#define I915_WRITE_CTL(ring, val) I915_WRITE(RING_CTL((ring)->mmio_base), val) +#define I915_READ_CTL(engine) I915_READ(RING_CTL((engine)->mmio_base)) +#define I915_WRITE_CTL(engine, val) I915_WRITE(RING_CTL((engine)->mmio_base), val) -#define I915_READ_IMR(ring) I915_READ(RING_IMR((ring)->mmio_base)) -#define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val) +#define I915_READ_IMR(engine) 
I915_READ(RING_IMR((engine)->mmio_base)) +#define I915_WRITE_IMR(engine, val) I915_WRITE(RING_IMR((engine)->mmio_base), val) -#define I915_READ_MODE(ring) I915_READ(RING_MI_MODE((ring)->mmio_base)) -#define I915_WRITE_MODE(ring, val) I915_WRITE(RING_MI_MODE((ring)->mmio_base), val) +#define I915_READ_MODE(engine) I915_READ(RING_MI_MODE((engine)->mmio_base)) +#define I915_WRITE_MODE(engine, val) I915_WRITE(RING_MI_MODE((engine)->mmio_base), val) /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to * do the writes, and that must have qw aligned offsets, simply pretend it's 8b. @@ -56,13 +57,13 @@ struct intel_hw_status_page { #define GEN8_SEMAPHORE_OFFSET(__from, __to) \ (((__from) * I915_NUM_ENGINES + (__to)) * gen8_semaphore_seqno_size) #define GEN8_SIGNAL_OFFSET(__ring, to) \ - (i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \ + (dev_priv->semaphore->node.start + \ GEN8_SEMAPHORE_OFFSET((__ring)->id, (to))) #define GEN8_WAIT_OFFSET(__ring, from) \ - (i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \ + (dev_priv->semaphore->node.start + \ GEN8_SEMAPHORE_OFFSET(from, (__ring)->id)) -enum intel_ring_hangcheck_action { +enum intel_engine_hangcheck_action { HANGCHECK_IDLE = 0, HANGCHECK_WAIT, HANGCHECK_ACTIVE, @@ -72,23 +73,22 @@ enum intel_ring_hangcheck_action { #define HANGCHECK_SCORE_RING_HUNG 31 -struct intel_ring_hangcheck { +struct intel_engine_hangcheck { u64 acthd; - unsigned long user_interrupts; u32 seqno; int score; - enum intel_ring_hangcheck_action action; + enum intel_engine_hangcheck_action action; int deadlock; u32 instdone[I915_NUM_INSTDONE_REG]; }; -struct intel_ringbuffer { - struct drm_i915_gem_object *obj; - void __iomem *virtual_start; +struct intel_ring { struct i915_vma *vma; + void *vaddr; struct intel_engine_cs *engine; - struct list_head link; + + struct list_head request_list; u32 head; u32 tail; @@ -121,12 +121,12 @@ struct drm_i915_reg_table; * an option for future use. * size: size of the batch in DWORDS */ -struct i915_ctx_workarounds { +struct i915_ctx_workarounds { struct i915_wa_ctx_bb { u32 offset; u32 size; } indirect_ctx, per_ctx; - struct drm_i915_gem_object *obj; + struct i915_vma *vma; }; struct drm_i915_gem_request; @@ -144,11 +144,18 @@ struct intel_engine_cs { #define I915_NUM_ENGINES 5 #define _VCS(n) (VCS + (n)) unsigned int exec_id; - unsigned int hw_id; - unsigned int guc_id; /* XXX same as hw_id? */ + enum intel_engine_hw_id { + RCS_HW = 0, + VCS_HW, + BCS_HW, + VECS_HW, + VCS2_HW + } hw_id; + enum intel_engine_hw_id guc_id; /* XXX same as hw_id? */ + u64 fence_context; u32 mmio_base; - struct intel_ringbuffer *buffer; - struct list_head buffers; + unsigned int irq_shift; + struct intel_ring *buffer; /* Rather than have every client wait upon all user interrupts, * with the herd waking after every interrupt and each doing the @@ -167,8 +174,7 @@ struct intel_engine_cs { * the overhead of waking that client is much preferred. 
*/ struct intel_breadcrumbs { - struct task_struct *irq_seqno_bh; /* bh for user interrupts */ - unsigned long irq_wakeups; + struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */ bool irq_posted; spinlock_t lock; /* protects the lists of requests */ @@ -178,6 +184,9 @@ struct intel_engine_cs { struct task_struct *signaler; /* used for fence signalling */ struct drm_i915_gem_request *first_signal; struct timer_list fake_irq; /* used after a missed interrupt */ + struct timer_list hangcheck; /* detect missed interrupts */ + + unsigned long timeout; bool irq_enabled : 1; bool rpm_wakelock : 1; @@ -192,36 +201,48 @@ struct intel_engine_cs { struct intel_hw_status_page status_page; struct i915_ctx_workarounds wa_ctx; + struct i915_vma *scratch; u32 irq_keep_mask; /* always keep these interrupts */ u32 irq_enable_mask; /* bitmask to enable ring interrupt */ - void (*irq_enable)(struct intel_engine_cs *ring); - void (*irq_disable)(struct intel_engine_cs *ring); + void (*irq_enable)(struct intel_engine_cs *engine); + void (*irq_disable)(struct intel_engine_cs *engine); - int (*init_hw)(struct intel_engine_cs *ring); + int (*init_hw)(struct intel_engine_cs *engine); + void (*reset_hw)(struct intel_engine_cs *engine, + struct drm_i915_gem_request *req); int (*init_context)(struct drm_i915_gem_request *req); - void (*write_tail)(struct intel_engine_cs *ring, - u32 value); - int __must_check (*flush)(struct drm_i915_gem_request *req, - u32 invalidate_domains, - u32 flush_domains); - int (*add_request)(struct drm_i915_gem_request *req); + int (*emit_flush)(struct drm_i915_gem_request *request, + u32 mode); +#define EMIT_INVALIDATE BIT(0) +#define EMIT_FLUSH BIT(1) +#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH) + int (*emit_bb_start)(struct drm_i915_gem_request *req, + u64 offset, u32 length, + unsigned int dispatch_flags); +#define I915_DISPATCH_SECURE BIT(0) +#define I915_DISPATCH_PINNED BIT(1) +#define I915_DISPATCH_RS BIT(2) + int (*emit_request)(struct drm_i915_gem_request *req); + + /* Pass the request to the hardware queue (e.g. directly into + * the legacy ringbuffer or to the end of an execlist). + * + * This is called from an atomic context with irqs disabled; must + * be irq safe. + */ + void (*submit_request)(struct drm_i915_gem_request *req); + /* Some chipsets are not quite as coherent as advertised and need * an expensive kick to force a true read of the up-to-date seqno. * However, the up-to-date seqno is not always required and the last * seen value is good enough. Note that the seqno will always be * monotonic, even if not coherent. */ - void (*irq_seqno_barrier)(struct intel_engine_cs *ring); - int (*dispatch_execbuffer)(struct drm_i915_gem_request *req, - u64 offset, u32 length, - unsigned dispatch_flags); -#define I915_DISPATCH_SECURE 0x1 -#define I915_DISPATCH_PINNED 0x2 -#define I915_DISPATCH_RS 0x4 - void (*cleanup)(struct intel_engine_cs *ring); + void (*irq_seqno_barrier)(struct intel_engine_cs *engine); + void (*cleanup)(struct intel_engine_cs *engine); /* GEN8 signal/wait table - never trust comments! 
* signal to signal to signal to signal to signal to @@ -264,51 +285,36 @@ struct intel_engine_cs { u32 sync_seqno[I915_NUM_ENGINES-1]; union { +#define GEN6_SEMAPHORE_LAST VECS_HW +#define GEN6_NUM_SEMAPHORES (GEN6_SEMAPHORE_LAST + 1) +#define GEN6_SEMAPHORES_MASK GENMASK(GEN6_SEMAPHORE_LAST, 0) struct { /* our mbox written by others */ - u32 wait[I915_NUM_ENGINES]; + u32 wait[GEN6_NUM_SEMAPHORES]; /* mboxes this ring signals to */ - i915_reg_t signal[I915_NUM_ENGINES]; + i915_reg_t signal[GEN6_NUM_SEMAPHORES]; } mbox; u64 signal_ggtt[I915_NUM_ENGINES]; }; /* AKA wait() */ - int (*sync_to)(struct drm_i915_gem_request *to_req, - struct intel_engine_cs *from, - u32 seqno); - int (*signal)(struct drm_i915_gem_request *signaller_req, - /* num_dwords needed by caller */ - unsigned int num_dwords); + int (*sync_to)(struct drm_i915_gem_request *req, + struct drm_i915_gem_request *signal); + int (*signal)(struct drm_i915_gem_request *req); } semaphore; /* Execlists */ struct tasklet_struct irq_tasklet; spinlock_t execlist_lock; /* used inside tasklet, use spin_lock_bh */ + struct execlist_port { + struct drm_i915_gem_request *request; + unsigned int count; + } execlist_port[2]; struct list_head execlist_queue; unsigned int fw_domains; - unsigned int next_context_status_buffer; - unsigned int idle_lite_restore_wa; bool disable_lite_restore_wa; + bool preempt_wa; u32 ctx_desc_template; - int (*emit_request)(struct drm_i915_gem_request *request); - int (*emit_flush)(struct drm_i915_gem_request *request, - u32 invalidate_domains, - u32 flush_domains); - int (*emit_bb_start)(struct drm_i915_gem_request *req, - u64 offset, unsigned dispatch_flags); - - /** - * List of objects currently involved in rendering from the - * ringbuffer. - * - * Includes buffers having the contents of their GPU caches - * flushed, not necessarily primitives. last_read_req - * represents when the rendering involved will be completed. - * - * A reference is held on the buffer while on this list. - */ - struct list_head active_list; /** * List of breadcrumbs associated with GPU requests currently @@ -322,23 +328,24 @@ struct intel_engine_cs { * inspecting request list. */ u32 last_submitted_seqno; + u32 last_pending_seqno; - bool gpu_caches_dirty; + /* An RCU guarded pointer to the last request. No reference is + * held to the request, users must carefully acquire a reference to + * the request using i915_gem_active_get_rcu(), or hold the + * struct_mutex. + */ + struct i915_gem_active last_request; struct i915_gem_context *last_context; - struct intel_ring_hangcheck hangcheck; - - struct { - struct drm_i915_gem_object *obj; - u32 gtt_offset; - } scratch; + struct intel_engine_hangcheck hangcheck; bool needs_cmd_parser; /* * Table of commands the command parser needs to know about - * for this ring. + * for this engine. */ DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER); @@ -352,11 +359,11 @@ struct intel_engine_cs { * Returns the bitmask for the length field of the specified command. * Return 0 for an unrecognized/invalid command. * - * If the command parser finds an entry for a command in the ring's + * If the command parser finds an entry for a command in the engine's * cmd_tables, it gets the command's length based on the table entry. - * If not, it calls this function to determine the per-ring length field - * encoding for the command (i.e. certain opcode ranges use certain bits - * to encode the command length in the header). 
+ * If not, it calls this function to determine the per-engine length + * field encoding for the command (i.e. different opcode ranges use + * certain bits to encode the command length in the header). */ u32 (*get_cmd_length_mask)(u32 cmd_header); }; @@ -374,8 +381,8 @@ intel_engine_flag(const struct intel_engine_cs *engine) } static inline u32 -intel_ring_sync_index(struct intel_engine_cs *engine, - struct intel_engine_cs *other) +intel_engine_sync_index(struct intel_engine_cs *engine, + struct intel_engine_cs *other) { int idx; @@ -437,55 +444,76 @@ intel_write_status_page(struct intel_engine_cs *engine, #define I915_GEM_HWS_SCRATCH_INDEX 0x40 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) -struct intel_ringbuffer * -intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size); -int intel_pin_and_map_ringbuffer_obj(struct drm_i915_private *dev_priv, - struct intel_ringbuffer *ringbuf); -void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf); -void intel_ringbuffer_free(struct intel_ringbuffer *ring); +struct intel_ring * +intel_engine_create_ring(struct intel_engine_cs *engine, int size); +int intel_ring_pin(struct intel_ring *ring); +void intel_ring_unpin(struct intel_ring *ring); +void intel_ring_free(struct intel_ring *ring); + +void intel_engine_stop(struct intel_engine_cs *engine); +void intel_engine_cleanup(struct intel_engine_cs *engine); -void intel_stop_engine(struct intel_engine_cs *engine); -void intel_cleanup_engine(struct intel_engine_cs *engine); +void intel_legacy_submission_resume(struct drm_i915_private *dev_priv); int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request); int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n); int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); -static inline void intel_ring_emit(struct intel_engine_cs *engine, - u32 data) + +static inline void intel_ring_emit(struct intel_ring *ring, u32 data) { - struct intel_ringbuffer *ringbuf = engine->buffer; - iowrite32(data, ringbuf->virtual_start + ringbuf->tail); - ringbuf->tail += 4; + *(uint32_t *)(ring->vaddr + ring->tail) = data; + ring->tail += 4; } -static inline void intel_ring_emit_reg(struct intel_engine_cs *engine, - i915_reg_t reg) + +static inline void intel_ring_emit_reg(struct intel_ring *ring, i915_reg_t reg) { - intel_ring_emit(engine, i915_mmio_reg_offset(reg)); + intel_ring_emit(ring, i915_mmio_reg_offset(reg)); } -static inline void intel_ring_advance(struct intel_engine_cs *engine) + +static inline void intel_ring_advance(struct intel_ring *ring) { - struct intel_ringbuffer *ringbuf = engine->buffer; - ringbuf->tail &= ringbuf->size - 1; + /* Dummy function. + * + * This serves as a placeholder in the code so that the reader + * can compare against the preceding intel_ring_begin() and + * check that the number of dwords emitted matches the space + * reserved for the command packet (i.e. the value passed to + * intel_ring_begin()). + */ +} + +static inline u32 intel_ring_offset(struct intel_ring *ring, u32 value) +{ + /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. 
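With intel_ring_emit() now a plain store through ring->vaddr and intel_ring_advance() reduced to documentation, the emit contract is carried entirely by intel_ring_begin(). A usage sketch of the pattern these inlines support; error handling is abbreviated, and the dword count passed to intel_ring_begin() must match the number of emits:

        /* Sketch: reserve, emit, advance; the counts must agree. */
        struct intel_ring *ring = req->ring;
        int ret;

        ret = intel_ring_begin(req, 2);
        if (ret)
                return ret;

        intel_ring_emit(ring, MI_NOOP);
        intel_ring_emit(ring, MI_NOOP);
        intel_ring_advance(ring);

Note that intel_ring_advance() no longer masks the tail; wrapping is handled up front in intel_ring_begin(), and intel_ring_offset() exists for call-sites that need a wrapped value.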
*/ + return value & (ring->size - 1); } + int __intel_ring_space(int head, int tail, int size); -void intel_ring_update_space(struct intel_ringbuffer *ringbuf); +void intel_ring_update_space(struct intel_ring *ring); -int __must_check intel_engine_idle(struct intel_engine_cs *engine); -void intel_ring_init_seqno(struct intel_engine_cs *engine, u32 seqno); -int intel_ring_flush_all_caches(struct drm_i915_gem_request *req); -int intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req); +void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno); -int intel_init_pipe_control(struct intel_engine_cs *engine, int size); -void intel_fini_pipe_control(struct intel_engine_cs *engine); +void intel_engine_setup_common(struct intel_engine_cs *engine); +int intel_engine_init_common(struct intel_engine_cs *engine); +int intel_engine_create_scratch(struct intel_engine_cs *engine, int size); +void intel_engine_cleanup_common(struct intel_engine_cs *engine); -int intel_init_render_ring_buffer(struct drm_device *dev); -int intel_init_bsd_ring_buffer(struct drm_device *dev); -int intel_init_bsd2_ring_buffer(struct drm_device *dev); -int intel_init_blt_ring_buffer(struct drm_device *dev); -int intel_init_vebox_ring_buffer(struct drm_device *dev); +static inline int intel_engine_idle(struct intel_engine_cs *engine, + unsigned int flags) +{ + /* Wait upon the last request to be completed */ + return i915_gem_active_wait_unlocked(&engine->last_request, + flags, NULL, NULL); +} + +int intel_init_render_ring_buffer(struct intel_engine_cs *engine); +int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine); +int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine); +int intel_init_blt_ring_buffer(struct intel_engine_cs *engine); +int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine); -u64 intel_ring_get_active_head(struct intel_engine_cs *engine); +u64 intel_engine_get_active_head(struct intel_engine_cs *engine); static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine) { return intel_read_status_page(engine, I915_GEM_HWS_INDEX); @@ -493,11 +521,6 @@ static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine) int init_workarounds_ring(struct intel_engine_cs *engine); -static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf) -{ - return ringbuf->tail; -} - /* * Arbitrary size for largest possible 'add request' sequence. The code paths * are complex and variable. 
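Taken together, intel_engine_get_seqno() and the HWS layout make request completion a single coherent status-page read plus a wrap-safe comparison. A sketch, where i915_seqno_passed() is the driver's existing signed-difference compare, shown here purely for illustration:

        /* Sketch: has the engine retired past 'seqno'? */
        static bool engine_seqno_done_sketch(struct intel_engine_cs *engine,
                                             u32 seqno)
        {
                return i915_seqno_passed(intel_engine_get_seqno(engine), seqno);
        }

The new intel_engine_idle() above builds on the same machinery, but waits on the tracked last_request rather than polling the status page.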
Empirical measurement shows that the worst case @@ -509,21 +532,10 @@ static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf) static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) { - return engine->status_page.gfx_addr + I915_GEM_HWS_INDEX_ADDR; + return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR; } /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ -struct intel_wait { - struct rb_node node; - struct task_struct *tsk; - u32 seqno; -}; - -struct intel_signal_node { - struct rb_node node; - struct intel_wait wait; -}; - int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); static inline void intel_wait_init(struct intel_wait *wait, u32 seqno) @@ -543,31 +555,43 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, struct intel_wait *wait); void intel_engine_enable_signaling(struct drm_i915_gem_request *request); -static inline bool intel_engine_has_waiter(struct intel_engine_cs *engine) +static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) { - return READ_ONCE(engine->breadcrumbs.irq_seqno_bh); + return rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh); } -static inline bool intel_engine_wakeup(struct intel_engine_cs *engine) +static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine) { bool wakeup = false; - struct task_struct *tsk = READ_ONCE(engine->breadcrumbs.irq_seqno_bh); + /* Note that for this not to dangerously chase a dangling pointer, - * the caller is responsible for ensure that the task remain valid for - * wake_up_process() i.e. that the RCU grace period cannot expire. + * we must hold the rcu_read_lock here. * * Also note that tsk is likely to be in !TASK_RUNNING state so an * early test for tsk->state != TASK_RUNNING before wake_up_process() * is unlikely to be beneficial. */ - if (tsk) - wakeup = wake_up_process(tsk); + if (intel_engine_has_waiter(engine)) { + struct task_struct *tsk; + + rcu_read_lock(); + tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh); + if (tsk) + wakeup = wake_up_process(tsk); + rcu_read_unlock(); + } + return wakeup; } -void intel_engine_enable_fake_irq(struct intel_engine_cs *engine); +void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); unsigned int intel_kick_waiters(struct drm_i915_private *i915); unsigned int intel_kick_signalers(struct drm_i915_private *i915); +static inline bool intel_engine_is_active(struct intel_engine_cs *engine) +{ + return i915_gem_active_isset(&engine->last_request); +} + #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 1c603bbe5784..6c11168facd6 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -287,6 +287,7 @@ void intel_display_set_init_power(struct drm_i915_private *dev_priv, */ static void hsw_power_well_post_enable(struct drm_i915_private *dev_priv) { + struct pci_dev *pdev = dev_priv->drm.pdev; struct drm_device *dev = &dev_priv->drm; /* @@ -299,9 +300,9 @@ static void hsw_power_well_post_enable(struct drm_i915_private *dev_priv) * sure vgacon can keep working normally without triggering interrupts * and error messages. 
*/ - vga_get_uninterruptible(dev->pdev, VGA_RSRC_LEGACY_IO); + vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO); outb(inb(VGA_MSR_READ), VGA_MSR_WRITE); - vga_put(dev->pdev, VGA_RSRC_LEGACY_IO); + vga_put(pdev, VGA_RSRC_LEGACY_IO); if (IS_BROADWELL(dev)) gen8_irq_power_well_post_enable(dev_priv, @@ -318,7 +319,7 @@ static void hsw_power_well_pre_disable(struct drm_i915_private *dev_priv) static void skl_power_well_post_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - struct drm_device *dev = &dev_priv->drm; + struct pci_dev *pdev = dev_priv->drm.pdev; /* * After we re-enable the power well, if we touch VGA register 0x3d5 @@ -331,9 +332,9 @@ static void skl_power_well_post_enable(struct drm_i915_private *dev_priv, * and error messages. */ if (power_well->data == SKL_DISP_PW_2) { - vga_get_uninterruptible(dev->pdev, VGA_RSRC_LEGACY_IO); + vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO); outb(inb(VGA_MSR_READ), VGA_MSR_WRITE); - vga_put(dev->pdev, VGA_RSRC_LEGACY_IO); + vga_put(pdev, VGA_RSRC_LEGACY_IO); gen8_irq_power_well_post_enable(dev_priv, 1 << PIPE_C | 1 << PIPE_B); @@ -592,6 +593,8 @@ void bxt_disable_dc9(struct drm_i915_private *dev_priv) DRM_DEBUG_KMS("Disabling DC9\n"); gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); + + intel_pps_unlock_regs_wa(dev_priv); } static void assert_csr_loaded(struct drm_i915_private *dev_priv) @@ -854,7 +857,7 @@ static void bxt_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { enum skl_disp_power_wells power_well_id = power_well->data; - struct i915_power_well *cmn_a_well; + struct i915_power_well *cmn_a_well = NULL; if (power_well_id == BXT_DPIO_CMN_BC) { /* @@ -867,7 +870,7 @@ static void bxt_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, bxt_ddi_phy_init(dev_priv, bxt_power_well_to_phy(power_well)); - if (power_well_id == BXT_DPIO_CMN_BC) + if (cmn_a_well) intel_power_well_put(dev_priv, cmn_a_well); } @@ -1121,6 +1124,8 @@ static void vlv_display_power_well_init(struct drm_i915_private *dev_priv) } i915_redisable_vga_power_on(&dev_priv->drm); + + intel_pps_unlock_regs_wa(dev_priv); } static void vlv_display_power_well_deinit(struct drm_i915_private *dev_priv) @@ -2284,7 +2289,7 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) */ void intel_power_domains_fini(struct drm_i915_private *dev_priv) { - struct device *device = &dev_priv->drm.pdev->dev; + struct device *kdev = &dev_priv->drm.pdev->dev; /* * The i915.ko module is still not prepared to be loaded when @@ -2306,7 +2311,7 @@ void intel_power_domains_fini(struct drm_i915_private *dev_priv) * the platform doesn't support runtime PM. 
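The bxt_dpio_cmn_power_well_enable() hunk above is a small defensive cleanup with a general shape: initialise the optional reference to NULL, take it only on the BC well, and key the release on the pointer itself instead of re-testing the well id. Schematically; the lookup call is elided by the hunk and is shown here only as an assumed helper to illustrate the pattern:

        struct i915_power_well *cmn_a_well = NULL;

        if (power_well_id == BXT_DPIO_CMN_BC)
                cmn_a_well = lookup_power_well(dev_priv, BXT_DPIO_CMN_A); /* assumed */

        /* ... bxt_ddi_phy_init() ... */

        if (cmn_a_well)
                intel_power_well_put(dev_priv, cmn_a_well);

Keying the put on the pointer means it can never run against an uninitialised variable if the enable path later grows more early exits.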
*/ if (!HAS_RUNTIME_PM(dev_priv)) - pm_runtime_put(device); + pm_runtime_put(kdev); } static void intel_power_domains_sync_hw(struct drm_i915_private *dev_priv) @@ -2647,10 +2652,10 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv) */ void intel_runtime_pm_get(struct drm_i915_private *dev_priv) { - struct drm_device *dev = &dev_priv->drm; - struct device *device = &dev->pdev->dev; + struct pci_dev *pdev = dev_priv->drm.pdev; + struct device *kdev = &pdev->dev; - pm_runtime_get_sync(device); + pm_runtime_get_sync(kdev); atomic_inc(&dev_priv->pm.wakeref_count); assert_rpm_wakelock_held(dev_priv); @@ -2668,11 +2673,11 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv) */ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv) { - struct drm_device *dev = &dev_priv->drm; - struct device *device = &dev->pdev->dev; + struct pci_dev *pdev = dev_priv->drm.pdev; + struct device *kdev = &pdev->dev; if (IS_ENABLED(CONFIG_PM)) { - int ret = pm_runtime_get_if_in_use(device); + int ret = pm_runtime_get_if_in_use(kdev); /* * In cases runtime PM is disabled by the RPM core and we get @@ -2710,11 +2715,11 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv) */ void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv) { - struct drm_device *dev = &dev_priv->drm; - struct device *device = &dev->pdev->dev; + struct pci_dev *pdev = dev_priv->drm.pdev; + struct device *kdev = &pdev->dev; assert_rpm_wakelock_held(dev_priv); - pm_runtime_get_noresume(device); + pm_runtime_get_noresume(kdev); atomic_inc(&dev_priv->pm.wakeref_count); } @@ -2729,15 +2734,15 @@ void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv) */ void intel_runtime_pm_put(struct drm_i915_private *dev_priv) { - struct drm_device *dev = &dev_priv->drm; - struct device *device = &dev->pdev->dev; + struct pci_dev *pdev = dev_priv->drm.pdev; + struct device *kdev = &pdev->dev; assert_rpm_wakelock_held(dev_priv); if (atomic_dec_and_test(&dev_priv->pm.wakeref_count)) atomic_inc(&dev_priv->pm.atomic_seq); - pm_runtime_mark_last_busy(device); - pm_runtime_put_autosuspend(device); + pm_runtime_mark_last_busy(kdev); + pm_runtime_put_autosuspend(kdev); } /** @@ -2752,11 +2757,12 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv) */ void intel_runtime_pm_enable(struct drm_i915_private *dev_priv) { + struct pci_dev *pdev = dev_priv->drm.pdev; struct drm_device *dev = &dev_priv->drm; - struct device *device = &dev->pdev->dev; + struct device *kdev = &pdev->dev; - pm_runtime_set_autosuspend_delay(device, 10000); /* 10s */ - pm_runtime_mark_last_busy(device); + pm_runtime_set_autosuspend_delay(kdev, 10000); /* 10s */ + pm_runtime_mark_last_busy(kdev); /* * Take a permanent reference to disable the RPM functionality and drop @@ -2765,10 +2771,10 @@ void intel_runtime_pm_enable(struct drm_i915_private *dev_priv) * platforms without RPM support. */ if (!HAS_RUNTIME_PM(dev)) { - pm_runtime_dont_use_autosuspend(device); - pm_runtime_get_sync(device); + pm_runtime_dont_use_autosuspend(kdev); + pm_runtime_get_sync(kdev); } else { - pm_runtime_use_autosuspend(device); + pm_runtime_use_autosuspend(kdev); } /* @@ -2776,6 +2782,5 @@ void intel_runtime_pm_enable(struct drm_i915_private *dev_priv) * We drop that here and will reacquire it during unloading in * intel_power_domains_fini(). 
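All of the runtime-PM helpers in this file now spell out that the kernel device they drive is the one embedded in the PCI device, hence the kdev rename; the caller contract is unchanged. Typical usage of the wakeref API, as a sketch:

        /* Sketch: bracket hardware access with a runtime-PM wakeref. */
        intel_runtime_pm_get(dev_priv);

        /* ... MMIO or other work that requires the device awake ... */

        intel_runtime_pm_put(dev_priv);

intel_runtime_pm_get_if_in_use() is the try-lock variant for paths that must not resume the device; it only succeeds while the device is already awake.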
*/ - pm_runtime_put_autosuspend(device); + pm_runtime_put_autosuspend(kdev); } - diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index e378f35365a2..c551024d4871 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -1003,24 +1003,22 @@ static bool intel_sdvo_write_infoframe(struct intel_sdvo *intel_sdvo, } static bool intel_sdvo_set_avi_infoframe(struct intel_sdvo *intel_sdvo, - const struct drm_display_mode *adjusted_mode) + struct intel_crtc_state *pipe_config) { uint8_t sdvo_data[HDMI_INFOFRAME_SIZE(AVI)]; - struct drm_crtc *crtc = intel_sdvo->base.base.crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); union hdmi_infoframe frame; int ret; ssize_t len; ret = drm_hdmi_avi_infoframe_from_display_mode(&frame.avi, - adjusted_mode); + &pipe_config->base.adjusted_mode); if (ret < 0) { DRM_ERROR("couldn't fill AVI infoframe\n"); return false; } if (intel_sdvo->rgb_quant_range_selectable) { - if (intel_crtc->config->limited_color_range) + if (pipe_config->limited_color_range) frame.avi.quantization_range = HDMI_QUANTIZATION_RANGE_LIMITED; else @@ -1125,7 +1123,8 @@ static void i9xx_adjust_sdvo_tv_clock(struct intel_crtc_state *pipe_config) } static bool intel_sdvo_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config) + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct intel_sdvo *intel_sdvo = to_sdvo(encoder); struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; @@ -1192,22 +1191,21 @@ static bool intel_sdvo_compute_config(struct intel_encoder *encoder, return true; } -static void intel_sdvo_pre_enable(struct intel_encoder *intel_encoder) +static void intel_sdvo_pre_enable(struct intel_encoder *intel_encoder, + struct intel_crtc_state *crtc_state, + struct drm_connector_state *conn_state) { struct drm_device *dev = intel_encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc = to_intel_crtc(intel_encoder->base.crtc); - const struct drm_display_mode *adjusted_mode = &crtc->config->base.adjusted_mode; - struct drm_display_mode *mode = &crtc->config->base.mode; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; + struct drm_display_mode *mode = &crtc_state->base.mode; struct intel_sdvo *intel_sdvo = to_sdvo(intel_encoder); u32 sdvox; struct intel_sdvo_in_out_map in_out; struct intel_sdvo_dtd input_dtd, output_dtd; int rate; - if (!mode) - return; - /* First, set the input mapping for the first input to our controlled * output. 
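Passing crtc_state/conn_state explicitly, as the SDVO hooks above now do, is what lets the AVI infoframe be built during an atomic commit, before crtc->config has been swapped. The essential shape of the converted helper, condensed from the hunk with error handling trimmed and an illustrative name:

        /* Sketch: derive the infoframe from the state that was passed in. */
        static bool sdvo_avi_sketch(struct intel_crtc_state *pipe_config)
        {
                union hdmi_infoframe frame;

                return drm_hdmi_avi_infoframe_from_display_mode(&frame.avi,
                                &pipe_config->base.adjusted_mode) == 0;
        }

The same substitution is why intel_sdvo_pre_enable() can drop its "if (!mode) return;" guard: a mode is guaranteed once the state is handed in directly.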
This is only correct if we're a single-input device, in * which case the first input is the output from the appropriate SDVO @@ -1240,11 +1238,11 @@ static void intel_sdvo_pre_enable(struct intel_encoder *intel_encoder) if (!intel_sdvo_set_target_input(intel_sdvo)) return; - if (crtc->config->has_hdmi_sink) { + if (crtc_state->has_hdmi_sink) { intel_sdvo_set_encode(intel_sdvo, SDVO_ENCODE_HDMI); intel_sdvo_set_colorimetry(intel_sdvo, SDVO_COLORIMETRY_RGB256); - intel_sdvo_set_avi_infoframe(intel_sdvo, adjusted_mode); + intel_sdvo_set_avi_infoframe(intel_sdvo, crtc_state); } else intel_sdvo_set_encode(intel_sdvo, SDVO_ENCODE_DVI); @@ -1260,7 +1258,7 @@ static void intel_sdvo_pre_enable(struct intel_encoder *intel_encoder) DRM_INFO("Setting input timings on %s failed\n", SDVO_NAME(intel_sdvo)); - switch (crtc->config->pixel_multiplier) { + switch (crtc_state->pixel_multiplier) { default: WARN(1, "unknown pixel multiplier specified\n"); case 1: rate = SDVO_CLOCK_RATE_MULT_1X; break; @@ -1275,7 +1273,7 @@ static void intel_sdvo_pre_enable(struct intel_encoder *intel_encoder) /* The real mode polarity is set by the SDVO commands, using * struct intel_sdvo_dtd. */ sdvox = SDVO_VSYNC_ACTIVE_HIGH | SDVO_HSYNC_ACTIVE_HIGH; - if (!HAS_PCH_SPLIT(dev) && crtc->config->limited_color_range) + if (!HAS_PCH_SPLIT(dev) && crtc_state->limited_color_range) sdvox |= HDMI_COLOR_RANGE_16_235; if (INTEL_INFO(dev)->gen < 5) sdvox |= SDVO_BORDER_ENABLE; @@ -1301,7 +1299,7 @@ static void intel_sdvo_pre_enable(struct intel_encoder *intel_encoder) } else if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) { /* done in crtc_mode_set as it lives inside the dpll register */ } else { - sdvox |= (crtc->config->pixel_multiplier - 1) + sdvox |= (crtc_state->pixel_multiplier - 1) << SDVO_PORT_MULTIPLY_SHIFT; } @@ -1434,7 +1432,9 @@ static void intel_sdvo_get_config(struct intel_encoder *encoder, pipe_config->pixel_multiplier, encoder_pixel_multiplier); } -static void intel_disable_sdvo(struct intel_encoder *encoder) +static void intel_disable_sdvo(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_sdvo *intel_sdvo = to_sdvo(encoder); @@ -1477,16 +1477,22 @@ static void intel_disable_sdvo(struct intel_encoder *encoder) } } -static void pch_disable_sdvo(struct intel_encoder *encoder) +static void pch_disable_sdvo(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { } -static void pch_post_disable_sdvo(struct intel_encoder *encoder) +static void pch_post_disable_sdvo(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { - intel_disable_sdvo(encoder); + intel_disable_sdvo(encoder, old_crtc_state, old_conn_state); } -static void intel_enable_sdvo(struct intel_encoder *encoder) +static void intel_enable_sdvo(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -2930,10 +2936,12 @@ static bool intel_sdvo_init_ddc_proxy(struct intel_sdvo *sdvo, struct drm_device *dev) { + struct pci_dev *pdev = dev->pdev; + sdvo->ddc.owner = THIS_MODULE; sdvo->ddc.class = I2C_CLASS_DDC; snprintf(sdvo->ddc.name, I2C_NAME_SIZE, "SDVO DDC proxy"); - sdvo->ddc.dev.parent = &dev->pdev->dev; + sdvo->ddc.dev.parent = 
&pdev->dev; sdvo->ddc.algo_data = sdvo; sdvo->ddc.algo = &intel_sdvo_ddc_proxy; diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 7c08e4f29032..73a521fdf1bd 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -36,6 +36,7 @@ #include <drm/drm_atomic.h> #include <drm/drm_plane_helper.h> #include "intel_drv.h" +#include "intel_frontbuffer.h" #include <drm/i915_drm.h> #include "i915_drv.h" @@ -202,23 +203,24 @@ skl_update_plane(struct drm_plane *drm_plane, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_plane *intel_plane = to_intel_plane(drm_plane); struct drm_framebuffer *fb = plane_state->base.fb; - struct drm_i915_gem_object *obj = intel_fb_obj(fb); + const struct skl_wm_values *wm = &dev_priv->wm.skl_results; + struct drm_crtc *crtc = crtc_state->base.crtc; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); const int pipe = intel_plane->pipe; const int plane = intel_plane->plane + 1; - u32 plane_ctl, stride_div, stride; + u32 plane_ctl; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - u32 surf_addr; - u32 tile_height, plane_offset, plane_size; + u32 surf_addr = plane_state->main.offset; unsigned int rotation = plane_state->base.rotation; - int x_offset, y_offset; - int crtc_x = plane_state->dst.x1; - int crtc_y = plane_state->dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->dst); - uint32_t crtc_h = drm_rect_height(&plane_state->dst); - uint32_t x = plane_state->src.x1 >> 16; - uint32_t y = plane_state->src.y1 >> 16; - uint32_t src_w = drm_rect_width(&plane_state->src) >> 16; - uint32_t src_h = drm_rect_height(&plane_state->src) >> 16; + u32 stride = skl_plane_stride(fb, 0, rotation); + int crtc_x = plane_state->base.dst.x1; + int crtc_y = plane_state->base.dst.y1; + uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); + uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); + uint32_t x = plane_state->main.x; + uint32_t y = plane_state->main.y; + uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; + uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; plane_ctl = PLANE_CTL_ENABLE | PLANE_CTL_PIPE_GAMMA_ENABLE | @@ -229,14 +231,8 @@ skl_update_plane(struct drm_plane *drm_plane, plane_ctl |= skl_plane_ctl_rotation(rotation); - stride_div = intel_fb_stride_alignment(dev_priv, fb->modifier[0], - fb->pixel_format); - - /* Sizes are 0 based */ - src_w--; - src_h--; - crtc_w--; - crtc_h--; + if (wm->dirty_pipes & drm_crtc_mask(crtc)) + skl_write_plane_wm(intel_crtc, wm, plane); if (key->flags) { I915_WRITE(PLANE_KEYVAL(pipe, plane), key->min_value); @@ -249,28 +245,15 @@ skl_update_plane(struct drm_plane *drm_plane, else if (key->flags & I915_SET_COLORKEY_SOURCE) plane_ctl |= PLANE_CTL_KEY_ENABLE_SOURCE; - surf_addr = intel_plane_obj_offset(intel_plane, obj, 0); - - if (intel_rotation_90_or_270(rotation)) { - int cpp = drm_format_plane_cpp(fb->pixel_format, 0); - - /* stride: Surface height in tiles */ - tile_height = intel_tile_height(dev_priv, fb->modifier[0], cpp); - stride = DIV_ROUND_UP(fb->height, tile_height); - plane_size = (src_w << 16) | src_h; - x_offset = stride * tile_height - y - (src_h + 1); - y_offset = x; - } else { - stride = fb->pitches[0] / stride_div; - plane_size = (src_h << 16) | src_w; - x_offset = x; - y_offset = y; - } - plane_offset = y_offset << 16 | x_offset; + /* Sizes are 0 based */ + src_w--; + src_h--; + crtc_w--; + crtc_h--; - I915_WRITE(PLANE_OFFSET(pipe, plane), plane_offset); + 
I915_WRITE(PLANE_OFFSET(pipe, plane), (y << 16) | x); I915_WRITE(PLANE_STRIDE(pipe, plane), stride); - I915_WRITE(PLANE_SIZE(pipe, plane), plane_size); + I915_WRITE(PLANE_SIZE(pipe, plane), (src_h << 16) | src_w); /* program plane scaler */ if (plane_state->scaler_id >= 0) { @@ -295,7 +278,8 @@ } I915_WRITE(PLANE_CTL(pipe, plane), plane_ctl); - I915_WRITE(PLANE_SURF(pipe, plane), surf_addr); + I915_WRITE(PLANE_SURF(pipe, plane), + intel_fb_gtt_offset(fb, rotation) + surf_addr); POSTING_READ(PLANE_SURF(pipe, plane)); } @@ -308,6 +292,14 @@ skl_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc) const int pipe = intel_plane->pipe; const int plane = intel_plane->plane + 1; + /* + * We only populate skl_results on watermark updates, and if the + * plane's visibility isn't actually changing, neither are its watermarks. + */ + if (!dplane->state->visible) + skl_write_plane_wm(to_intel_crtc(crtc), + &dev_priv->wm.skl_results, plane); + I915_WRITE(PLANE_CTL(pipe, plane), 0); I915_WRITE(PLANE_SURF(pipe, plane), 0); @@ -362,22 +354,20 @@ vlv_update_plane(struct drm_plane *dplane, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_plane *intel_plane = to_intel_plane(dplane); struct drm_framebuffer *fb = plane_state->base.fb; - struct drm_i915_gem_object *obj = intel_fb_obj(fb); int pipe = intel_plane->pipe; int plane = intel_plane->plane; u32 sprctl; u32 sprsurf_offset, linear_offset; unsigned int rotation = dplane->state->rotation; - int cpp = drm_format_plane_cpp(fb->pixel_format, 0); const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - int crtc_x = plane_state->dst.x1; - int crtc_y = plane_state->dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->dst); - uint32_t crtc_h = drm_rect_height(&plane_state->dst); - uint32_t x = plane_state->src.x1 >> 16; - uint32_t y = plane_state->src.y1 >> 16; - uint32_t src_w = drm_rect_width(&plane_state->src) >> 16; - uint32_t src_h = drm_rect_height(&plane_state->src) >> 16; + int crtc_x = plane_state->base.dst.x1; + int crtc_y = plane_state->base.dst.y1; + uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); + uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); + uint32_t x = plane_state->base.src.x1 >> 16; + uint32_t y = plane_state->base.src.y1 >> 16; + uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; + uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; sprctl = SP_ENABLE; @@ -430,7 +420,7 @@ vlv_update_plane(struct drm_plane *dplane, */ sprctl |= SP_GAMMA_ENABLE; - if (obj->tiling_mode != I915_TILING_NONE) + if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) sprctl |= SP_TILED; /* Sizes are 0 based */ src_w--; src_h--; crtc_w--; crtc_h--; - linear_offset = y * fb->pitches[0] + x * cpp; - sprsurf_offset = intel_compute_tile_offset(&x, &y, fb, 0, - fb->pitches[0], rotation); - linear_offset -= sprsurf_offset; + intel_add_fb_offsets(&x, &y, plane_state, 0); + sprsurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0); - if (rotation == BIT(DRM_ROTATE_180)) { + if (rotation == DRM_ROTATE_180) { sprctl |= SP_ROTATE_180; x += src_w; y += src_h; - linear_offset += src_h * fb->pitches[0] + src_w * cpp; } + linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); + if (key->flags) { I915_WRITE(SPKEYMINVAL(pipe, plane), key->min_value); I915_WRITE(SPKEYMAXVAL(pipe, plane), key->max_value); @@ -467,7 +456,7 @@ I915_WRITE(SPSTRIDE(pipe, plane),
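The same replacement recurs in all three legacy paths (vlv/ivb/ilk): the hand-written linear_offset arithmetic gives way to intel_add_fb_offsets()/intel_compute_tile_offset()/intel_fb_xy_to_linear(). Per the removed lines, the underlying linear math is plain byte addressing; a self-contained sketch (xy_to_linear is a made-up name for this illustration):

#include <stdint.h>
#include <stdio.h>

/* Byte offset of pixel (x, y) in a linear framebuffer with the given
 * pitch (bytes per scanline) and cpp (bytes per pixel): the same
 * arithmetic as the removed "y * fb->pitches[0] + x * cpp". */
static uint32_t xy_to_linear(uint32_t x, uint32_t y,
                             uint32_t pitch, uint32_t cpp)
{
        return y * pitch + x * cpp;
}

int main(void)
{
        /* 1920x1080 XRGB8888: pitch 7680 bytes, cpp 4 */
        printf("offset = %u\n", xy_to_linear(10, 2, 7680, 4)); /* 15400 */
        return 0;
}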
fb->pitches[0]); I915_WRITE(SPPOS(pipe, plane), (crtc_y << 16) | crtc_x); - if (obj->tiling_mode != I915_TILING_NONE) + if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) I915_WRITE(SPTILEOFF(pipe, plane), (y << 16) | x); else I915_WRITE(SPLINOFF(pipe, plane), linear_offset); @@ -476,8 +465,8 @@ vlv_update_plane(struct drm_plane *dplane, I915_WRITE(SPSIZE(pipe, plane), (crtc_h << 16) | crtc_w); I915_WRITE(SPCNTR(pipe, plane), sprctl); - I915_WRITE(SPSURF(pipe, plane), i915_gem_obj_ggtt_offset(obj) + - sprsurf_offset); + I915_WRITE(SPSURF(pipe, plane), + intel_fb_gtt_offset(fb, rotation) + sprsurf_offset); POSTING_READ(SPSURF(pipe, plane)); } @@ -505,21 +494,19 @@ ivb_update_plane(struct drm_plane *plane, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_plane *intel_plane = to_intel_plane(plane); struct drm_framebuffer *fb = plane_state->base.fb; - struct drm_i915_gem_object *obj = intel_fb_obj(fb); enum pipe pipe = intel_plane->pipe; u32 sprctl, sprscale = 0; u32 sprsurf_offset, linear_offset; unsigned int rotation = plane_state->base.rotation; - int cpp = drm_format_plane_cpp(fb->pixel_format, 0); const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - int crtc_x = plane_state->dst.x1; - int crtc_y = plane_state->dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->dst); - uint32_t crtc_h = drm_rect_height(&plane_state->dst); - uint32_t x = plane_state->src.x1 >> 16; - uint32_t y = plane_state->src.y1 >> 16; - uint32_t src_w = drm_rect_width(&plane_state->src) >> 16; - uint32_t src_h = drm_rect_height(&plane_state->src) >> 16; + int crtc_x = plane_state->base.dst.x1; + int crtc_y = plane_state->base.dst.y1; + uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); + uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); + uint32_t x = plane_state->base.src.x1 >> 16; + uint32_t y = plane_state->base.src.y1 >> 16; + uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; + uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; sprctl = SPRITE_ENABLE; @@ -552,7 +539,7 @@ ivb_update_plane(struct drm_plane *plane, */ sprctl |= SPRITE_GAMMA_ENABLE; - if (obj->tiling_mode != I915_TILING_NONE) + if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) sprctl |= SPRITE_TILED; if (IS_HASWELL(dev) || IS_BROADWELL(dev)) @@ -572,22 +559,21 @@ ivb_update_plane(struct drm_plane *plane, if (crtc_w != src_w || crtc_h != src_h) sprscale = SPRITE_SCALE_ENABLE | (src_w << 16) | src_h; - linear_offset = y * fb->pitches[0] + x * cpp; - sprsurf_offset = intel_compute_tile_offset(&x, &y, fb, 0, - fb->pitches[0], rotation); - linear_offset -= sprsurf_offset; + intel_add_fb_offsets(&x, &y, plane_state, 0); + sprsurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0); - if (rotation == BIT(DRM_ROTATE_180)) { + if (rotation == DRM_ROTATE_180) { sprctl |= SPRITE_ROTATE_180; /* HSW and BDW does this automagically in hardware */ if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) { x += src_w; y += src_h; - linear_offset += src_h * fb->pitches[0] + src_w * cpp; } } + linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); + if (key->flags) { I915_WRITE(SPRKEYVAL(pipe), key->min_value); I915_WRITE(SPRKEYMAX(pipe), key->max_value); @@ -606,7 +592,7 @@ ivb_update_plane(struct drm_plane *plane, * register */ if (IS_HASWELL(dev) || IS_BROADWELL(dev)) I915_WRITE(SPROFFSET(pipe), (y << 16) | x); - else if (obj->tiling_mode != I915_TILING_NONE) + else if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) I915_WRITE(SPRTILEOFF(pipe), (y << 16) | x); else I915_WRITE(SPRLINOFF(pipe), 
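Worth noting while reading these hunks: the SPSIZE/SPRSIZE-style registers take 0-based dimensions packed as height<<16 | width, which is why every path decrements src_w/src_h/crtc_w/crtc_h first ("Sizes are 0 based"). A tiny standalone illustration (pack_size is a made-up helper):

#include <stdint.h>
#include <stdio.h>

/* Mirror of the crtc_w--/crtc_h-- decrements followed by
 * (crtc_h << 16) | crtc_w in the driver. */
static uint32_t pack_size(uint32_t w, uint32_t h)
{
        return ((h - 1) << 16) | (w - 1);
}

int main(void)
{
        printf("SPSIZE = 0x%08x\n", pack_size(1920, 1080)); /* 0x0437077f */
        return 0;
}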
linear_offset); @@ -616,7 +602,7 @@ ivb_update_plane(struct drm_plane *plane, I915_WRITE(SPRSCALE(pipe), sprscale); I915_WRITE(SPRCTL(pipe), sprctl); I915_WRITE(SPRSURF(pipe), - i915_gem_obj_ggtt_offset(obj) + sprsurf_offset); + intel_fb_gtt_offset(fb, rotation) + sprsurf_offset); POSTING_READ(SPRSURF(pipe)); } @@ -646,21 +632,19 @@ ilk_update_plane(struct drm_plane *plane, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_plane *intel_plane = to_intel_plane(plane); struct drm_framebuffer *fb = plane_state->base.fb; - struct drm_i915_gem_object *obj = intel_fb_obj(fb); int pipe = intel_plane->pipe; u32 dvscntr, dvsscale; u32 dvssurf_offset, linear_offset; unsigned int rotation = plane_state->base.rotation; - int cpp = drm_format_plane_cpp(fb->pixel_format, 0); const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - int crtc_x = plane_state->dst.x1; - int crtc_y = plane_state->dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->dst); - uint32_t crtc_h = drm_rect_height(&plane_state->dst); - uint32_t x = plane_state->src.x1 >> 16; - uint32_t y = plane_state->src.y1 >> 16; - uint32_t src_w = drm_rect_width(&plane_state->src) >> 16; - uint32_t src_h = drm_rect_height(&plane_state->src) >> 16; + int crtc_x = plane_state->base.dst.x1; + int crtc_y = plane_state->base.dst.y1; + uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); + uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); + uint32_t x = plane_state->base.src.x1 >> 16; + uint32_t y = plane_state->base.src.y1 >> 16; + uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; + uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; dvscntr = DVS_ENABLE; @@ -693,7 +677,7 @@ ilk_update_plane(struct drm_plane *plane, */ dvscntr |= DVS_GAMMA_ENABLE; - if (obj->tiling_mode != I915_TILING_NONE) + if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) dvscntr |= DVS_TILED; if (IS_GEN6(dev)) @@ -709,19 +693,18 @@ ilk_update_plane(struct drm_plane *plane, if (crtc_w != src_w || crtc_h != src_h) dvsscale = DVS_SCALE_ENABLE | (src_w << 16) | src_h; - linear_offset = y * fb->pitches[0] + x * cpp; - dvssurf_offset = intel_compute_tile_offset(&x, &y, fb, 0, - fb->pitches[0], rotation); - linear_offset -= dvssurf_offset; + intel_add_fb_offsets(&x, &y, plane_state, 0); + dvssurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0); - if (rotation == BIT(DRM_ROTATE_180)) { + if (rotation == DRM_ROTATE_180) { dvscntr |= DVS_ROTATE_180; x += src_w; y += src_h; - linear_offset += src_h * fb->pitches[0] + src_w * cpp; } + linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); + if (key->flags) { I915_WRITE(DVSKEYVAL(pipe), key->min_value); I915_WRITE(DVSKEYMAX(pipe), key->max_value); @@ -736,7 +719,7 @@ ilk_update_plane(struct drm_plane *plane, I915_WRITE(DVSSTRIDE(pipe), fb->pitches[0]); I915_WRITE(DVSPOS(pipe), (crtc_y << 16) | crtc_x); - if (obj->tiling_mode != I915_TILING_NONE) + if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) I915_WRITE(DVSTILEOFF(pipe), (y << 16) | x); else I915_WRITE(DVSLINOFF(pipe), linear_offset); @@ -745,7 +728,7 @@ ilk_update_plane(struct drm_plane *plane, I915_WRITE(DVSSCALE(pipe), dvsscale); I915_WRITE(DVSCNTR(pipe), dvscntr); I915_WRITE(DVSSURF(pipe), - i915_gem_obj_ggtt_offset(obj) + dvssurf_offset); + intel_fb_gtt_offset(fb, rotation) + dvssurf_offset); POSTING_READ(DVSSURF(pipe)); } @@ -778,15 +761,26 @@ intel_check_sprite_plane(struct drm_plane *plane, int crtc_x, crtc_y; unsigned int crtc_w, crtc_h; uint32_t src_x, src_y, src_w, src_h; - struct drm_rect *src = 
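The DRM_ROTATE_180 handling keeps its coordinate flip but drops the manual linear_offset adjustment, since intel_fb_xy_to_linear() is now applied to the final x/y. Because the sizes were already made 0-based, adding src_w/src_h moves the origin exactly onto the opposite corner of the source rectangle; a standalone sketch of that flip:

#include <stdio.h>

/* With 0-based src_w/src_h (i.e. width-1/height-1), a 180-degree
 * rotation scans out from the opposite corner of the source rect,
 * so the origin shifts by exactly (src_w, src_h). */
static void rotate_180_origin(unsigned int *x, unsigned int *y,
                              unsigned int src_w, unsigned int src_h)
{
        *x += src_w;
        *y += src_h;
}

int main(void)
{
        unsigned int x = 0, y = 0;

        rotate_180_origin(&x, &y, 1920 - 1, 1080 - 1);
        printf("origin = %u,%u\n", x, y); /* 1919,1079 */
        return 0;
}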
&state->src; - struct drm_rect *dst = &state->dst; + struct drm_rect *src = &state->base.src; + struct drm_rect *dst = &state->base.dst; const struct drm_rect *clip = &state->clip; int hscale, vscale; int max_scale, min_scale; bool can_scale; + int ret; + + src->x1 = state->base.src_x; + src->y1 = state->base.src_y; + src->x2 = state->base.src_x + state->base.src_w; + src->y2 = state->base.src_y + state->base.src_h; + + dst->x1 = state->base.crtc_x; + dst->y1 = state->base.crtc_y; + dst->x2 = state->base.crtc_x + state->base.crtc_w; + dst->y2 = state->base.crtc_y + state->base.crtc_h; if (!fb) { - state->visible = false; + state->base.visible = false; return 0; } @@ -834,14 +828,14 @@ intel_check_sprite_plane(struct drm_plane *plane, vscale = drm_rect_calc_vscale_relaxed(src, dst, min_scale, max_scale); BUG_ON(vscale < 0); - state->visible = drm_rect_clip_scaled(src, dst, clip, hscale, vscale); + state->base.visible = drm_rect_clip_scaled(src, dst, clip, hscale, vscale); crtc_x = dst->x1; crtc_y = dst->y1; crtc_w = drm_rect_width(dst); crtc_h = drm_rect_height(dst); - if (state->visible) { + if (state->base.visible) { /* check again in case clipping clamped the results */ hscale = drm_rect_calc_hscale(src, dst, min_scale, max_scale); if (hscale < 0) { @@ -898,12 +892,12 @@ intel_check_sprite_plane(struct drm_plane *plane, crtc_w &= ~1; if (crtc_w == 0) - state->visible = false; + state->base.visible = false; } } /* Check size restrictions when scaling */ - if (state->visible && (src_w != crtc_w || src_h != crtc_h)) { + if (state->base.visible && (src_w != crtc_w || src_h != crtc_h)) { unsigned int width_bytes; int cpp = drm_format_plane_cpp(fb->pixel_format, 0); @@ -912,10 +906,10 @@ intel_check_sprite_plane(struct drm_plane *plane, /* FIXME interlacing min height is 6 */ if (crtc_w < 3 || crtc_h < 3) - state->visible = false; + state->base.visible = false; if (src_w < 3 || src_h < 3) - state->visible = false; + state->base.visible = false; width_bytes = ((src_x * cpp) & 63) + src_w * cpp; @@ -926,7 +920,7 @@ intel_check_sprite_plane(struct drm_plane *plane, } } - if (state->visible) { + if (state->base.visible) { src->x1 = src_x << 16; src->x2 = (src_x + src_w) << 16; src->y1 = src_y << 16; @@ -938,6 +932,12 @@ intel_check_sprite_plane(struct drm_plane *plane, dst->y1 = crtc_y; dst->y2 = crtc_y + crtc_h; + if (INTEL_GEN(dev) >= 9) { + ret = skl_check_plane_surface(state); + if (ret) + return ret; + } + return 0; } diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 49136ad5473e..d960e4866595 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -838,7 +838,9 @@ intel_tv_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe) } static void -intel_enable_tv(struct intel_encoder *encoder) +intel_enable_tv(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -851,7 +853,9 @@ intel_enable_tv(struct intel_encoder *encoder) } static void -intel_disable_tv(struct intel_encoder *encoder) +intel_disable_tv(struct intel_encoder *encoder, + struct intel_crtc_state *old_crtc_state, + struct drm_connector_state *old_conn_state) { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -908,7 +912,8 @@ intel_tv_get_config(struct intel_encoder *encoder, static bool intel_tv_compute_config(struct intel_encoder *encoder, - struct 
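intel_check_sprite_plane() now seeds state->base.src/dst from the uapi coordinates itself, and keeps the source rectangle in 16.16 fixed point (the drm_plane uAPI convention): integer pixels are recovered with >> 16 and the clamped result is stored back with << 16. A minimal illustration of that convention:

#include <stdint.h>
#include <stdio.h>

/* drm plane source coordinates are 16.16 fixed point: high 16 bits are
 * the integer pixel position, low 16 bits a subpixel fraction. */
static uint32_t to_fixed(uint32_t px)     { return px << 16; }
static uint32_t to_pixels(uint32_t fixed) { return fixed >> 16; }

int main(void)
{
        uint32_t src_x1 = to_fixed(64);        /* 64.0 */
        uint32_t src_x2 = to_fixed(64 + 1280); /* exclusive right edge */

        printf("width = %u px\n", to_pixels(src_x2 - src_x1)); /* 1280 */
        return 0;
}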
intel_crtc_state *pipe_config) + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct intel_tv *intel_tv = enc_to_tv(encoder); const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv); @@ -1010,7 +1015,9 @@ static void set_color_conversion(struct drm_i915_private *dev_priv, color_conversion->av); } -static void intel_tv_pre_enable(struct intel_encoder *encoder) +static void intel_tv_pre_enable(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index ff80a81b1a84..ee2306a79747 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -435,7 +435,7 @@ void intel_uncore_sanitize(struct drm_i915_private *dev_priv) i915.enable_rc6 = sanitize_rc6_option(dev_priv, i915.enable_rc6); /* BIOS often leaves RC6 enabled, but disable it for hw init */ - intel_disable_gt_powersave(dev_priv); + intel_sanitize_gt_powersave(dev_priv); } static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv, @@ -796,10 +796,9 @@ __unclaimed_reg_debug(struct drm_i915_private *dev_priv, const bool read, const bool before) { - if (WARN(check_for_unclaimed_mmio(dev_priv), - "Unclaimed register detected %s %s register 0x%x\n", - before ? "before" : "after", - read ? "reading" : "writing to", + if (WARN(check_for_unclaimed_mmio(dev_priv) && !before, + "Unclaimed %s register 0x%x\n", + read ? "read from" : "write to", i915_mmio_reg_offset(reg))) i915.mmio_debug--; /* Only report the first N failures */ } @@ -1018,11 +1017,9 @@ gen5_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool __gen5_write(8) __gen5_write(16) __gen5_write(32) -__gen5_write(64) __gen2_write(8) __gen2_write(16) __gen2_write(32) -__gen2_write(64) #undef __gen5_write #undef __gen2_write @@ -1112,23 +1109,18 @@ gen9_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, \ __gen9_write(8) __gen9_write(16) __gen9_write(32) -__gen9_write(64) __chv_write(8) __chv_write(16) __chv_write(32) -__chv_write(64) __gen8_write(8) __gen8_write(16) __gen8_write(32) -__gen8_write(64) __hsw_write(8) __hsw_write(16) __hsw_write(32) -__hsw_write(64) __gen6_write(8) __gen6_write(16) __gen6_write(32) -__gen6_write(64) #undef __gen9_write #undef __chv_write @@ -1158,7 +1150,6 @@ static void vgpu_write##x(struct drm_i915_private *dev_priv, \ __vgpu_write(8) __vgpu_write(16) __vgpu_write(32) -__vgpu_write(64) #undef __vgpu_write #undef VGPU_WRITE_FOOTER @@ -1169,7 +1160,6 @@ do { \ dev_priv->uncore.funcs.mmio_writeb = x##_write8; \ dev_priv->uncore.funcs.mmio_writew = x##_write16; \ dev_priv->uncore.funcs.mmio_writel = x##_write32; \ - dev_priv->uncore.funcs.mmio_writeq = x##_write64; \ } while (0) #define ASSIGN_READ_MMIO_VFUNCS(x) \ @@ -1597,8 +1587,10 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv, if (engine_mask == ALL_ENGINES) { hw_mask = GEN6_GRDOM_FULL; } else { + unsigned int tmp; + hw_mask = 0; - for_each_engine_masked(engine, dev_priv, engine_mask) + for_each_engine_masked(engine, dev_priv, engine_mask, tmp) hw_mask |= hw_engine_mask[engine->id]; } @@ -1618,8 +1610,10 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv, * @timeout_ms: timeout in millisecond * * This routine waits until the target register @reg contains the expected - * @value after applying the @mask, 
i.e. it waits until - * (I915_READ_FW(@reg) & @mask) == @value + * @value after applying the @mask, i.e. it waits until :: + * + * (I915_READ_FW(reg) & mask) == value + * * Otherwise, the wait will timeout after @timeout_ms milliseconds. * * Note that this routine assumes the caller holds forcewake asserted, it is @@ -1652,8 +1646,10 @@ int intel_wait_for_register_fw(struct drm_i915_private *dev_priv, * @timeout_ms: timeout in millisecond * * This routine waits until the target register @reg contains the expected - * @value after applying the @mask, i.e. it waits until - * (I915_READ(@reg) & @mask) == @value + * @value after applying the @mask, i.e. it waits until :: + * + * (I915_READ(reg) & mask) == value + * * Otherwise, the wait will timeout after @timeout_ms milliseconds. * * Returns 0 if the register matches the desired condition, or -ETIMEOUT. @@ -1710,15 +1706,16 @@ static int gen8_reset_engines(struct drm_i915_private *dev_priv, unsigned engine_mask) { struct intel_engine_cs *engine; + unsigned int tmp; - for_each_engine_masked(engine, dev_priv, engine_mask) + for_each_engine_masked(engine, dev_priv, engine_mask, tmp) if (gen8_request_engine_reset(engine)) goto not_ready; return gen6_reset_engines(dev_priv, engine_mask); not_ready: - for_each_engine_masked(engine, dev_priv, engine_mask) + for_each_engine_masked(engine, dev_priv, engine_mask, tmp) gen8_unrequest_engine_reset(engine); return -EIO; |
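The reworked kernel-doc spells out the polling contract of intel_wait_for_register() and intel_wait_for_register_fw(). As a rough userspace analogue of that contract (mmio_read() here is a fake stand-in for the real register read; the actual routines also relax the CPU or sleep between reads):

#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Fake register that "becomes ready" after a few reads. */
static uint32_t fake_counter;
static uint32_t mmio_read(uint32_t reg)
{
        (void)reg;
        return ++fake_counter;
}

/* Sketch of the documented contract: poll until
 * (read(reg) & mask) == value, else give up after timeout_ms. */
static int wait_for_register(uint32_t reg, uint32_t mask, uint32_t value,
                             unsigned int timeout_ms)
{
        struct timespec start, now;

        clock_gettime(CLOCK_MONOTONIC, &start);
        for (;;) {
                if ((mmio_read(reg) & mask) == value)
                        return 0;
                clock_gettime(CLOCK_MONOTONIC, &now);
                if ((now.tv_sec - start.tv_sec) * 1000 +
                    (now.tv_nsec - start.tv_nsec) / 1000000 >= (long)timeout_ms)
                        return -110; /* -ETIMEDOUT */
        }
}

int main(void)
{
        printf("ret = %d\n", wait_for_register(0x2358, 0xff, 0x10, 10)); /* 0 */
        return 0;
}

Similarly, for_each_engine_masked() now takes a fourth argument, a caller-supplied scratch variable, which lets the macro destructively consume a copy of the mask while engine_mask itself stays intact for the later gen6_reset_engines() call. A plausible shape for such a macro, sketched over plain bitmasks rather than engine pointers (for_each_set_bit_scratch is invented for this sketch):

#include <stdint.h>
#include <stdio.h>

/* Walk the set bits of 'mask' via a scratch copy in 'tmp', leaving
 * 'mask' untouched. __builtin_ctz() finds the lowest set bit and
 * tmp &= tmp - 1 clears it. */
#define for_each_set_bit_scratch(bit, mask, tmp) \
        for ((tmp) = (mask); \
             (tmp) && (((bit) = __builtin_ctz((tmp))), 1); \
             (tmp) &= (tmp) - 1)

int main(void)
{
        const uint32_t hw_engine_mask[] = { 0x1, 0x2, 0x4, 0x8, 0x10 };
        unsigned int engine_mask = 0x0b; /* engines 0, 1 and 3 */
        unsigned int id, tmp;
        uint32_t hw_mask = 0;

        for_each_set_bit_scratch(id, engine_mask, tmp)
                hw_mask |= hw_engine_mask[id];

        printf("hw_mask = 0x%x\n", hw_mask); /* 0xb */
        return 0;
}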