diff options
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h')
-rw-r--r-- | drivers/gpu/drm/i915/intel_ringbuffer.h | 289 |
1 files changed, 89 insertions, 200 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index a1a7cc29fdd1..710ffb221775 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -5,6 +5,7 @@ #include <drm/drm_util.h> #include <linux/hashtable.h> +#include <linux/irq_work.h> #include <linux/seqlock.h> #include "i915_gem_batch_pool.h" @@ -28,12 +29,11 @@ struct i915_sched_attr; * workarounds! */ #define CACHELINE_BYTES 64 -#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t)) +#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32)) struct intel_hw_status_page { struct i915_vma *vma; - u32 *page_addr; - u32 ggtt_offset; + u32 *addr; }; #define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base)) @@ -94,12 +94,12 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a) #define I915_MAX_SUBSLICES 8 #define instdone_slice_mask(dev_priv__) \ - (IS_GEN7(dev_priv__) ? \ - 1 : INTEL_INFO(dev_priv__)->sseu.slice_mask) + (IS_GEN(dev_priv__, 7) ? \ + 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask) #define instdone_subslice_mask(dev_priv__) \ - (IS_GEN7(dev_priv__) ? \ - 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask[0]) + (IS_GEN(dev_priv__, 7) ? \ + 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0]) #define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \ for ((slice__) = 0, (subslice__) = 0; \ @@ -120,13 +120,8 @@ struct intel_instdone { struct intel_engine_hangcheck { u64 acthd; u32 seqno; - enum intel_engine_hangcheck_action action; unsigned long action_timestamp; - int deadlock; struct intel_instdone instdone; - struct i915_request *active_request; - bool stalled:1; - bool wedged:1; }; struct intel_ring { @@ -209,6 +204,7 @@ struct i915_priolist { struct st_preempt_hang { struct completion completion; + unsigned int count; bool inject_hang; }; @@ -299,14 +295,18 @@ struct intel_engine_execlists { unsigned int port_mask; /** - * @queue_priority: Highest pending priority. + * @queue_priority_hint: Highest pending priority. * * When we add requests into the queue, or adjust the priority of * executing requests, we compute the maximum priority of those * pending requests. We can then use this value to determine if * we need to preempt the executing requests to service the queue. + * However, since the we may have recorded the priority of an inflight + * request we wanted to preempt but since completed, at the time of + * dequeuing the priority hint may no longer may match the highest + * available request priority. */ - int queue_priority; + int queue_priority_hint; /** * @queue: queue of requests, in priority lists @@ -365,9 +365,6 @@ struct intel_engine_cs { struct drm_i915_gem_object *default_state; void *pinned_default_state; - unsigned long irq_posted; -#define ENGINE_IRQ_BREADCRUMB 0 - /* Rather than have every client wait upon all user interrupts, * with the herd waking after every interrupt and each doing the * heavyweight seqno dance, we delegate the task (of being the @@ -385,23 +382,14 @@ struct intel_engine_cs { * the overhead of waking that client is much preferred. */ struct intel_breadcrumbs { - spinlock_t irq_lock; /* protects irq_*; irqsafe */ - struct intel_wait *irq_wait; /* oldest waiter by retirement */ - - spinlock_t rb_lock; /* protects the rb and wraps irq_lock */ - struct rb_root waiters; /* sorted by retirement, priority */ - struct list_head signals; /* sorted by retirement */ - struct task_struct *signaler; /* used for fence signalling */ + spinlock_t irq_lock; + struct list_head signalers; - struct timer_list fake_irq; /* used after a missed interrupt */ - struct timer_list hangcheck; /* detect missed interrupts */ + struct irq_work irq_work; /* for use from inside irq_lock */ - unsigned int hangcheck_interrupts; unsigned int irq_enabled; - unsigned int irq_count; - bool irq_armed : 1; - I915_SELFTEST_DECLARE(bool mock : 1); + bool irq_armed; } breadcrumbs; struct { @@ -449,9 +437,8 @@ struct intel_engine_cs { int (*init_hw)(struct intel_engine_cs *engine); struct { - struct i915_request *(*prepare)(struct intel_engine_cs *engine); - void (*reset)(struct intel_engine_cs *engine, - struct i915_request *rq); + void (*prepare)(struct intel_engine_cs *engine); + void (*reset)(struct intel_engine_cs *engine, bool stalled); void (*finish)(struct intel_engine_cs *engine); } reset; @@ -475,8 +462,10 @@ struct intel_engine_cs { unsigned int dispatch_flags); #define I915_DISPATCH_SECURE BIT(0) #define I915_DISPATCH_PINNED BIT(1) - void (*emit_breadcrumb)(struct i915_request *rq, u32 *cs); - int emit_breadcrumb_sz; + int (*emit_init_breadcrumb)(struct i915_request *rq); + u32 *(*emit_fini_breadcrumb)(struct i915_request *rq, + u32 *cs); + unsigned int emit_fini_breadcrumb_dw; /* Pass the request to the hardware queue (e.g. directly into * the legacy ringbuffer or to the end of an execlist). @@ -502,69 +491,8 @@ struct intel_engine_cs { */ void (*cancel_requests)(struct intel_engine_cs *engine); - /* Some chipsets are not quite as coherent as advertised and need - * an expensive kick to force a true read of the up-to-date seqno. - * However, the up-to-date seqno is not always required and the last - * seen value is good enough. Note that the seqno will always be - * monotonic, even if not coherent. - */ - void (*irq_seqno_barrier)(struct intel_engine_cs *engine); void (*cleanup)(struct intel_engine_cs *engine); - /* GEN8 signal/wait table - never trust comments! - * signal to signal to signal to signal to signal to - * RCS VCS BCS VECS VCS2 - * -------------------------------------------------------------------- - * RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) | - * |------------------------------------------------------------------- - * VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) | - * |------------------------------------------------------------------- - * BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) | - * |------------------------------------------------------------------- - * VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) | NOP (0x90) | VCS2 (0x98) | - * |------------------------------------------------------------------- - * VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) | NOP (0xc0) | - * |------------------------------------------------------------------- - * - * Generalization: - * f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id) - * ie. transpose of g(x, y) - * - * sync from sync from sync from sync from sync from - * RCS VCS BCS VECS VCS2 - * -------------------------------------------------------------------- - * RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) | - * |------------------------------------------------------------------- - * VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) | - * |------------------------------------------------------------------- - * BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) | - * |------------------------------------------------------------------- - * VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) | NOP (0x90) | VCS2 (0xb8) | - * |------------------------------------------------------------------- - * VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) | NOP (0xc0) | - * |------------------------------------------------------------------- - * - * Generalization: - * g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id) - * ie. transpose of f(x, y) - */ - struct { -#define GEN6_SEMAPHORE_LAST VECS_HW -#define GEN6_NUM_SEMAPHORES (GEN6_SEMAPHORE_LAST + 1) -#define GEN6_SEMAPHORES_MASK GENMASK(GEN6_SEMAPHORE_LAST, 0) - struct { - /* our mbox written by others */ - u32 wait[GEN6_NUM_SEMAPHORES]; - /* mboxes this ring signals to */ - i915_reg_t signal[GEN6_NUM_SEMAPHORES]; - } mbox; - - /* AKA wait() */ - int (*sync_to)(struct i915_request *rq, - struct i915_request *signal); - u32 *(*signal)(struct i915_request *rq, u32 *cs); - } semaphore; - struct intel_engine_execlists execlists; /* Contexts are pinned whilst they are active on the GPU. The last @@ -665,7 +593,20 @@ intel_engine_has_preemption(const struct intel_engine_cs *engine) static inline bool __execlists_need_preempt(int prio, int last) { - return prio > max(0, last); + /* + * Allow preemption of low -> normal -> high, but we do + * not allow low priority tasks to preempt other low priority + * tasks under the impression that latency for low priority + * tasks does not matter (as much as background throughput), + * so kiss. + * + * More naturally we would write + * prio >= max(0, last); + * except that we wish to prevent triggering preemption at the same + * priority level: the task that is running should remain running + * to preserve FIFO ordering of dependencies. + */ + return prio > max(I915_PRIORITY_NORMAL - 1, last); } static inline void @@ -743,7 +684,7 @@ static inline u32 intel_read_status_page(const struct intel_engine_cs *engine, int reg) { /* Ensure that the compiler doesn't optimize away the load. */ - return READ_ONCE(engine->status_page.page_addr[reg]); + return READ_ONCE(engine->status_page.addr[reg]); } static inline void @@ -756,12 +697,12 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) */ if (static_cpu_has(X86_FEATURE_CLFLUSH)) { mb(); - clflush(&engine->status_page.page_addr[reg]); - engine->status_page.page_addr[reg] = value; - clflush(&engine->status_page.page_addr[reg]); + clflush(&engine->status_page.addr[reg]); + engine->status_page.addr[reg] = value; + clflush(&engine->status_page.addr[reg]); mb(); } else { - WRITE_ONCE(engine->status_page.page_addr[reg], value); + WRITE_ONCE(engine->status_page.addr[reg], value); } } @@ -782,11 +723,13 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) * The area from dword 0x30 to 0x3ff is available for driver usage. */ #define I915_GEM_HWS_INDEX 0x30 -#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT) -#define I915_GEM_HWS_PREEMPT_INDEX 0x32 -#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT) -#define I915_GEM_HWS_SCRATCH_INDEX 0x40 -#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) +#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX * sizeof(u32)) +#define I915_GEM_HWS_PREEMPT 0x32 +#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32)) +#define I915_GEM_HWS_SEQNO 0x40 +#define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32)) +#define I915_GEM_HWS_SCRATCH 0x80 +#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH * sizeof(u32)) #define I915_HWS_CSB_BUF0_INDEX 0x10 #define I915_HWS_CSB_WRITE_INDEX 0x1f @@ -809,7 +752,6 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv); int __must_check intel_ring_cacheline_align(struct i915_request *rq); -int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes); u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n); static inline void intel_ring_advance(struct i915_request *rq, u32 *cs) @@ -890,9 +832,21 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) return tail; } -void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno); +static inline unsigned int +__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) +{ + /* + * "If the Ring Buffer Head Pointer and the Tail Pointer are on the + * same cacheline, the Head Pointer must not be greater than the Tail + * Pointer." + */ + GEM_BUG_ON(!is_power_of_2(size)); + return (head - tail - CACHELINE_BYTES) & (size - 1); +} + +void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno); -void intel_engine_setup_common(struct intel_engine_cs *engine); +int intel_engine_setup_common(struct intel_engine_cs *engine); int intel_engine_init_common(struct intel_engine_cs *engine); void intel_engine_cleanup_common(struct intel_engine_cs *engine); @@ -904,6 +858,8 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine); int intel_engine_stop_cs(struct intel_engine_cs *engine); void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine); +void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask); + u64 intel_engine_get_active_head(const struct intel_engine_cs *engine); u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine); @@ -948,102 +904,29 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine, void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone); -/* - * Arbitrary size for largest possible 'add request' sequence. The code paths - * are complex and variable. Empirical measurement shows that the worst case - * is BDW at 192 bytes (6 + 6 + 36 dwords), then ILK at 136 bytes. However, - * we need to allocate double the largest single packet within that emission - * to account for tail wraparound (so 6 + 6 + 72 dwords for BDW). - */ -#define MIN_SPACE_FOR_ADD_REQUEST 336 - -static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) -{ - return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR; -} - -static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine) -{ - return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR; -} - -/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ -int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); - -static inline void intel_wait_init(struct intel_wait *wait) -{ - wait->tsk = current; - wait->request = NULL; -} - -static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno) -{ - wait->tsk = current; - wait->seqno = seqno; -} - -static inline bool intel_wait_has_seqno(const struct intel_wait *wait) -{ - return wait->seqno; -} - -static inline bool -intel_wait_update_seqno(struct intel_wait *wait, u32 seqno) -{ - wait->seqno = seqno; - return intel_wait_has_seqno(wait); -} - -static inline bool -intel_wait_update_request(struct intel_wait *wait, - const struct i915_request *rq) -{ - return intel_wait_update_seqno(wait, i915_request_global_seqno(rq)); -} - -static inline bool -intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno) -{ - return wait->seqno == seqno; -} - -static inline bool -intel_wait_check_request(const struct intel_wait *wait, - const struct i915_request *rq) -{ - return intel_wait_check_seqno(wait, i915_request_global_seqno(rq)); -} +void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); -static inline bool intel_wait_complete(const struct intel_wait *wait) -{ - return RB_EMPTY_NODE(&wait->node); -} +void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine); +void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine); -bool intel_engine_add_wait(struct intel_engine_cs *engine, - struct intel_wait *wait); -void intel_engine_remove_wait(struct intel_engine_cs *engine, - struct intel_wait *wait); -bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup); -void intel_engine_cancel_signaling(struct i915_request *request); +bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); -static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) +static inline void +intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine) { - return READ_ONCE(engine->breadcrumbs.irq_wait); + irq_work_queue(&engine->breadcrumbs.irq_work); } -unsigned int intel_engine_wakeup(struct intel_engine_cs *engine); -#define ENGINE_WAKEUP_WAITER BIT(0) -#define ENGINE_WAKEUP_ASLEEP BIT(1) - -void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine); -void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine); - -void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); -void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); +bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine); void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, + struct drm_printer *p); + static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) { memset(batch, 0, 6 * sizeof(u32)); @@ -1056,7 +939,7 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) } static inline u32 * -gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset) +gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags) { /* We're using qword write, offset should be aligned to 8 bytes. */ GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); @@ -1066,8 +949,7 @@ gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset) * following the batch. */ *cs++ = GFX_OP_PIPE_CONTROL(6); - *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE; + *cs++ = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB; *cs++ = gtt_offset; *cs++ = 0; *cs++ = value; @@ -1093,7 +975,14 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset) return cs; } -void intel_engines_sanitize(struct drm_i915_private *i915); +static inline void intel_engine_reset(struct intel_engine_cs *engine, + bool stalled) +{ + if (engine->reset.reset) + engine->reset.reset(engine, stalled); +} + +void intel_engines_sanitize(struct drm_i915_private *i915, bool force); bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engines_are_idle(struct drm_i915_private *dev_priv); |