Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h')
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.h | 263
1 file changed, 235 insertions(+), 28 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2863d5a65187..c5ff203e42d6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -6,6 +6,7 @@
#include "i915_gem_batch_pool.h"
#include "i915_gem_request.h"
#include "i915_gem_timeline.h"
+#include "i915_pmu.h"
#include "i915_selftest.h"
struct drm_printer;
@@ -47,16 +48,6 @@ struct intel_hw_status_page {
/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
* do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
*/
-#define gen8_semaphore_seqno_size sizeof(uint64_t)
-#define GEN8_SEMAPHORE_OFFSET(__from, __to) \
- (((__from) * I915_NUM_ENGINES + (__to)) * gen8_semaphore_seqno_size)
-#define GEN8_SIGNAL_OFFSET(__ring, to) \
- (dev_priv->semaphore->node.start + \
- GEN8_SEMAPHORE_OFFSET((__ring)->id, (to)))
-#define GEN8_WAIT_OFFSET(__ring, from) \
- (dev_priv->semaphore->node.start + \
- GEN8_SEMAPHORE_OFFSET(from, (__ring)->id))
-
enum intel_engine_hangcheck_action {
ENGINE_IDLE = 0,
ENGINE_WAIT,
@@ -166,7 +157,6 @@ struct i915_ctx_workarounds {
};
struct drm_i915_gem_request;
-struct intel_render_state;
/*
* Engine IDs definitions.
@@ -195,9 +185,9 @@ struct i915_priolist {
*/
struct intel_engine_execlists {
/**
- * @irq_tasklet: softirq tasklet for bottom handler
+ * @tasklet: softirq tasklet for bottom handler
*/
- struct tasklet_struct irq_tasklet;
+ struct tasklet_struct tasklet;
/**
* @default_priolist: priority list for I915_PRIORITY_NORMAL
@@ -210,6 +200,11 @@ struct intel_engine_execlists {
bool no_priolist;
/**
+ * @elsp: the ExecList Submission Port register
+ */
+ u32 __iomem *elsp;
+
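Caching @elsp means the submission path can hit the port with plain writel()s rather than going through the uncore mmio helpers. A minimal sketch of the paired write each port submission performs, along the lines of what intel_lrc.c does (descriptor construction elided; illustrative, not the exact upstream helper):

    static inline void elsp_write(u64 desc, u32 __iomem *elsp)
    {
            /* The port expects the upper dword first, then the lower. */
            writel(upper_32_bits(desc), elsp);
            writel(lower_32_bits(desc), elsp);
    }
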
+ /**
* @port: execlist port states
*
* For each hardware ELSP (ExecList Submission Port) we keep
@@ -253,6 +248,7 @@ struct intel_engine_execlists {
unsigned int active;
#define EXECLISTS_ACTIVE_USER 0
#define EXECLISTS_ACTIVE_PREEMPT 1
+#define EXECLISTS_ACTIVE_HWACK 2
/**
* @port_mask: number of execlist ports - 1
@@ -290,11 +286,14 @@ struct intel_engine_execlists {
struct intel_engine_cs {
struct drm_i915_private *i915;
char name[INTEL_ENGINE_CS_MAX_NAME];
+
enum intel_engine_id id;
- unsigned int uabi_id;
unsigned int hw_id;
unsigned int guc_id;
+ u8 uabi_id;
+ u8 uabi_class;
+
u8 class;
u8 instance;
u32 context_size;
@@ -304,7 +303,7 @@ struct intel_engine_cs {
struct intel_ring *buffer;
struct intel_timeline *timeline;
- struct intel_render_state *render_state;
+ struct drm_i915_gem_object *default_state;
atomic_t irq_count;
unsigned long irq_posted;
@@ -340,12 +339,49 @@ struct intel_engine_cs {
struct timer_list hangcheck; /* detect missed interrupts */
unsigned int hangcheck_interrupts;
+ unsigned int irq_enabled;
bool irq_armed : 1;
- bool irq_enabled : 1;
I915_SELFTEST_DECLARE(bool mock : 1);
} breadcrumbs;
+ struct {
+ /**
+ * @enable: Bitmask of enabled sampling events on this engine.
+ *
+ * Bits correspond to sample event types, for instance
+ * I915_SAMPLE_QUEUED is bit 0, etc.
+ */
+ u32 enable;
+ /**
+ * @enable_count: Reference count for the enabled samplers.
+ *
+ * Index number corresponds to the bit number from @enable.
+ */
+ unsigned int enable_count[I915_PMU_SAMPLE_BITS];
+ /**
+ * @sample: Counter values for sampling events.
+ *
+ * Our internal timer stores the current counters in this field.
+ */
+#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
+ struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
+ /**
+ * @busy_stats: Whether enabling of engine busy stats tracking
+ * has been requested.
+ */
+ bool busy_stats;
+ /**
+ * @disable_busy_stats: Work item for busy stats disabling.
+ *
+ * Same as the enabling action, with the difference that we
+ * delay it in case there are rapid enable-disable actions,
+ * which can happen during tool startup (like perf stat).
+ */
+ struct delayed_work disable_busy_stats;
+ } pmu;
+
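These fields feed the software PMU: a timer in i915_pmu.c periodically walks the engines and, for each bit set in @enable, folds the sampled value into the matching @sample slot. A minimal sketch of that accounting, assuming struct i915_pmu_sample carries a plain cur accumulator (the helper name is hypothetical):

    static void engine_sample_period(struct intel_engine_cs *engine, u32 period_ns)
    {
            /* Only account for samplers userspace has enabled. */
            if (engine->pmu.enable & BIT(I915_SAMPLE_BUSY))
                    engine->pmu.sample[I915_SAMPLE_BUSY].cur += period_ns;
    }
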
/*
* A pool of objects to use as shadow copies of client batch buffers
* when the command parser is enabled. Prevents the client from
@@ -366,6 +402,9 @@ struct intel_engine_cs {
void (*reset_hw)(struct intel_engine_cs *engine,
struct drm_i915_gem_request *req);
+ void (*park)(struct intel_engine_cs *engine);
+ void (*unpark)(struct intel_engine_cs *engine);
+
void (*set_default_submission)(struct intel_engine_cs *engine);
struct intel_ring *(*context_pin)(struct intel_engine_cs *engine,
@@ -462,18 +501,15 @@ struct intel_engine_cs {
* ie. transpose of f(x, y)
*/
struct {
- union {
#define GEN6_SEMAPHORE_LAST VECS_HW
#define GEN6_NUM_SEMAPHORES (GEN6_SEMAPHORE_LAST + 1)
#define GEN6_SEMAPHORES_MASK GENMASK(GEN6_SEMAPHORE_LAST, 0)
- struct {
- /* our mbox written by others */
- u32 wait[GEN6_NUM_SEMAPHORES];
- /* mboxes this ring signals to */
- i915_reg_t signal[GEN6_NUM_SEMAPHORES];
- } mbox;
- u64 signal_ggtt[I915_NUM_ENGINES];
- };
+ struct {
+ /* our mbox written by others */
+ u32 wait[GEN6_NUM_SEMAPHORES];
+ /* mboxes this ring signals to */
+ i915_reg_t signal[GEN6_NUM_SEMAPHORES];
+ } mbox;
/* AKA wait() */
int (*sync_to)(struct drm_i915_gem_request *req,
@@ -501,13 +537,16 @@ struct intel_engine_cs {
* stream (ring).
*/
struct i915_gem_context *legacy_active_context;
+ struct i915_hw_ppgtt *legacy_active_ppgtt;
/* status_notifier: list of callbacks for context-switch changes */
struct atomic_notifier_head context_status_notifier;
struct intel_engine_hangcheck hangcheck;
- bool needs_cmd_parser;
+#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
+#define I915_ENGINE_SUPPORTS_STATS BIT(1)
+ unsigned int flags;
/*
* Table of commands the command parser needs to know about
@@ -532,8 +571,50 @@ struct intel_engine_cs {
* certain bits to encode the command length in the header).
*/
u32 (*get_cmd_length_mask)(u32 cmd_header);
+
+ struct {
+ /**
+ * @lock: Lock protecting the below fields.
+ */
+ spinlock_t lock;
+ /**
+ * @enabled: Reference count indicating number of listeners.
+ */
+ unsigned int enabled;
+ /**
+ * @active: Number of contexts currently scheduled in.
+ */
+ unsigned int active;
+ /**
+ * @enabled_at: Timestamp when busy stats were enabled.
+ */
+ ktime_t enabled_at;
+ /**
+ * @start: Timestamp of the last idle-to-active transition.
+ *
+ * Idle is defined as active == 0, busy as active > 0.
+ */
+ ktime_t start;
+ /**
+ * @total: Total time this engine was busy.
+ *
+ * Accumulated time not counting the most recent block in cases
+ * where the engine is currently busy (active > 0).
+ */
+ ktime_t total;
+ } stats;
};
+static inline bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine)
+{
+ return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
+}
+
+static inline bool intel_engine_supports_stats(struct intel_engine_cs *engine)
+{
+ return engine->flags & I915_ENGINE_SUPPORTS_STATS;
+}
+
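The old needs_cmd_parser bool is folded into bit 0 of @flags, leaving room for I915_ENGINE_SUPPORTS_STATS and future capability bits without growing the struct. A hedged sketch of both ends of the contract (the setup function is hypothetical; the real bits are set during engine initialisation):

    static void example_engine_setup(struct intel_engine_cs *engine)
    {
            /* Replaces the old 'engine->needs_cmd_parser = true;'. */
            engine->flags |= I915_ENGINE_NEEDS_CMD_PARSER;

            /* Only engines with busy-stats hooks should advertise this. */
            engine->flags |= I915_ENGINE_SUPPORTS_STATS;
    }

Call sites then test intel_engine_needs_cmd_parser(engine) instead of the removed bool.
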
static inline void
execlists_set_active(struct intel_engine_execlists *execlists,
unsigned int bit)
@@ -555,6 +636,12 @@ execlists_is_active(const struct intel_engine_execlists *execlists,
return test_bit(bit, (unsigned long *)&execlists->active);
}
+void
+execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);
+
+void
+execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
+
static inline unsigned int
execlists_num_ports(const struct intel_engine_execlists * const execlists)
{
@@ -624,6 +711,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
*/
#define I915_GEM_HWS_INDEX 0x30
#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
+#define I915_GEM_HWS_PREEMPT_INDEX 0x32
+#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
#define I915_GEM_HWS_SCRATCH_INDEX 0x40
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
@@ -648,6 +737,7 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);
+int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes);
u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req,
unsigned int n);
@@ -776,6 +866,11 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
}
+static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
+{
+ return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR;
+}
+
/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
@@ -846,6 +941,9 @@ unsigned int intel_engine_wakeup(struct intel_engine_cs *engine);
#define ENGINE_WAKEUP_WAITER BIT(0)
#define ENGINE_WAKEUP_ASLEEP BIT(1)
+void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
+void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);
+
void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
@@ -864,14 +962,123 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
return batch + 6;
}
+static inline u32 *
+gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset)
+{
+ /* We're using a qword write, so the offset must be aligned to 8 bytes. */
+ GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
+
+ /*
+ * w/a: post-sync ops following a GPGPU operation need a prior
+ * CS_STALL, which is emitted by the flush following the batch.
+ */
+ *cs++ = GFX_OP_PIPE_CONTROL(6);
+ *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_QW_WRITE;
+ *cs++ = gtt_offset;
+ *cs++ = 0;
+ *cs++ = value;
+ /* The qword write clobbers one extra dword of the HWS. */
+ *cs++ = 0;
+
+ return cs;
+}
+
+static inline u32 *
+gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
+{
+ /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
+ GEM_BUG_ON(gtt_offset & (1 << 5));
+ /* Offset should be aligned to 8 bytes for both (QW/DW) write types */
+ GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
+
+ *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
+ *cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
+ *cs++ = 0;
+ *cs++ = value;
+
+ return cs;
+}
+
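Both emitters return the advanced cs pointer, so they slot straight into the intel_ring_begin()/intel_ring_advance() pattern used throughout the driver. A minimal sketch of emitting a seqno breadcrumb with the MI_FLUSH_DW variant (the function name is hypothetical; the real breadcrumb emitters live in intel_lrc.c):

    static int emit_breadcrumb_example(struct drm_i915_gem_request *req)
    {
            u32 *cs;

            /* gen8_emit_ggtt_write() consumes exactly 4 dwords. */
            cs = intel_ring_begin(req, 4);
            if (IS_ERR(cs))
                    return PTR_ERR(cs);

            cs = gen8_emit_ggtt_write(cs, req->global_seqno,
                                      intel_hws_seqno_address(req->engine));
            intel_ring_advance(req, cs);

            return 0;
    }
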
bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
-void intel_engines_mark_idle(struct drm_i915_private *i915);
+bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine);
+
+void intel_engines_park(struct drm_i915_private *i915);
+void intel_engines_unpark(struct drm_i915_private *i915);
+
void intel_engines_reset_default_submission(struct drm_i915_private *i915);
+unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
bool intel_engine_can_store_dword(struct intel_engine_cs *engine);
-void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *p);
+__printf(3, 4)
+void intel_engine_dump(struct intel_engine_cs *engine,
+ struct drm_printer *m,
+ const char *header, ...);
+
+struct intel_engine_cs *
+intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
+
+static inline void intel_engine_context_in(struct intel_engine_cs *engine)
+{
+ unsigned long flags;
+
+ if (READ_ONCE(engine->stats.enabled) == 0)
+ return;
+
+ spin_lock_irqsave(&engine->stats.lock, flags);
+
+ if (engine->stats.enabled > 0) {
+ if (engine->stats.active++ == 0)
+ engine->stats.start = ktime_get();
+ GEM_BUG_ON(engine->stats.active == 0);
+ }
+
+ spin_unlock_irqrestore(&engine->stats.lock, flags);
+}
+
+static inline void intel_engine_context_out(struct intel_engine_cs *engine)
+{
+ unsigned long flags;
+
+ if (READ_ONCE(engine->stats.enabled) == 0)
+ return;
+
+ spin_lock_irqsave(&engine->stats.lock, flags);
+
+ if (engine->stats.enabled > 0) {
+ ktime_t last;
+
+ if (engine->stats.active && --engine->stats.active == 0) {
+ /*
+ * Decrement the active context count and, in case the
+ * GPU is now idle, add the elapsed time to the running
+ * total.
+ */
+ last = ktime_sub(ktime_get(), engine->stats.start);
+
+ engine->stats.total = ktime_add(engine->stats.total,
+ last);
+ } else if (engine->stats.active == 0) {
+ /*
+ * After turning on engine stats, a context-out might be
+ * the first event, in which case we account from the
+ * time the stats gathering was turned on.
+ */
+ last = ktime_sub(ktime_get(), engine->stats.enabled_at);
+
+ engine->stats.total = ktime_add(engine->stats.total,
+ last);
+ }
+ }
+
+ spin_unlock_irqrestore(&engine->stats.lock, flags);
+}
+
+int intel_enable_engine_stats(struct intel_engine_cs *engine);
+void intel_disable_engine_stats(struct intel_engine_cs *engine);
+
+ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
#endif /* _INTEL_RINGBUFFER_H_ */
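
Taken together, the stats block gives the PMU a cheap busy-time counter: intel_enable_engine_stats() arms the context in/out accounting, intel_engine_get_busy_time() reads the accumulated total, and intel_disable_engine_stats() drops the reference. A minimal sketch of a one-shot consumer (assuming the enable call fails for engines without I915_ENGINE_SUPPORTS_STATS):

    static ktime_t example_sample_busy(struct intel_engine_cs *engine)
    {
            ktime_t busy;

            /* Expected to fail for engines lacking I915_ENGINE_SUPPORTS_STATS. */
            if (intel_enable_engine_stats(engine))
                    return 0;

            busy = intel_engine_get_busy_time(engine);
            intel_disable_engine_stats(engine);

            return busy;
    }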