summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/drm/xe/regs/xe_engine_regs.h6
-rw-r--r--drivers/gpu/drm/xe/xe_gt.c66
2 files changed, 60 insertions, 12 deletions
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 68172b0248a6..dc5a4fafa70c 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -96,6 +96,12 @@
#define ENABLE_SEMAPHORE_POLL_BIT REG_BIT(13)
#define RING_CMD_CCTL(base) XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED)
+
+#define CS_MMIO_GROUP_INSTANCE_SELECT(base) XE_REG((base) + 0xcc)
+#define SELECTIVE_READ_ADDRESSING REG_BIT(30)
+#define SELECTIVE_READ_GROUP REG_GENMASK(29, 23)
+#define SELECTIVE_READ_INSTANCE REG_GENMASK(22, 16)
+
/*
* CMD_CCTL read/write fields take a MOCS value and _not_ a table index.
* The lsb of each can be considered a separate enabling bit for encryption.
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 9d090d0f2438..df6d04704823 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -210,11 +210,15 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
return ret;
}
+/* Dwords required to emit a RMW of a register */
+#define EMIT_RMW_DW 20
+
static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
{
- struct xe_reg_sr *sr = &q->hwe->reg_lrc;
+ struct xe_hw_engine *hwe = q->hwe;
+ struct xe_reg_sr *sr = &hwe->reg_lrc;
struct xe_reg_sr_entry *entry;
- int count_rmw = 0, count = 0, ret;
+ int count_rmw = 0, count_rmw_mcr = 0, count = 0, ret;
unsigned long idx;
struct xe_bb *bb;
size_t bb_len = 0;
@@ -224,6 +228,8 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
xa_for_each(&sr->xa, idx, entry) {
if (entry->reg.masked || entry->clr_bits == ~0)
++count;
+ else if (entry->reg.mcr)
+ ++count_rmw_mcr;
else
++count_rmw;
}
@@ -231,17 +237,35 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
if (count)
bb_len += count * 2 + 1;
- if (count_rmw)
- bb_len += count_rmw * 20 + 7;
+ /*
+ * RMW of MCR registers is the same as a normal RMW, except an
+ * additional LRI (3 dwords) is required per register to steer the read
+ * to a non-terminated instance.
+ *
+ * We could probably shorten the batch slightly by eliding the
+ * steering for consecutive MCR registers that have the same
+ * group/instance target, but it's not worth the extra complexity to do
+ * so.
+ */
+ bb_len += count_rmw * EMIT_RMW_DW;
+ bb_len += count_rmw_mcr * (EMIT_RMW_DW + 3);
+
+ /*
+ * After doing all RMW, we need 7 trailing dwords to clean up,
+ * plus an additional 3 dwords to reset steering if any of the
+ * registers were MCR.
+ */
+ if (count_rmw || count_rmw_mcr)
+ bb_len += 7 + (count_rmw_mcr ? 3 : 0);
- if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
+ if (hwe->class == XE_ENGINE_CLASS_RENDER)
/*
* Big enough to emit all of the context's 3DSTATE via
* xe_lrc_emit_hwe_state_instructions()
*/
- bb_len += xe_gt_lrc_size(gt, q->hwe->class) / sizeof(u32);
+ bb_len += xe_gt_lrc_size(gt, hwe->class) / sizeof(u32);
- xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", q->hwe->name, bb_len);
+ xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", hwe->name, bb_len);
bb = xe_bb_new(gt, bb_len, false);
if (IS_ERR(bb))
@@ -276,13 +300,23 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
}
}
- if (count_rmw) {
- /* Emit MI_MATH for each RMW reg: 20dw per reg + 7 trailing dw */
-
+ if (count_rmw || count_rmw_mcr) {
xa_for_each(&sr->xa, idx, entry) {
if (entry->reg.masked || entry->clr_bits == ~0)
continue;
+ if (entry->reg.mcr) {
+ struct xe_reg_mcr reg = { .__reg.raw = entry->reg.raw };
+ u8 group, instance;
+
+ xe_gt_mcr_get_nonterminated_steering(gt, reg, &group, &instance);
+ *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
+ *cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(hwe->mmio_base).addr;
+ *cs++ = SELECTIVE_READ_ADDRESSING |
+ REG_FIELD_PREP(SELECTIVE_READ_GROUP, group) |
+ REG_FIELD_PREP(SELECTIVE_READ_INSTANCE, instance);
+ }
+
*cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO;
*cs++ = entry->reg.addr;
*cs++ = CS_GPR_REG(0, 0).addr;
@@ -308,8 +342,9 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
*cs++ = CS_GPR_REG(0, 0).addr;
*cs++ = entry->reg.addr;
- xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n",
- entry->reg.addr, entry->clr_bits, entry->set_bits);
+ xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x%s\n",
+ entry->reg.addr, entry->clr_bits, entry->set_bits,
+ entry->reg.mcr ? " (MCR)" : "");
}
/* reset used GPR */
@@ -321,6 +356,13 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
*cs++ = 0;
*cs++ = CS_GPR_REG(0, 2).addr;
*cs++ = 0;
+
+ /* reset steering */
+ if (count_rmw_mcr) {
+ *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
+ *cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(q->hwe->mmio_base).addr;
+ *cs++ = 0;
+ }
}
cs = xe_lrc_emit_hwe_state_instructions(q, cs);