1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
|
// SPDX-License-Identifier: MIT
/*
* Copyright © 2022 Intel Corporation
*/
#include "xe_wa.h"
#include <linux/compiler_types.h>
#include "xe_device_types.h"
#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_hw_engine_types.h"
#include "xe_mmio.h"
#include "xe_platform_types.h"
#include "xe_rtp.h"
#include "xe_step.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gt_regs.h"
#include "i915_reg.h"
/**
* DOC: Hardware workarounds
*
* Hardware workarounds are register programming documented to be executed in
* the driver that fall outside of the normal programming sequences for a
* platform. There are some basic categories of workarounds, depending on
* how/when they are applied:
*
* - LRC workarounds: workarounds that touch registers that are
* saved/restored to/from the HW context image. The list is emitted (via Load
* Register Immediate commands) once when initializing the device and saved in
* the default context. That default context is then used on every context
* creation to have a "primed golden context", i.e. a context image that
* already contains the changes needed to all the registers.
*
* TODO: Although these workarounds are maintained here, they are not
* currently being applied.
*
* - Engine workarounds: the list of these WAs is applied whenever the specific
* engine is reset. It's also possible that a set of engine classes share a
* common power domain and they are reset together. This happens on some
* platforms with render and compute engines. In this case (at least) one of
* them need to keeep the workaround programming: the approach taken in the
* driver is to tie those workarounds to the first compute/render engine that
* is registered. When executing with GuC submission, engine resets are
* outside of kernel driver control, hence the list of registers involved in
* written once, on engine initialization, and then passed to GuC, that
* saves/restores their values before/after the reset takes place. See
* ``drivers/gpu/drm/xe/xe_guc_ads.c`` for reference.
*
* - GT workarounds: the list of these WAs is applied whenever these registers
* revert to their default values: on GPU reset, suspend/resume [1]_, etc.
*
* - Register whitelist: some workarounds need to be implemented in userspace,
* but need to touch privileged registers. The whitelist in the kernel
* instructs the hardware to allow the access to happen. From the kernel side,
* this is just a special case of a MMIO workaround (as we write the list of
* these to/be-whitelisted registers to some special HW registers).
*
* - Workaround batchbuffers: buffers that get executed automatically by the
* hardware on every HW context restore. These buffers are created and
* programmed in the default context so the hardware always go through those
* programming sequences when switching contexts. The support for workaround
* batchbuffers is enabled these hardware mechanisms:
*
* #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default
* context, pointing the hardware to jump to that location when that offset
* is reached in the context restore. Workaround batchbuffer in the driver
* currently uses this mechanism for all platforms.
*
* #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context,
* pointing the hardware to a buffer to continue executing after the
* engine registers are restored in a context restore sequence. This is
* currently not used in the driver.
*
* - Other: There are WAs that, due to their nature, cannot be applied from a
* central place. Those are peppered around the rest of the code, as needed.
* Workarounds related to the display IP are the main example.
*
* .. [1] Technically, some registers are powercontext saved & restored, so they
* survive a suspend/resume. In practice, writing them again is not too
* costly and simplifies things, so it's the approach taken in the driver.
*
* .. note::
* Hardware workarounds in xe work the same way as in i915, with the
* difference of how they are maintained in the code. In xe it uses the
* xe_rtp infrastructure so the workarounds can be kept in tables, following
* a more declarative approach rather than procedural.
*/
#undef _MMIO
#undef MCR_REG
#define _MMIO(x) _XE_RTP_REG(x)
#define MCR_REG(x) _XE_RTP_MCR_REG(x)
static bool match_14011060649(const struct xe_gt *gt,
const struct xe_hw_engine *hwe)
{
return hwe->instance % 2 == 0;
}
static const struct xe_rtp_entry gt_was[] = {
{ XE_RTP_NAME("14011060649"),
XE_RTP_RULES(MEDIA_VERSION_RANGE(1200, 1255),
ENGINE_CLASS(VIDEO_DECODE),
FUNC(match_14011060649)),
XE_RTP_SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS,
XE_RTP_FLAG(FOREACH_ENGINE))
},
{ XE_RTP_NAME("16010515920"),
XE_RTP_RULES(SUBPLATFORM(DG2, G10),
STEP(A0, B0),
ENGINE_CLASS(VIDEO_DECODE)),
XE_RTP_SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS,
XE_RTP_FLAG(FOREACH_ENGINE))
},
{ XE_RTP_NAME("22010523718"),
XE_RTP_RULES(SUBPLATFORM(DG2, G10)),
XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE, CG3DDISCFEG_CLKGATE_DIS)
},
{ XE_RTP_NAME("14011006942"),
XE_RTP_RULES(SUBPLATFORM(DG2, G10)),
XE_RTP_SET(GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS)
},
{ XE_RTP_NAME("14010948348"),
XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
XE_RTP_SET(UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS)
},
{ XE_RTP_NAME("14011037102"),
XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
XE_RTP_SET(UNSLCGCTL9444, LTCDD_CLKGATE_DIS)
},
{ XE_RTP_NAME("14011371254"),
XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
XE_RTP_SET(GEN11_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS)
},
{ XE_RTP_NAME("14011431319/0"),
XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
XE_RTP_SET(UNSLCGCTL9440,
GAMTLBOACS_CLKGATE_DIS |
GAMTLBVDBOX7_CLKGATE_DIS | GAMTLBVDBOX6_CLKGATE_DIS |
GAMTLBVDBOX5_CLKGATE_DIS | GAMTLBVDBOX4_CLKGATE_DIS |
GAMTLBVDBOX3_CLKGATE_DIS | GAMTLBVDBOX2_CLKGATE_DIS |
GAMTLBVDBOX1_CLKGATE_DIS | GAMTLBVDBOX0_CLKGATE_DIS |
GAMTLBKCR_CLKGATE_DIS | GAMTLBGUC_CLKGATE_DIS |
GAMTLBBLT_CLKGATE_DIS)
},
{ XE_RTP_NAME("14011431319/1"),
XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
XE_RTP_SET(UNSLCGCTL9444,
GAMTLBGFXA0_CLKGATE_DIS | GAMTLBGFXA1_CLKGATE_DIS |
GAMTLBCOMPA0_CLKGATE_DIS | GAMTLBCOMPA1_CLKGATE_DIS |
GAMTLBCOMPB0_CLKGATE_DIS | GAMTLBCOMPB1_CLKGATE_DIS |
GAMTLBCOMPC0_CLKGATE_DIS | GAMTLBCOMPC1_CLKGATE_DIS |
GAMTLBCOMPD0_CLKGATE_DIS | GAMTLBCOMPD1_CLKGATE_DIS |
GAMTLBMERT_CLKGATE_DIS |
GAMTLBVEBOX3_CLKGATE_DIS | GAMTLBVEBOX2_CLKGATE_DIS |
GAMTLBVEBOX1_CLKGATE_DIS | GAMTLBVEBOX0_CLKGATE_DIS)
},
{ XE_RTP_NAME("14010569222"),
XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE, GAMEDIA_CLKGATE_DIS)
},
{ XE_RTP_NAME("14011028019"),
XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
XE_RTP_SET(SSMCGCTL9530, RTFUNIT_CLKGATE_DIS)
},
{ XE_RTP_NAME("14014830051"),
XE_RTP_RULES(PLATFORM(DG2)),
XE_RTP_CLR(SARB_CHICKEN1, COMP_CKN_IN)
},
{ XE_RTP_NAME("14015795083"),
XE_RTP_RULES(PLATFORM(DG2)),
XE_RTP_CLR(GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE)
},
{ XE_RTP_NAME("14011059788"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
XE_RTP_SET(GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE)
},
{ XE_RTP_NAME("1409420604"),
XE_RTP_RULES(PLATFORM(DG1)),
XE_RTP_SET(SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS)
},
{ XE_RTP_NAME("1408615072"),
XE_RTP_RULES(PLATFORM(DG1)),
XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL)
},
{}
};
static const struct xe_rtp_entry engine_was[] = {
{ XE_RTP_NAME("14015227452"),
XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
XE_RTP_SET(GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE,
XE_RTP_FLAG(MASKED_REG))
},
{ XE_RTP_NAME("1606931601"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
XE_RTP_SET(GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ,
XE_RTP_FLAG(MASKED_REG))
},
{ XE_RTP_NAME("22010931296, 18011464164, 14010919138"),
XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER)),
XE_RTP_SET(GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE)
},
{ XE_RTP_NAME("14010826681, 1606700617, 22010271021"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
XE_RTP_SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE,
XE_RTP_FLAG(MASKED_REG))
},
{ XE_RTP_NAME("18019627453"),
XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
XE_RTP_SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE,
XE_RTP_FLAG(MASKED_REG))
},
{ XE_RTP_NAME("1409804808"),
XE_RTP_RULES(GRAPHICS_VERSION(1200),
ENGINE_CLASS(RENDER),
IS_INTEGRATED),
XE_RTP_SET(GEN7_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS,
XE_RTP_FLAG(MASKED_REG))
},
{ XE_RTP_NAME("14010229206, 1409085225"),
XE_RTP_RULES(GRAPHICS_VERSION(1200),
ENGINE_CLASS(RENDER),
IS_INTEGRATED),
XE_RTP_SET(GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH,
XE_RTP_FLAG(MASKED_REG))
},
{ XE_RTP_NAME("1607297627, 1607030317, 1607186500"),
XE_RTP_RULES(PLATFORM(TIGERLAKE), ENGINE_CLASS(RENDER)),
XE_RTP_SET(RING_PSMI_CTL(RENDER_RING_BASE),
GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_FLAG(MASKED_REG))
},
{ XE_RTP_NAME("1607297627, 1607030317, 1607186500"),
XE_RTP_RULES(PLATFORM(ROCKETLAKE), ENGINE_CLASS(RENDER)),
XE_RTP_SET(RING_PSMI_CTL(RENDER_RING_BASE),
GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_FLAG(MASKED_REG))
},
{ XE_RTP_NAME("1406941453"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
XE_RTP_SET(GEN10_SAMPLER_MODE, ENABLE_SMALLPL, XE_RTP_FLAG(MASKED_REG))
},
{ XE_RTP_NAME("FtrPerCtxtPreemptionGranularityControl"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1250), ENGINE_CLASS(RENDER)),
XE_RTP_SET(GEN7_FF_SLICE_CS_CHICKEN1, GEN9_FFSC_PERCTX_PREEMPT_CTRL,
XE_RTP_FLAG(MASKED_REG))
},
{}
};
static const struct xe_rtp_entry lrc_was[] = {
{ XE_RTP_NAME("1409342910, 14010698770, 14010443199, 1408979724, 1409178076, 1409207793, 1409217633, 1409252684, 1409347922, 1409142259"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
XE_RTP_SET(GEN11_COMMON_SLICE_CHICKEN3,
GEN12_DISABLE_CPS_AWARE_COLOR_PIPE,
XE_RTP_FLAG(MASKED_REG))
},
{ XE_RTP_NAME("WaDisableGPGPUMidThreadPreemption"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
XE_RTP_FIELD_SET(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL,
XE_RTP_FLAG(MASKED_REG))
},
{ XE_RTP_NAME("16011163337"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
/* read verification is ignored due to 1608008084. */
XE_RTP_FIELD_SET_NO_READ_MASK(GEN12_FF_MODE2, FF_MODE2_GS_TIMER_MASK,
FF_MODE2_GS_TIMER_224)
},
{ XE_RTP_NAME("1409044764"),
XE_RTP_RULES(PLATFORM(DG1)),
XE_RTP_CLR(GEN11_COMMON_SLICE_CHICKEN3,
DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN,
XE_RTP_FLAG(MASKED_REG))
},
{ XE_RTP_NAME("22010493298"),
XE_RTP_RULES(PLATFORM(DG1)),
XE_RTP_SET(HIZ_CHICKEN,
DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE,
XE_RTP_FLAG(MASKED_REG))
},
{}
};
/**
* xe_wa_process_gt - process GT workaround table
* @gt: GT instance to process workarounds for
*
* Process GT workaround table for this platform, saving in @gt all the
* workarounds that need to be applied at the GT level.
*/
void xe_wa_process_gt(struct xe_gt *gt)
{
xe_rtp_process(gt_was, >->reg_sr, gt, NULL);
}
/**
* xe_wa_process_engine - process engine workaround table
* @hwe: engine instance to process workarounds for
*
* Process engine workaround table for this platform, saving in @hwe all the
* workarounds that need to be applied at the engine level that match this
* engine.
*/
void xe_wa_process_engine(struct xe_hw_engine *hwe)
{
xe_rtp_process(engine_was, &hwe->reg_sr, hwe->gt, hwe);
}
/**
* xe_wa_process_lrc - process context workaround table
* @hwe: engine instance to process workarounds for
*
* Process context workaround table for this platform, saving in @hwe all the
* workarounds that need to be applied on context restore. These are workarounds
* touching registers that are part of the HW context image.
*/
void xe_wa_process_lrc(struct xe_hw_engine *hwe)
{
xe_rtp_process(lrc_was, &hwe->reg_lrc, hwe->gt, hwe);
}
|