xref: /openbmc/linux/drivers/gpu/drm/i915/gt/intel_workarounds.c (revision f8a11425075ff11b4b5784f077cb84f3d2dfb3f0)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2014-2018 Intel Corporation
4  */
5 
6 #include "i915_drv.h"
7 #include "intel_context.h"
8 #include "intel_engine_pm.h"
9 #include "intel_gpu_commands.h"
10 #include "intel_gt.h"
11 #include "intel_ring.h"
12 #include "intel_workarounds.h"
13 
14 /**
15  * DOC: Hardware workarounds
16  *
17  * This file is intended as a central place to implement most [1]_ of the
18  * required workarounds for hardware to work as originally intended. They fall
19  * in five basic categories depending on how/when they are applied:
20  *
21  * - Workarounds that touch registers that are saved/restored to/from the HW
22  *   context image. The list is emitted (via Load Register Immediate commands)
23  *   every time a new context is created.
24  * - GT workarounds. The list of these WAs is applied whenever these registers
25  *   revert to default values (on GPU reset, suspend/resume [2]_, etc..).
26  * - Display workarounds. The list is applied during display clock-gating
27  *   initialization.
28  * - Workarounds that whitelist a privileged register, so that UMDs can manage
29  *   them directly. This is just a special case of an MMIO workaround (as we
30  *   write the list of these to-be-whitelisted registers to some special HW
31  *   registers).
32  * - Workaround batchbuffers, that get executed automatically by the hardware
33  *   on every HW context restore.
34  *
35  * .. [1] Please notice that there are other WAs that, due to their nature,
36  *    cannot be applied from a central place. Those are peppered around the rest
37  *    of the code, as needed.
38  *
39  * .. [2] Technically, some registers are powercontext saved & restored, so they
40  *    survive a suspend/resume. In practice, writing them again is not too
41  *    costly and simplifies things. We can revisit this in the future.
42  *
43  * Layout
44  * ~~~~~~
45  *
46  * Keep things in this file ordered by WA type, as per the above (context, GT,
47  * display, register whitelist, batchbuffer). Then, inside each type, keep the
48  * following order:
49  *
50  * - Infrastructure functions and macros
51  * - WAs per platform in standard gen/chrono order
52  * - Public functions to init or apply the given workaround type.
53  */
54 
55 static void wa_init_start(struct i915_wa_list *wal, const char *name, const char *engine_name)
56 {
57 	wal->name = name;
58 	wal->engine_name = engine_name;
59 }
60 
61 #define WA_LIST_CHUNK (1 << 4)
62 
63 static void wa_init_finish(struct i915_wa_list *wal)
64 {
65 	/* Trim unused entries. */
66 	if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
67 		struct i915_wa *list = kmemdup(wal->list,
68 					       wal->count * sizeof(*list),
69 					       GFP_KERNEL);
70 
71 		if (list) {
72 			kfree(wal->list);
73 			wal->list = list;
74 		}
75 	}
76 
77 	if (!wal->count)
78 		return;
79 
80 	DRM_DEBUG_DRIVER("Initialized %u %s workarounds on %s\n",
81 			 wal->wa_count, wal->name, wal->engine_name);
82 }
83 
84 static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
85 {
86 	unsigned int addr = i915_mmio_reg_offset(wa->reg);
87 	unsigned int start = 0, end = wal->count;
88 	const unsigned int grow = WA_LIST_CHUNK;
89 	struct i915_wa *wa_;
90 
91 	GEM_BUG_ON(!is_power_of_2(grow));
92 
93 	if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
94 		struct i915_wa *list;
95 
96 		list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
97 				     GFP_KERNEL);
98 		if (!list) {
99 			DRM_ERROR("No space for workaround init!\n");
100 			return;
101 		}
102 
103 		if (wal->list) {
104 			memcpy(list, wal->list, sizeof(*wa) * wal->count);
105 			kfree(wal->list);
106 		}
107 
108 		wal->list = list;
109 	}
110 
111 	while (start < end) {
112 		unsigned int mid = start + (end - start) / 2;
113 
114 		if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
115 			start = mid + 1;
116 		} else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
117 			end = mid;
118 		} else {
119 			wa_ = &wal->list[mid];
120 
121 			if ((wa->clr | wa_->clr) && !(wa->clr & ~wa_->clr)) {
122 				DRM_ERROR("Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n",
123 					  i915_mmio_reg_offset(wa_->reg),
124 					  wa_->clr, wa_->set);
125 
126 				wa_->set &= ~wa->clr;
127 			}
128 
129 			wal->wa_count++;
130 			wa_->set |= wa->set;
131 			wa_->clr |= wa->clr;
132 			wa_->read |= wa->read;
133 			return;
134 		}
135 	}
136 
137 	wal->wa_count++;
138 	wa_ = &wal->list[wal->count++];
139 	*wa_ = *wa;
140 
141 	while (wa_-- > wal->list) {
142 		GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
143 			   i915_mmio_reg_offset(wa_[1].reg));
144 		if (i915_mmio_reg_offset(wa_[1].reg) >
145 		    i915_mmio_reg_offset(wa_[0].reg))
146 			break;
147 
148 		swap(wa_[1], wa_[0]);
149 	}
150 }
151 
152 static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
153 		   u32 clear, u32 set, u32 read_mask)
154 {
155 	struct i915_wa wa = {
156 		.reg  = reg,
157 		.clr  = clear,
158 		.set  = set,
159 		.read = read_mask,
160 	};
161 
162 	_wa_add(wal, &wa);
163 }
164 
165 static void
166 wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
167 {
168 	wa_add(wal, reg, clear, set, clear);
169 }
170 
171 static void
172 wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
173 {
174 	wa_write_clr_set(wal, reg, ~0, set);
175 }
176 
177 static void
178 wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
179 {
180 	wa_write_clr_set(wal, reg, set, set);
181 }
182 
183 static void
184 wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
185 {
186 	wa_write_clr_set(wal, reg, clr, 0);
187 }
188 
189 /*
190  * WA operations on "masked register". A masked register has the upper 16 bits
191  * documented as "masked" in b-spec. Its purpose is to allow writing to just a
192  * portion of the register without a rmw: you simply write in the upper 16 bits
193  * the mask of bits you are going to modify.
194  *
195  * The wa_masked_* family of functions already does the necessary operations to
196  * calculate the mask based on the parameters passed, so user only has to
197  * provide the lower 16 bits of that register.
198  */
199 
200 static void
201 wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
202 {
203 	wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val);
204 }
205 
206 static void
207 wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
208 {
209 	wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val);
210 }
211 
212 static void
213 wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
214 		    u32 mask, u32 val)
215 {
216 	wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask);
217 }
218 
219 static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
220 				      struct i915_wa_list *wal)
221 {
222 	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
223 }
224 
225 static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine,
226 				      struct i915_wa_list *wal)
227 {
228 	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
229 }
230 
231 static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
232 				      struct i915_wa_list *wal)
233 {
234 	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
235 
236 	/* WaDisableAsyncFlipPerfMode:bdw,chv */
237 	wa_masked_en(wal, MI_MODE, ASYNC_FLIP_PERF_DISABLE);
238 
239 	/* WaDisablePartialInstShootdown:bdw,chv */
240 	wa_masked_en(wal, GEN8_ROW_CHICKEN,
241 		     PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
242 
243 	/* Use Force Non-Coherent whenever executing a 3D context. This is a
244 	 * workaround for a possible hang in the unlikely event a TLB
245 	 * invalidation occurs during a PSD flush.
246 	 */
247 	/* WaForceEnableNonCoherent:bdw,chv */
248 	/* WaHdcDisableFetchWhenMasked:bdw,chv */
249 	wa_masked_en(wal, HDC_CHICKEN0,
250 		     HDC_DONOT_FETCH_MEM_WHEN_MASKED |
251 		     HDC_FORCE_NON_COHERENT);
252 
253 	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
254 	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
255 	 *  polygons in the same 8x4 pixel/sample area to be processed without
256 	 *  stalling waiting for the earlier ones to write to Hierarchical Z
257 	 *  buffer."
258 	 *
259 	 * This optimization is off by default for BDW and CHV; turn it on.
260 	 */
261 	wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
262 
263 	/* Wa4x4STCOptimizationDisable:bdw,chv */
264 	wa_masked_en(wal, CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
265 
266 	/*
267 	 * BSpec recommends 8x4 when MSAA is used,
268 	 * however in practice 16x4 seems fastest.
269 	 *
270 	 * Note that PS/WM thread counts depend on the WIZ hashing
271 	 * disable bit, which we don't touch here, but it's good
272 	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
273 	 */
274 	wa_masked_field_set(wal, GEN7_GT_MODE,
275 			    GEN6_WIZ_HASHING_MASK,
276 			    GEN6_WIZ_HASHING_16x4);
277 }
278 
279 static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
280 				     struct i915_wa_list *wal)
281 {
282 	struct drm_i915_private *i915 = engine->i915;
283 
284 	gen8_ctx_workarounds_init(engine, wal);
285 
286 	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
287 	wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
288 
289 	/* WaDisableDopClockGating:bdw
290 	 *
291 	 * Also see the related UCGTCL1 write in bdw_init_clock_gating()
292 	 * to disable EUTC clock gating.
293 	 */
294 	wa_masked_en(wal, GEN7_ROW_CHICKEN2,
295 		     DOP_CLOCK_GATING_DISABLE);
296 
297 	wa_masked_en(wal, HALF_SLICE_CHICKEN3,
298 		     GEN8_SAMPLER_POWER_BYPASS_DIS);
299 
300 	wa_masked_en(wal, HDC_CHICKEN0,
301 		     /* WaForceContextSaveRestoreNonCoherent:bdw */
302 		     HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
303 		     /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
304 		     (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
305 }
306 
307 static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
308 				     struct i915_wa_list *wal)
309 {
310 	gen8_ctx_workarounds_init(engine, wal);
311 
312 	/* WaDisableThreadStallDopClockGating:chv */
313 	wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
314 
315 	/* Improve HiZ throughput on CHV. */
316 	wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
317 }
318 
319 static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
320 				      struct i915_wa_list *wal)
321 {
322 	struct drm_i915_private *i915 = engine->i915;
323 
324 	if (HAS_LLC(i915)) {
325 		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
326 		 *
327 		 * Must match Display Engine. See
328 		 * WaCompressedResourceDisplayNewHashMode.
329 		 */
330 		wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
331 			     GEN9_PBE_COMPRESSED_HASH_SELECTION);
332 		wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
333 			     GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
334 	}
335 
336 	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
337 	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
338 	wa_masked_en(wal, GEN8_ROW_CHICKEN,
339 		     FLOW_CONTROL_ENABLE |
340 		     PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
341 
342 	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
343 	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
344 	wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
345 		     GEN9_ENABLE_YV12_BUGFIX |
346 		     GEN9_ENABLE_GPGPU_PREEMPTION);
347 
348 	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
349 	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
350 	wa_masked_en(wal, CACHE_MODE_1,
351 		     GEN8_4x4_STC_OPTIMIZATION_DISABLE |
352 		     GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
353 
354 	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
355 	wa_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
356 		      GEN9_CCS_TLB_PREFETCH_ENABLE);
357 
358 	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
359 	wa_masked_en(wal, HDC_CHICKEN0,
360 		     HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
361 		     HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
362 
363 	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
364 	 * both tied to WaForceContextSaveRestoreNonCoherent
365 	 * in some hsds for skl. We keep the tie for all gen9. The
366 	 * documentation is a bit hazy and so we want to get common behaviour,
367 	 * even though there is no clear evidence we would need both on kbl/bxt.
368 	 * This area has been source of system hangs so we play it safe
369 	 * and mimic the skl regardless of what bspec says.
370 	 *
371 	 * Use Force Non-Coherent whenever executing a 3D context. This
372 	 * is a workaround for a possible hang in the unlikely event
373 	 * a TLB invalidation occurs during a PSD flush.
374 	 */
375 
376 	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
377 	wa_masked_en(wal, HDC_CHICKEN0,
378 		     HDC_FORCE_NON_COHERENT);
379 
380 	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
381 	if (IS_SKYLAKE(i915) ||
382 	    IS_KABYLAKE(i915) ||
383 	    IS_COFFEELAKE(i915) ||
384 	    IS_COMETLAKE(i915))
385 		wa_masked_en(wal, HALF_SLICE_CHICKEN3,
386 			     GEN8_SAMPLER_POWER_BYPASS_DIS);
387 
388 	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
389 	wa_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
390 
391 	/*
392 	 * Supporting preemption with fine-granularity requires changes in the
393 	 * batch buffer programming. Since we can't break old userspace, we
394 	 * need to set our default preemption level to safe value. Userspace is
395 	 * still able to use more fine-grained preemption levels, since in
396 	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
397 	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
398 	 * not real HW workarounds, but merely a way to start using preemption
399 	 * while maintaining old contract with userspace.
400 	 */
401 
402 	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
403 	wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
404 
405 	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */
406 	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
407 			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
408 			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
409 
410 	/* WaClearHIZ_WM_CHICKEN3:bxt,glk */
411 	if (IS_GEN9_LP(i915))
412 		wa_masked_en(wal, GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
413 }
414 
415 static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
416 				struct i915_wa_list *wal)
417 {
418 	struct intel_gt *gt = engine->gt;
419 	u8 vals[3] = { 0, 0, 0 };
420 	unsigned int i;
421 
422 	for (i = 0; i < 3; i++) {
423 		u8 ss;
424 
425 		/*
426 		 * Only consider slices where one, and only one, subslice has 7
427 		 * EUs
428 		 */
429 		if (!is_power_of_2(gt->info.sseu.subslice_7eu[i]))
430 			continue;
431 
432 		/*
433 		 * subslice_7eu[i] != 0 (because of the check above) and
434 		 * ss_max == 4 (maximum number of subslices possible per slice)
435 		 *
436 		 * ->    0 <= ss <= 3;
437 		 */
438 		ss = ffs(gt->info.sseu.subslice_7eu[i]) - 1;
439 		vals[i] = 3 - ss;
440 	}
441 
442 	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
443 		return;
444 
445 	/* Tune IZ hashing. See intel_device_info_runtime_init() */
446 	wa_masked_field_set(wal, GEN7_GT_MODE,
447 			    GEN9_IZ_HASHING_MASK(2) |
448 			    GEN9_IZ_HASHING_MASK(1) |
449 			    GEN9_IZ_HASHING_MASK(0),
450 			    GEN9_IZ_HASHING(2, vals[2]) |
451 			    GEN9_IZ_HASHING(1, vals[1]) |
452 			    GEN9_IZ_HASHING(0, vals[0]));
453 }
454 
/* Skylake context workarounds: the gen9 common set followed by IZ hashing tuning. */
static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	/* Common gen9 entries first ... */
	gen9_ctx_workarounds_init(engine, wal);

	/* ... then the skl-specific GT_MODE hashing setup. */
	skl_tune_iz_hashing(engine, wal);
}
461 
462 static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
463 				     struct i915_wa_list *wal)
464 {
465 	gen9_ctx_workarounds_init(engine, wal);
466 
467 	/* WaDisableThreadStallDopClockGating:bxt */
468 	wa_masked_en(wal, GEN8_ROW_CHICKEN,
469 		     STALL_DOP_GATING_DISABLE);
470 
471 	/* WaToEnableHwFixForPushConstHWBug:bxt */
472 	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
473 		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
474 }
475 
476 static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
477 				     struct i915_wa_list *wal)
478 {
479 	struct drm_i915_private *i915 = engine->i915;
480 
481 	gen9_ctx_workarounds_init(engine, wal);
482 
483 	/* WaToEnableHwFixForPushConstHWBug:kbl */
484 	if (IS_KBL_GT_STEP(i915, STEP_C0, STEP_FOREVER))
485 		wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
486 			     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
487 
488 	/* WaDisableSbeCacheDispatchPortSharing:kbl */
489 	wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
490 		     GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
491 }
492 
493 static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
494 				     struct i915_wa_list *wal)
495 {
496 	gen9_ctx_workarounds_init(engine, wal);
497 
498 	/* WaToEnableHwFixForPushConstHWBug:glk */
499 	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
500 		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
501 }
502 
503 static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
504 				     struct i915_wa_list *wal)
505 {
506 	gen9_ctx_workarounds_init(engine, wal);
507 
508 	/* WaToEnableHwFixForPushConstHWBug:cfl */
509 	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
510 		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
511 
512 	/* WaDisableSbeCacheDispatchPortSharing:cfl */
513 	wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
514 		     GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
515 }
516 
517 static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
518 				     struct i915_wa_list *wal)
519 {
520 	/* WaForceContextSaveRestoreNonCoherent:cnl */
521 	wa_masked_en(wal, CNL_HDC_CHICKEN0,
522 		     HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
523 
524 	/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
525 	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
526 		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
527 
528 	/* WaPushConstantDereferenceHoldDisable:cnl */
529 	wa_masked_en(wal, GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
530 
531 	/* FtrEnableFastAnisoL1BankingFix:cnl */
532 	wa_masked_en(wal, HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
533 
534 	/* WaDisable3DMidCmdPreemption:cnl */
535 	wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
536 
537 	/* WaDisableGPGPUMidCmdPreemption:cnl */
538 	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
539 			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
540 			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
541 
542 	/* WaDisableEarlyEOT:cnl */
543 	wa_masked_en(wal, GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
544 }
545 
546 static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
547 				     struct i915_wa_list *wal)
548 {
549 	struct drm_i915_private *i915 = engine->i915;
550 
551 	/* WaDisableBankHangMode:icl */
552 	wa_write(wal,
553 		 GEN8_L3CNTLREG,
554 		 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
555 		 GEN8_ERRDETBCTRL);
556 
557 	/* Wa_1604370585:icl (pre-prod)
558 	 * Formerly known as WaPushConstantDereferenceHoldDisable
559 	 */
560 	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
561 		wa_masked_en(wal, GEN7_ROW_CHICKEN2,
562 			     PUSH_CONSTANT_DEREF_DISABLE);
563 
564 	/* WaForceEnableNonCoherent:icl
565 	 * This is not the same workaround as in early Gen9 platforms, where
566 	 * lacking this could cause system hangs, but coherency performance
567 	 * overhead is high and only a few compute workloads really need it
568 	 * (the register is whitelisted in hardware now, so UMDs can opt in
569 	 * for coherency if they have a good reason).
570 	 */
571 	wa_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
572 
573 	/* Wa_2006611047:icl (pre-prod)
574 	 * Formerly known as WaDisableImprovedTdlClkGating
575 	 */
576 	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
577 		wa_masked_en(wal, GEN7_ROW_CHICKEN2,
578 			     GEN11_TDL_CLOCK_GATING_FIX_DISABLE);
579 
580 	/* Wa_2006665173:icl (pre-prod) */
581 	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
582 		wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
583 			     GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);
584 
585 	/* WaEnableFloatBlendOptimization:icl */
586 	wa_write_clr_set(wal,
587 			 GEN10_CACHE_MODE_SS,
588 			 0, /* write-only, so skip validation */
589 			 _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));
590 
591 	/* WaDisableGPGPUMidThreadPreemption:icl */
592 	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
593 			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
594 			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
595 
596 	/* allow headerless messages for preemptible GPGPU context */
597 	wa_masked_en(wal, GEN10_SAMPLER_MODE,
598 		     GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
599 
600 	/* Wa_1604278689:icl,ehl */
601 	wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
602 	wa_write_clr_set(wal, IVB_FBC_RT_BASE_UPPER,
603 			 0, /* write-only register; skip validation */
604 			 0xFFFFFFFF);
605 
606 	/* Wa_1406306137:icl,ehl */
607 	wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
608 }
609 
610 static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
611 				       struct i915_wa_list *wal)
612 {
613 	/*
614 	 * Wa_1409142259:tgl
615 	 * Wa_1409347922:tgl
616 	 * Wa_1409252684:tgl
617 	 * Wa_1409217633:tgl
618 	 * Wa_1409207793:tgl
619 	 * Wa_1409178076:tgl
620 	 * Wa_1408979724:tgl
621 	 * Wa_14010443199:rkl
622 	 * Wa_14010698770:rkl
623 	 */
624 	wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
625 		     GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
626 
627 	/* WaDisableGPGPUMidThreadPreemption:gen12 */
628 	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
629 			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
630 			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
631 }
632 
633 static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
634 				     struct i915_wa_list *wal)
635 {
636 	gen12_ctx_workarounds_init(engine, wal);
637 
638 	/*
639 	 * Wa_1604555607:tgl,rkl
640 	 *
641 	 * Note that the implementation of this workaround is further modified
642 	 * according to the FF_MODE2 guidance given by Wa_1608008084:gen12.
643 	 * FF_MODE2 register will return the wrong value when read. The default
644 	 * value for this register is zero for all fields and there are no bit
645 	 * masks. So instead of doing a RMW we should just write the GS Timer
646 	 * and TDS timer values for Wa_1604555607 and Wa_16011163337.
647 	 */
648 	wa_add(wal,
649 	       FF_MODE2,
650 	       FF_MODE2_GS_TIMER_MASK | FF_MODE2_TDS_TIMER_MASK,
651 	       FF_MODE2_GS_TIMER_224  | FF_MODE2_TDS_TIMER_128,
652 	       0);
653 }
654 
655 static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
656 				     struct i915_wa_list *wal)
657 {
658 	gen12_ctx_workarounds_init(engine, wal);
659 
660 	/* Wa_1409044764 */
661 	wa_masked_dis(wal, GEN11_COMMON_SLICE_CHICKEN3,
662 		      DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN);
663 
664 	/* Wa_22010493298 */
665 	wa_masked_en(wal, HIZ_CHICKEN,
666 		     DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE);
667 
668 	/*
669 	 * Wa_16011163337
670 	 *
671 	 * Like in tgl_ctx_workarounds_init(), read verification is ignored due
672 	 * to Wa_1608008084.
673 	 */
674 	wa_add(wal,
675 	       FF_MODE2,
676 	       FF_MODE2_GS_TIMER_MASK, FF_MODE2_GS_TIMER_224, 0);
677 }
678 
679 static void
680 __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
681 			   struct i915_wa_list *wal,
682 			   const char *name)
683 {
684 	struct drm_i915_private *i915 = engine->i915;
685 
686 	if (engine->class != RENDER_CLASS)
687 		return;
688 
689 	wa_init_start(wal, name, engine->name);
690 
691 	if (IS_DG1(i915))
692 		dg1_ctx_workarounds_init(engine, wal);
693 	else if (IS_ALDERLAKE_S(i915) || IS_ROCKETLAKE(i915) ||
694 		 IS_TIGERLAKE(i915))
695 		tgl_ctx_workarounds_init(engine, wal);
696 	else if (IS_GEN(i915, 12))
697 		gen12_ctx_workarounds_init(engine, wal);
698 	else if (IS_GEN(i915, 11))
699 		icl_ctx_workarounds_init(engine, wal);
700 	else if (IS_CANNONLAKE(i915))
701 		cnl_ctx_workarounds_init(engine, wal);
702 	else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
703 		cfl_ctx_workarounds_init(engine, wal);
704 	else if (IS_GEMINILAKE(i915))
705 		glk_ctx_workarounds_init(engine, wal);
706 	else if (IS_KABYLAKE(i915))
707 		kbl_ctx_workarounds_init(engine, wal);
708 	else if (IS_BROXTON(i915))
709 		bxt_ctx_workarounds_init(engine, wal);
710 	else if (IS_SKYLAKE(i915))
711 		skl_ctx_workarounds_init(engine, wal);
712 	else if (IS_CHERRYVIEW(i915))
713 		chv_ctx_workarounds_init(engine, wal);
714 	else if (IS_BROADWELL(i915))
715 		bdw_ctx_workarounds_init(engine, wal);
716 	else if (IS_GEN(i915, 7))
717 		gen7_ctx_workarounds_init(engine, wal);
718 	else if (IS_GEN(i915, 6))
719 		gen6_ctx_workarounds_init(engine, wal);
720 	else if (INTEL_GEN(i915) < 8)
721 		;
722 	else
723 		MISSING_CASE(INTEL_GEN(i915));
724 
725 	wa_init_finish(wal);
726 }
727 
728 void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
729 {
730 	__intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
731 }
732 
733 int intel_engine_emit_ctx_wa(struct i915_request *rq)
734 {
735 	struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
736 	struct i915_wa *wa;
737 	unsigned int i;
738 	u32 *cs;
739 	int ret;
740 
741 	if (wal->count == 0)
742 		return 0;
743 
744 	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
745 	if (ret)
746 		return ret;
747 
748 	cs = intel_ring_begin(rq, (wal->count * 2 + 2));
749 	if (IS_ERR(cs))
750 		return PTR_ERR(cs);
751 
752 	*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
753 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
754 		*cs++ = i915_mmio_reg_offset(wa->reg);
755 		*cs++ = wa->set;
756 	}
757 	*cs++ = MI_NOOP;
758 
759 	intel_ring_advance(rq, cs);
760 
761 	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
762 	if (ret)
763 		return ret;
764 
765 	return 0;
766 }
767 
768 static void
769 gen4_gt_workarounds_init(struct drm_i915_private *i915,
770 			 struct i915_wa_list *wal)
771 {
772 	/* WaDisable_RenderCache_OperationalFlush:gen4,ilk */
773 	wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
774 }
775 
776 static void
777 g4x_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
778 {
779 	gen4_gt_workarounds_init(i915, wal);
780 
781 	/* WaDisableRenderCachePipelinedFlush:g4x,ilk */
782 	wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE);
783 }
784 
785 static void
786 ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
787 {
788 	g4x_gt_workarounds_init(i915, wal);
789 
790 	wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED);
791 }
792 
/* Sandybridge GT workarounds: intentionally empty, nothing is added to @wal. */
static void
snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
}
797 
798 static void
799 ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
800 {
801 	/* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
802 	wa_masked_dis(wal,
803 		      GEN7_COMMON_SLICE_CHICKEN1,
804 		      GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
805 
806 	/* WaApplyL3ControlAndL3ChickenMode:ivb */
807 	wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
808 	wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
809 
810 	/* WaForceL3Serialization:ivb */
811 	wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
812 }
813 
814 static void
815 vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
816 {
817 	/* WaForceL3Serialization:vlv */
818 	wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
819 
820 	/*
821 	 * WaIncreaseL3CreditsForVLVB0:vlv
822 	 * This is the hardware default actually.
823 	 */
824 	wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
825 }
826 
827 static void
828 hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
829 {
830 	/* L3 caching of data atomics doesn't work -- disable it. */
831 	wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
832 
833 	wa_add(wal,
834 	       HSW_ROW_CHICKEN3, 0,
835 	       _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
836 		0 /* XXX does this reg exist? */);
837 
838 	/* WaVSRefCountFullforceMissDisable:hsw */
839 	wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
840 }
841 
842 static void
843 gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
844 {
845 	/* WaDisableKillLogic:bxt,skl,kbl */
846 	if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
847 		wa_write_or(wal,
848 			    GAM_ECOCHK,
849 			    ECOCHK_DIS_TLB);
850 
851 	if (HAS_LLC(i915)) {
852 		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
853 		 *
854 		 * Must match Display Engine. See
855 		 * WaCompressedResourceDisplayNewHashMode.
856 		 */
857 		wa_write_or(wal,
858 			    MMCD_MISC_CTRL,
859 			    MMCD_PCLA | MMCD_HOTSPOT_EN);
860 	}
861 
862 	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
863 	wa_write_or(wal,
864 		    GAM_ECOCHK,
865 		    BDW_DISABLE_HDC_INVALIDATION);
866 }
867 
868 static void
869 skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
870 {
871 	gen9_gt_workarounds_init(i915, wal);
872 
873 	/* WaDisableGafsUnitClkGating:skl */
874 	wa_write_or(wal,
875 		    GEN7_UCGCTL4,
876 		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
877 
878 	/* WaInPlaceDecompressionHang:skl */
879 	if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
880 		wa_write_or(wal,
881 			    GEN9_GAMT_ECO_REG_RW_IA,
882 			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
883 }
884 
885 static void
886 bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
887 {
888 	gen9_gt_workarounds_init(i915, wal);
889 
890 	/* WaInPlaceDecompressionHang:bxt */
891 	wa_write_or(wal,
892 		    GEN9_GAMT_ECO_REG_RW_IA,
893 		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
894 }
895 
896 static void
897 kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
898 {
899 	gen9_gt_workarounds_init(i915, wal);
900 
901 	/* WaDisableDynamicCreditSharing:kbl */
902 	if (IS_KBL_GT_STEP(i915, 0, STEP_B0))
903 		wa_write_or(wal,
904 			    GAMT_CHKN_BIT_REG,
905 			    GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
906 
907 	/* WaDisableGafsUnitClkGating:kbl */
908 	wa_write_or(wal,
909 		    GEN7_UCGCTL4,
910 		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
911 
912 	/* WaInPlaceDecompressionHang:kbl */
913 	wa_write_or(wal,
914 		    GEN9_GAMT_ECO_REG_RW_IA,
915 		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
916 }
917 
/* Geminilake GT workarounds: nothing beyond the gen9 common set. */
static void
glk_gt_workarounds_init(struct drm_i915_private *i915,
			struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);
}
923 
924 static void
925 cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
926 {
927 	gen9_gt_workarounds_init(i915, wal);
928 
929 	/* WaDisableGafsUnitClkGating:cfl */
930 	wa_write_or(wal,
931 		    GEN7_UCGCTL4,
932 		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
933 
934 	/* WaInPlaceDecompressionHang:cfl */
935 	wa_write_or(wal,
936 		    GEN9_GAMT_ECO_REG_RW_IA,
937 		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
938 }
939 
940 static void
941 wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
942 {
943 	const struct sseu_dev_info *sseu = &i915->gt.info.sseu;
944 	unsigned int slice, subslice;
945 	u32 l3_en, mcr, mcr_mask;
946 
947 	GEM_BUG_ON(INTEL_GEN(i915) < 10);
948 
949 	/*
950 	 * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
951 	 * L3Banks could be fused off in single slice scenario. If that is
952 	 * the case, we might need to program MCR select to a valid L3Bank
953 	 * by default, to make sure we correctly read certain registers
954 	 * later on (in the range 0xB100 - 0xB3FF).
955 	 *
956 	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
957 	 * Before any MMIO read into slice/subslice specific registers, MCR
958 	 * packet control register needs to be programmed to point to any
959 	 * enabled s/ss pair. Otherwise, incorrect values will be returned.
960 	 * This means each subsequent MMIO read will be forwarded to an
961 	 * specific s/ss combination, but this is OK since these registers
962 	 * are consistent across s/ss in almost all cases. In the rare
963 	 * occasions, such as INSTDONE, where this value is dependent
964 	 * on s/ss combo, the read should be done with read_subslice_reg.
965 	 *
966 	 * Since GEN8_MCR_SELECTOR contains dual-purpose bits which select both
967 	 * to which subslice, or to which L3 bank, the respective mmio reads
968 	 * will go, we have to find a common index which works for both
969 	 * accesses.
970 	 *
971 	 * Case where we cannot find a common index fortunately should not
972 	 * happen in production hardware, so we only emit a warning instead of
973 	 * implementing something more complex that requires checking the range
974 	 * of every MMIO read.
975 	 */
976 
977 	if (INTEL_GEN(i915) >= 10 && is_power_of_2(sseu->slice_mask)) {
978 		u32 l3_fuse =
979 			intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) &
980 			GEN10_L3BANK_MASK;
981 
982 		drm_dbg(&i915->drm, "L3 fuse = %x\n", l3_fuse);
983 		l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse);
984 	} else {
985 		l3_en = ~0;
986 	}
987 
988 	slice = fls(sseu->slice_mask) - 1;
989 	subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice));
990 	if (!subslice) {
991 		drm_warn(&i915->drm,
992 			 "No common index found between subslice mask %x and L3 bank mask %x!\n",
993 			 intel_sseu_get_subslices(sseu, slice), l3_en);
994 		subslice = fls(l3_en);
995 		drm_WARN_ON(&i915->drm, !subslice);
996 	}
997 	subslice--;
998 
999 	if (INTEL_GEN(i915) >= 11) {
1000 		mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
1001 		mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
1002 	} else {
1003 		mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
1004 		mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
1005 	}
1006 
1007 	drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr);
1008 
1009 	wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
1010 }
1011 
1012 static void
1013 cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
1014 {
1015 	wa_init_mcr(i915, wal);
1016 
1017 	/* WaInPlaceDecompressionHang:cnl */
1018 	wa_write_or(wal,
1019 		    GEN9_GAMT_ECO_REG_RW_IA,
1020 		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1021 }
1022 
1023 static void
1024 icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
1025 {
1026 	wa_init_mcr(i915, wal);
1027 
1028 	/* WaInPlaceDecompressionHang:icl */
1029 	wa_write_or(wal,
1030 		    GEN9_GAMT_ECO_REG_RW_IA,
1031 		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1032 
1033 	/* WaModifyGamTlbPartitioning:icl */
1034 	wa_write_clr_set(wal,
1035 			 GEN11_GACB_PERF_CTRL,
1036 			 GEN11_HASH_CTRL_MASK,
1037 			 GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
1038 
1039 	/* Wa_1405766107:icl
1040 	 * Formerly known as WaCL2SFHalfMaxAlloc
1041 	 */
1042 	wa_write_or(wal,
1043 		    GEN11_LSN_UNSLCVC,
1044 		    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
1045 		    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
1046 
1047 	/* Wa_220166154:icl
1048 	 * Formerly known as WaDisCtxReload
1049 	 */
1050 	wa_write_or(wal,
1051 		    GEN8_GAMW_ECO_DEV_RW_IA,
1052 		    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
1053 
1054 	/* Wa_1405779004:icl (pre-prod) */
1055 	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
1056 		wa_write_or(wal,
1057 			    SLICE_UNIT_LEVEL_CLKGATE,
1058 			    MSCUNIT_CLKGATE_DIS);
1059 
1060 	/* Wa_1406838659:icl (pre-prod) */
1061 	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
1062 		wa_write_or(wal,
1063 			    INF_UNIT_LEVEL_CLKGATE,
1064 			    CGPSF_CLKGATE_DIS);
1065 
1066 	/* Wa_1406463099:icl
1067 	 * Formerly known as WaGamTlbPendError
1068 	 */
1069 	wa_write_or(wal,
1070 		    GAMT_CHKN_BIT_REG,
1071 		    GAMT_CHKN_DISABLE_L3_COH_PIPE);
1072 
1073 	/* Wa_1607087056:icl,ehl,jsl */
1074 	if (IS_ICELAKE(i915) ||
1075 	    IS_JSL_EHL_REVID(i915, EHL_REVID_A0, EHL_REVID_A0))
1076 		wa_write_or(wal,
1077 			    SLICE_UNIT_LEVEL_CLKGATE,
1078 			    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
1079 }
1080 
/* GT workarounds shared by all gen12 platforms: only the MCR selector setup. */
static void gen12_gt_workarounds_init(struct drm_i915_private *i915,
				      struct i915_wa_list *wal)
{
	wa_init_mcr(i915, wal);
}
1087 
1088 static void
1089 tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
1090 {
1091 	gen12_gt_workarounds_init(i915, wal);
1092 
1093 	/* Wa_1409420604:tgl */
1094 	if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0))
1095 		wa_write_or(wal,
1096 			    SUBSLICE_UNIT_LEVEL_CLKGATE2,
1097 			    CPSSUNIT_CLKGATE_DIS);
1098 
1099 	/* Wa_1607087056:tgl also know as BUG:1409180338 */
1100 	if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0))
1101 		wa_write_or(wal,
1102 			    SLICE_UNIT_LEVEL_CLKGATE,
1103 			    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
1104 
1105 	/* Wa_1408615072:tgl[a0] */
1106 	if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0))
1107 		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
1108 			    VSUNIT_CLKGATE_DIS_TGL);
1109 }
1110 
1111 static void
1112 dg1_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
1113 {
1114 	gen12_gt_workarounds_init(i915, wal);
1115 
1116 	/* Wa_1607087056:dg1 */
1117 	if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0))
1118 		wa_write_or(wal,
1119 			    SLICE_UNIT_LEVEL_CLKGATE,
1120 			    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
1121 
1122 	/* Wa_1409420604:dg1 */
1123 	if (IS_DG1(i915))
1124 		wa_write_or(wal,
1125 			    SUBSLICE_UNIT_LEVEL_CLKGATE2,
1126 			    CPSSUNIT_CLKGATE_DIS);
1127 
1128 	/* Wa_1408615072:dg1 */
1129 	/* Empirical testing shows this register is unaffected by engine reset. */
1130 	if (IS_DG1(i915))
1131 		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
1132 			    VSUNIT_CLKGATE_DIS_TGL);
1133 }
1134 
/*
 * Dispatch to the platform-specific GT workaround builder for @i915.
 *
 * The first matching test wins, so more specific platforms must be tested
 * before the generic generation checks. Platforms of gen8 or older that
 * match none of the tests get no GT workarounds; anything newer that is
 * not handled is reported through MISSING_CASE().
 */
static void
gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	if (IS_DG1(i915))
		dg1_gt_workarounds_init(i915, wal);
	else if (IS_TIGERLAKE(i915))
		tgl_gt_workarounds_init(i915, wal);
	else if (IS_GEN(i915, 12))
		gen12_gt_workarounds_init(i915, wal);
	else if (IS_GEN(i915, 11))
		icl_gt_workarounds_init(i915, wal);
	else if (IS_CANNONLAKE(i915))
		cnl_gt_workarounds_init(i915, wal);
	else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
		cfl_gt_workarounds_init(i915, wal);
	else if (IS_GEMINILAKE(i915))
		glk_gt_workarounds_init(i915, wal);
	else if (IS_KABYLAKE(i915))
		kbl_gt_workarounds_init(i915, wal);
	else if (IS_BROXTON(i915))
		bxt_gt_workarounds_init(i915, wal);
	else if (IS_SKYLAKE(i915))
		skl_gt_workarounds_init(i915, wal);
	else if (IS_HASWELL(i915))
		hsw_gt_workarounds_init(i915, wal);
	else if (IS_VALLEYVIEW(i915))
		vlv_gt_workarounds_init(i915, wal);
	else if (IS_IVYBRIDGE(i915))
		ivb_gt_workarounds_init(i915, wal);
	else if (IS_GEN(i915, 6))
		snb_gt_workarounds_init(i915, wal);
	else if (IS_GEN(i915, 5))
		ilk_gt_workarounds_init(i915, wal);
	else if (IS_G4X(i915))
		g4x_gt_workarounds_init(i915, wal);
	else if (IS_GEN(i915, 4))
		gen4_gt_workarounds_init(i915, wal);
	else if (INTEL_GEN(i915) > 8)
		MISSING_CASE(INTEL_GEN(i915));
}
1177 
1178 void intel_gt_init_workarounds(struct drm_i915_private *i915)
1179 {
1180 	struct i915_wa_list *wal = &i915->gt_wa_list;
1181 
1182 	wa_init_start(wal, "GT", "global");
1183 	gt_init_workarounds(i915, wal);
1184 	wa_init_finish(wal);
1185 }
1186 
1187 static enum forcewake_domains
1188 wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
1189 {
1190 	enum forcewake_domains fw = 0;
1191 	struct i915_wa *wa;
1192 	unsigned int i;
1193 
1194 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1195 		fw |= intel_uncore_forcewake_for_reg(uncore,
1196 						     wa->reg,
1197 						     FW_REG_READ |
1198 						     FW_REG_WRITE);
1199 
1200 	return fw;
1201 }
1202 
1203 static bool
1204 wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
1205 {
1206 	if ((cur ^ wa->set) & wa->read) {
1207 		DRM_ERROR("%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n",
1208 			  name, from, i915_mmio_reg_offset(wa->reg),
1209 			  cur, cur & wa->read, wa->set & wa->read);
1210 
1211 		return false;
1212 	}
1213 
1214 	return true;
1215 }
1216 
1217 static void
1218 wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
1219 {
1220 	enum forcewake_domains fw;
1221 	unsigned long flags;
1222 	struct i915_wa *wa;
1223 	unsigned int i;
1224 
1225 	if (!wal->count)
1226 		return;
1227 
1228 	fw = wal_get_fw_for_rmw(uncore, wal);
1229 
1230 	spin_lock_irqsave(&uncore->lock, flags);
1231 	intel_uncore_forcewake_get__locked(uncore, fw);
1232 
1233 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
1234 		if (wa->clr)
1235 			intel_uncore_rmw_fw(uncore, wa->reg, wa->clr, wa->set);
1236 		else
1237 			intel_uncore_write_fw(uncore, wa->reg, wa->set);
1238 		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1239 			wa_verify(wa,
1240 				  intel_uncore_read_fw(uncore, wa->reg),
1241 				  wal->name, "application");
1242 	}
1243 
1244 	intel_uncore_forcewake_put__locked(uncore, fw);
1245 	spin_unlock_irqrestore(&uncore->lock, flags);
1246 }
1247 
1248 void intel_gt_apply_workarounds(struct intel_gt *gt)
1249 {
1250 	wa_list_apply(gt->uncore, &gt->i915->gt_wa_list);
1251 }
1252 
1253 static bool wa_list_verify(struct intel_uncore *uncore,
1254 			   const struct i915_wa_list *wal,
1255 			   const char *from)
1256 {
1257 	struct i915_wa *wa;
1258 	unsigned int i;
1259 	bool ok = true;
1260 
1261 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1262 		ok &= wa_verify(wa,
1263 				intel_uncore_read(uncore, wa->reg),
1264 				wal->name, from);
1265 
1266 	return ok;
1267 }
1268 
1269 bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
1270 {
1271 	return wa_list_verify(gt->uncore, &gt->i915->gt_wa_list, from);
1272 }
1273 
1274 __maybe_unused
1275 static bool is_nonpriv_flags_valid(u32 flags)
1276 {
1277 	/* Check only valid flag bits are set */
1278 	if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID)
1279 		return false;
1280 
1281 	/* NB: Only 3 out of 4 enum values are valid for access field */
1282 	if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) ==
1283 	    RING_FORCE_TO_NONPRIV_ACCESS_INVALID)
1284 		return false;
1285 
1286 	return true;
1287 }
1288 
1289 static void
1290 whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
1291 {
1292 	struct i915_wa wa = {
1293 		.reg = reg
1294 	};
1295 
1296 	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
1297 		return;
1298 
1299 	if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
1300 		return;
1301 
1302 	wa.reg.reg |= flags;
1303 	_wa_add(wal, &wa);
1304 }
1305 
1306 static void
1307 whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
1308 {
1309 	whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
1310 }
1311 
1312 static void gen9_whitelist_build(struct i915_wa_list *w)
1313 {
1314 	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
1315 	whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
1316 
1317 	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
1318 	whitelist_reg(w, GEN8_CS_CHICKEN1);
1319 
1320 	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
1321 	whitelist_reg(w, GEN8_HDC_CHICKEN1);
1322 
1323 	/* WaSendPushConstantsFromMMIO:skl,bxt */
1324 	whitelist_reg(w, COMMON_SLICE_CHICKEN2);
1325 }
1326 
1327 static void skl_whitelist_build(struct intel_engine_cs *engine)
1328 {
1329 	struct i915_wa_list *w = &engine->whitelist;
1330 
1331 	if (engine->class != RENDER_CLASS)
1332 		return;
1333 
1334 	gen9_whitelist_build(w);
1335 
1336 	/* WaDisableLSQCROPERFforOCL:skl */
1337 	whitelist_reg(w, GEN8_L3SQCREG4);
1338 }
1339 
1340 static void bxt_whitelist_build(struct intel_engine_cs *engine)
1341 {
1342 	if (engine->class != RENDER_CLASS)
1343 		return;
1344 
1345 	gen9_whitelist_build(&engine->whitelist);
1346 }
1347 
1348 static void kbl_whitelist_build(struct intel_engine_cs *engine)
1349 {
1350 	struct i915_wa_list *w = &engine->whitelist;
1351 
1352 	if (engine->class != RENDER_CLASS)
1353 		return;
1354 
1355 	gen9_whitelist_build(w);
1356 
1357 	/* WaDisableLSQCROPERFforOCL:kbl */
1358 	whitelist_reg(w, GEN8_L3SQCREG4);
1359 }
1360 
1361 static void glk_whitelist_build(struct intel_engine_cs *engine)
1362 {
1363 	struct i915_wa_list *w = &engine->whitelist;
1364 
1365 	if (engine->class != RENDER_CLASS)
1366 		return;
1367 
1368 	gen9_whitelist_build(w);
1369 
1370 	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
1371 	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1372 }
1373 
1374 static void cfl_whitelist_build(struct intel_engine_cs *engine)
1375 {
1376 	struct i915_wa_list *w = &engine->whitelist;
1377 
1378 	if (engine->class != RENDER_CLASS)
1379 		return;
1380 
1381 	gen9_whitelist_build(w);
1382 
1383 	/*
1384 	 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
1385 	 *
1386 	 * This covers 4 register which are next to one another :
1387 	 *   - PS_INVOCATION_COUNT
1388 	 *   - PS_INVOCATION_COUNT_UDW
1389 	 *   - PS_DEPTH_COUNT
1390 	 *   - PS_DEPTH_COUNT_UDW
1391 	 */
1392 	whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1393 			  RING_FORCE_TO_NONPRIV_ACCESS_RD |
1394 			  RING_FORCE_TO_NONPRIV_RANGE_4);
1395 }
1396 
1397 static void cml_whitelist_build(struct intel_engine_cs *engine)
1398 {
1399 	struct i915_wa_list *w = &engine->whitelist;
1400 
1401 	if (engine->class != RENDER_CLASS)
1402 		whitelist_reg_ext(w,
1403 				  RING_CTX_TIMESTAMP(engine->mmio_base),
1404 				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
1405 
1406 	cfl_whitelist_build(engine);
1407 }
1408 
1409 static void cnl_whitelist_build(struct intel_engine_cs *engine)
1410 {
1411 	struct i915_wa_list *w = &engine->whitelist;
1412 
1413 	if (engine->class != RENDER_CLASS)
1414 		return;
1415 
1416 	/* WaEnablePreemptionGranularityControlByUMD:cnl */
1417 	whitelist_reg(w, GEN8_CS_CHICKEN1);
1418 }
1419 
1420 static void icl_whitelist_build(struct intel_engine_cs *engine)
1421 {
1422 	struct i915_wa_list *w = &engine->whitelist;
1423 
1424 	switch (engine->class) {
1425 	case RENDER_CLASS:
1426 		/* WaAllowUMDToModifyHalfSliceChicken7:icl */
1427 		whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
1428 
1429 		/* WaAllowUMDToModifySamplerMode:icl */
1430 		whitelist_reg(w, GEN10_SAMPLER_MODE);
1431 
1432 		/* WaEnableStateCacheRedirectToCS:icl */
1433 		whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1434 
1435 		/*
1436 		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
1437 		 *
1438 		 * This covers 4 register which are next to one another :
1439 		 *   - PS_INVOCATION_COUNT
1440 		 *   - PS_INVOCATION_COUNT_UDW
1441 		 *   - PS_DEPTH_COUNT
1442 		 *   - PS_DEPTH_COUNT_UDW
1443 		 */
1444 		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1445 				  RING_FORCE_TO_NONPRIV_ACCESS_RD |
1446 				  RING_FORCE_TO_NONPRIV_RANGE_4);
1447 		break;
1448 
1449 	case VIDEO_DECODE_CLASS:
1450 		/* hucStatusRegOffset */
1451 		whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
1452 				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
1453 		/* hucUKernelHdrInfoRegOffset */
1454 		whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
1455 				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
1456 		/* hucStatus2RegOffset */
1457 		whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
1458 				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
1459 		whitelist_reg_ext(w,
1460 				  RING_CTX_TIMESTAMP(engine->mmio_base),
1461 				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
1462 		break;
1463 
1464 	default:
1465 		whitelist_reg_ext(w,
1466 				  RING_CTX_TIMESTAMP(engine->mmio_base),
1467 				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
1468 		break;
1469 	}
1470 }
1471 
1472 static void tgl_whitelist_build(struct intel_engine_cs *engine)
1473 {
1474 	struct i915_wa_list *w = &engine->whitelist;
1475 
1476 	switch (engine->class) {
1477 	case RENDER_CLASS:
1478 		/*
1479 		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
1480 		 * Wa_1408556865:tgl
1481 		 *
1482 		 * This covers 4 registers which are next to one another :
1483 		 *   - PS_INVOCATION_COUNT
1484 		 *   - PS_INVOCATION_COUNT_UDW
1485 		 *   - PS_DEPTH_COUNT
1486 		 *   - PS_DEPTH_COUNT_UDW
1487 		 */
1488 		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1489 				  RING_FORCE_TO_NONPRIV_ACCESS_RD |
1490 				  RING_FORCE_TO_NONPRIV_RANGE_4);
1491 
1492 		/* Wa_1808121037:tgl */
1493 		whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);
1494 
1495 		/* Wa_1806527549:tgl */
1496 		whitelist_reg(w, HIZ_CHICKEN);
1497 		break;
1498 	default:
1499 		whitelist_reg_ext(w,
1500 				  RING_CTX_TIMESTAMP(engine->mmio_base),
1501 				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
1502 		break;
1503 	}
1504 }
1505 
1506 static void dg1_whitelist_build(struct intel_engine_cs *engine)
1507 {
1508 	struct i915_wa_list *w = &engine->whitelist;
1509 
1510 	tgl_whitelist_build(engine);
1511 
1512 	/* GEN:BUG:1409280441:dg1 */
1513 	if (IS_DG1_REVID(engine->i915, DG1_REVID_A0, DG1_REVID_A0) &&
1514 	    (engine->class == RENDER_CLASS ||
1515 	     engine->class == COPY_ENGINE_CLASS))
1516 		whitelist_reg_ext(w, RING_ID(engine->mmio_base),
1517 				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
1518 }
1519 
1520 void intel_engine_init_whitelist(struct intel_engine_cs *engine)
1521 {
1522 	struct drm_i915_private *i915 = engine->i915;
1523 	struct i915_wa_list *w = &engine->whitelist;
1524 
1525 	wa_init_start(w, "whitelist", engine->name);
1526 
1527 	if (IS_DG1(i915))
1528 		dg1_whitelist_build(engine);
1529 	else if (IS_GEN(i915, 12))
1530 		tgl_whitelist_build(engine);
1531 	else if (IS_GEN(i915, 11))
1532 		icl_whitelist_build(engine);
1533 	else if (IS_CANNONLAKE(i915))
1534 		cnl_whitelist_build(engine);
1535 	else if (IS_COMETLAKE(i915))
1536 		cml_whitelist_build(engine);
1537 	else if (IS_COFFEELAKE(i915))
1538 		cfl_whitelist_build(engine);
1539 	else if (IS_GEMINILAKE(i915))
1540 		glk_whitelist_build(engine);
1541 	else if (IS_KABYLAKE(i915))
1542 		kbl_whitelist_build(engine);
1543 	else if (IS_BROXTON(i915))
1544 		bxt_whitelist_build(engine);
1545 	else if (IS_SKYLAKE(i915))
1546 		skl_whitelist_build(engine);
1547 	else if (INTEL_GEN(i915) <= 8)
1548 		;
1549 	else
1550 		MISSING_CASE(INTEL_GEN(i915));
1551 
1552 	wa_init_finish(w);
1553 }
1554 
1555 void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
1556 {
1557 	const struct i915_wa_list *wal = &engine->whitelist;
1558 	struct intel_uncore *uncore = engine->uncore;
1559 	const u32 base = engine->mmio_base;
1560 	struct i915_wa *wa;
1561 	unsigned int i;
1562 
1563 	if (!wal->count)
1564 		return;
1565 
1566 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1567 		intel_uncore_write(uncore,
1568 				   RING_FORCE_TO_NONPRIV(base, i),
1569 				   i915_mmio_reg_offset(wa->reg));
1570 
1571 	/* And clear the rest just in case of garbage */
1572 	for (; i < RING_MAX_NONPRIV_SLOTS; i++)
1573 		intel_uncore_write(uncore,
1574 				   RING_FORCE_TO_NONPRIV(base, i),
1575 				   i915_mmio_reg_offset(RING_NOPID(base)));
1576 }
1577 
1578 static void
1579 rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1580 {
1581 	struct drm_i915_private *i915 = engine->i915;
1582 
1583 	if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) ||
1584 	    IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0)) {
1585 		/*
1586 		 * Wa_1607138336:tgl[a0],dg1[a0]
1587 		 * Wa_1607063988:tgl[a0],dg1[a0]
1588 		 */
1589 		wa_write_or(wal,
1590 			    GEN9_CTX_PREEMPT_REG,
1591 			    GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
1592 	}
1593 
1594 	if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0)) {
1595 		/*
1596 		 * Wa_1606679103:tgl
1597 		 * (see also Wa_1606682166:icl)
1598 		 */
1599 		wa_write_or(wal,
1600 			    GEN7_SARCHKMD,
1601 			    GEN7_DISABLE_SAMPLER_PREFETCH);
1602 	}
1603 
1604 	if (IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
1605 	    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
1606 		/* Wa_1606931601:tgl,rkl,dg1,adl-s */
1607 		wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
1608 
1609 		/*
1610 		 * Wa_1407928979:tgl A*
1611 		 * Wa_18011464164:tgl[B0+],dg1[B0+]
1612 		 * Wa_22010931296:tgl[B0+],dg1[B0+]
1613 		 * Wa_14010919138:rkl,dg1,adl-s
1614 		 */
1615 		wa_write_or(wal, GEN7_FF_THREAD_MODE,
1616 			    GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
1617 
1618 		/*
1619 		 * Wa_1606700617:tgl,dg1
1620 		 * Wa_22010271021:tgl,rkl,dg1, adl-s
1621 		 */
1622 		wa_masked_en(wal,
1623 			     GEN9_CS_DEBUG_MODE1,
1624 			     FF_DOP_CLOCK_GATE_DISABLE);
1625 	}
1626 
1627 	if (IS_ALDERLAKE_S(i915) || IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) ||
1628 	    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
1629 		/* Wa_1409804808:tgl,rkl,dg1[a0],adl-s */
1630 		wa_masked_en(wal, GEN7_ROW_CHICKEN2,
1631 			     GEN12_PUSH_CONST_DEREF_HOLD_DIS);
1632 
1633 		/*
1634 		 * Wa_1409085225:tgl
1635 		 * Wa_14010229206:tgl,rkl,dg1[a0],adl-s
1636 		 */
1637 		wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
1638 	}
1639 
1640 
1641 	if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) ||
1642 	    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
1643 		/*
1644 		 * Wa_1607030317:tgl
1645 		 * Wa_1607186500:tgl
1646 		 * Wa_1607297627:tgl,rkl,dg1[a0]
1647 		 *
1648 		 * On TGL and RKL there are multiple entries for this WA in the
1649 		 * BSpec; some indicate this is an A0-only WA, others indicate
1650 		 * it applies to all steppings so we trust the "all steppings."
1651 		 * For DG1 this only applies to A0.
1652 		 */
1653 		wa_masked_en(wal,
1654 			     GEN6_RC_SLEEP_PSMI_CONTROL,
1655 			     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
1656 			     GEN8_RC_SEMA_IDLE_MSG_DISABLE);
1657 	}
1658 
1659 	if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
1660 		/* Wa_1406941453:tgl,rkl,dg1 */
1661 		wa_masked_en(wal,
1662 			     GEN10_SAMPLER_MODE,
1663 			     ENABLE_SMALLPL);
1664 	}
1665 
1666 	if (IS_GEN(i915, 11)) {
1667 		/* This is not an Wa. Enable for better image quality */
1668 		wa_masked_en(wal,
1669 			     _3D_CHICKEN3,
1670 			     _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
1671 
1672 		/* WaPipelineFlushCoherentLines:icl */
1673 		wa_write_or(wal,
1674 			    GEN8_L3SQCREG4,
1675 			    GEN8_LQSC_FLUSH_COHERENT_LINES);
1676 
1677 		/*
1678 		 * Wa_1405543622:icl
1679 		 * Formerly known as WaGAPZPriorityScheme
1680 		 */
1681 		wa_write_or(wal,
1682 			    GEN8_GARBCNTL,
1683 			    GEN11_ARBITRATION_PRIO_ORDER_MASK);
1684 
1685 		/*
1686 		 * Wa_1604223664:icl
1687 		 * Formerly known as WaL3BankAddressHashing
1688 		 */
1689 		wa_write_clr_set(wal,
1690 				 GEN8_GARBCNTL,
1691 				 GEN11_HASH_CTRL_EXCL_MASK,
1692 				 GEN11_HASH_CTRL_EXCL_BIT0);
1693 		wa_write_clr_set(wal,
1694 				 GEN11_GLBLINVL,
1695 				 GEN11_BANK_HASH_ADDR_EXCL_MASK,
1696 				 GEN11_BANK_HASH_ADDR_EXCL_BIT0);
1697 
1698 		/*
1699 		 * Wa_1405733216:icl
1700 		 * Formerly known as WaDisableCleanEvicts
1701 		 */
1702 		wa_write_or(wal,
1703 			    GEN8_L3SQCREG4,
1704 			    GEN11_LQSC_CLEAN_EVICT_DISABLE);
1705 
1706 		/* WaForwardProgressSoftReset:icl */
1707 		wa_write_or(wal,
1708 			    GEN10_SCRATCH_LNCF2,
1709 			    PMFLUSHDONE_LNICRSDROP |
1710 			    PMFLUSH_GAPL3UNBLOCK |
1711 			    PMFLUSHDONE_LNEBLK);
1712 
1713 		/* Wa_1406609255:icl (pre-prod) */
1714 		if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
1715 			wa_write_or(wal,
1716 				    GEN7_SARCHKMD,
1717 				    GEN7_DISABLE_DEMAND_PREFETCH);
1718 
1719 		/* Wa_1606682166:icl */
1720 		wa_write_or(wal,
1721 			    GEN7_SARCHKMD,
1722 			    GEN7_DISABLE_SAMPLER_PREFETCH);
1723 
1724 		/* Wa_1409178092:icl */
1725 		wa_write_clr_set(wal,
1726 				 GEN11_SCRATCH2,
1727 				 GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
1728 				 0);
1729 
1730 		/* WaEnable32PlaneMode:icl */
1731 		wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
1732 			     GEN11_ENABLE_32_PLANE_MODE);
1733 
1734 		/*
1735 		 * Wa_1408615072:icl,ehl  (vsunit)
1736 		 * Wa_1407596294:icl,ehl  (hsunit)
1737 		 */
1738 		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
1739 			    VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
1740 
1741 		/* Wa_1407352427:icl,ehl */
1742 		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
1743 			    PSDUNIT_CLKGATE_DIS);
1744 
1745 		/* Wa_1406680159:icl,ehl */
1746 		wa_write_or(wal,
1747 			    SUBSLICE_UNIT_LEVEL_CLKGATE,
1748 			    GWUNIT_CLKGATE_DIS);
1749 
1750 		/*
1751 		 * Wa_1408767742:icl[a2..forever],ehl[all]
1752 		 * Wa_1605460711:icl[a0..c0]
1753 		 */
1754 		wa_write_or(wal,
1755 			    GEN7_FF_THREAD_MODE,
1756 			    GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
1757 
1758 		/* Wa_22010271021:ehl */
1759 		if (IS_JSL_EHL(i915))
1760 			wa_masked_en(wal,
1761 				     GEN9_CS_DEBUG_MODE1,
1762 				     FF_DOP_CLOCK_GATE_DISABLE);
1763 	}
1764 
1765 	if (IS_GEN_RANGE(i915, 9, 12)) {
1766 		/* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */
1767 		wa_masked_en(wal,
1768 			     GEN7_FF_SLICE_CS_CHICKEN1,
1769 			     GEN9_FFSC_PERCTX_PREEMPT_CTRL);
1770 	}
1771 
1772 	if (IS_SKYLAKE(i915) ||
1773 	    IS_KABYLAKE(i915) ||
1774 	    IS_COFFEELAKE(i915) ||
1775 	    IS_COMETLAKE(i915)) {
1776 		/* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
1777 		wa_write_or(wal,
1778 			    GEN8_GARBCNTL,
1779 			    GEN9_GAPS_TSV_CREDIT_DISABLE);
1780 	}
1781 
1782 	if (IS_BROXTON(i915)) {
1783 		/* WaDisablePooledEuLoadBalancingFix:bxt */
1784 		wa_masked_en(wal,
1785 			     FF_SLICE_CS_CHICKEN2,
1786 			     GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
1787 	}
1788 
1789 	if (IS_GEN(i915, 9)) {
1790 		/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
1791 		wa_masked_en(wal,
1792 			     GEN9_CSFE_CHICKEN1_RCS,
1793 			     GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
1794 
1795 		/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
1796 		wa_write_or(wal,
1797 			    BDW_SCRATCH1,
1798 			    GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
1799 
1800 		/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
1801 		if (IS_GEN9_LP(i915))
1802 			wa_write_clr_set(wal,
1803 					 GEN8_L3SQCREG1,
1804 					 L3_PRIO_CREDITS_MASK,
1805 					 L3_GENERAL_PRIO_CREDITS(62) |
1806 					 L3_HIGH_PRIO_CREDITS(2));
1807 
1808 		/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
1809 		wa_write_or(wal,
1810 			    GEN8_L3SQCREG4,
1811 			    GEN8_LQSC_FLUSH_COHERENT_LINES);
1812 
1813 		/* Disable atomics in L3 to prevent unrecoverable hangs */
1814 		wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1,
1815 				 GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0);
1816 		wa_write_clr_set(wal, GEN8_L3SQCREG4,
1817 				 GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
1818 		wa_write_clr_set(wal, GEN9_SCRATCH1,
1819 				 EVICTION_PERF_FIX_ENABLE, 0);
1820 	}
1821 
1822 	if (IS_HASWELL(i915)) {
1823 		/* WaSampleCChickenBitEnable:hsw */
1824 		wa_masked_en(wal,
1825 			     HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
1826 
1827 		wa_masked_dis(wal,
1828 			      CACHE_MODE_0_GEN7,
1829 			      /* enable HiZ Raw Stall Optimization */
1830 			      HIZ_RAW_STALL_OPT_DISABLE);
1831 
1832 		/* WaDisable4x2SubspanOptimization:hsw */
1833 		wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
1834 	}
1835 
1836 	if (IS_VALLEYVIEW(i915)) {
1837 		/* WaDisableEarlyCull:vlv */
1838 		wa_masked_en(wal,
1839 			     _3D_CHICKEN3,
1840 			     _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
1841 
1842 		/*
1843 		 * WaVSThreadDispatchOverride:ivb,vlv
1844 		 *
1845 		 * This actually overrides the dispatch
1846 		 * mode for all thread types.
1847 		 */
1848 		wa_write_clr_set(wal,
1849 				 GEN7_FF_THREAD_MODE,
1850 				 GEN7_FF_SCHED_MASK,
1851 				 GEN7_FF_TS_SCHED_HW |
1852 				 GEN7_FF_VS_SCHED_HW |
1853 				 GEN7_FF_DS_SCHED_HW);
1854 
1855 		/* WaPsdDispatchEnable:vlv */
1856 		/* WaDisablePSDDualDispatchEnable:vlv */
1857 		wa_masked_en(wal,
1858 			     GEN7_HALF_SLICE_CHICKEN1,
1859 			     GEN7_MAX_PS_THREAD_DEP |
1860 			     GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
1861 	}
1862 
1863 	if (IS_IVYBRIDGE(i915)) {
1864 		/* WaDisableEarlyCull:ivb */
1865 		wa_masked_en(wal,
1866 			     _3D_CHICKEN3,
1867 			     _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
1868 
1869 		if (0) { /* causes HiZ corruption on ivb:gt1 */
1870 			/* enable HiZ Raw Stall Optimization */
1871 			wa_masked_dis(wal,
1872 				      CACHE_MODE_0_GEN7,
1873 				      HIZ_RAW_STALL_OPT_DISABLE);
1874 		}
1875 
1876 		/*
1877 		 * WaVSThreadDispatchOverride:ivb,vlv
1878 		 *
1879 		 * This actually overrides the dispatch
1880 		 * mode for all thread types.
1881 		 */
1882 		wa_write_clr_set(wal,
1883 				 GEN7_FF_THREAD_MODE,
1884 				 GEN7_FF_SCHED_MASK,
1885 				 GEN7_FF_TS_SCHED_HW |
1886 				 GEN7_FF_VS_SCHED_HW |
1887 				 GEN7_FF_DS_SCHED_HW);
1888 
1889 		/* WaDisablePSDDualDispatchEnable:ivb */
1890 		if (IS_IVB_GT1(i915))
1891 			wa_masked_en(wal,
1892 				     GEN7_HALF_SLICE_CHICKEN1,
1893 				     GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
1894 	}
1895 
1896 	if (IS_GEN(i915, 7)) {
1897 		/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
1898 		wa_masked_en(wal,
1899 			     GFX_MODE_GEN7,
1900 			     GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE);
1901 
1902 		/* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
1903 		wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
1904 
1905 		/*
1906 		 * BSpec says this must be set, even though
1907 		 * WaDisable4x2SubspanOptimization:ivb,hsw
1908 		 * WaDisable4x2SubspanOptimization isn't listed for VLV.
1909 		 */
1910 		wa_masked_en(wal,
1911 			     CACHE_MODE_1,
1912 			     PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
1913 
1914 		/*
1915 		 * BSpec recommends 8x4 when MSAA is used,
1916 		 * however in practice 16x4 seems fastest.
1917 		 *
1918 		 * Note that PS/WM thread counts depend on the WIZ hashing
1919 		 * disable bit, which we don't touch here, but it's good
1920 		 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
1921 		 */
1922 		wa_add(wal, GEN7_GT_MODE, 0,
1923 		       _MASKED_FIELD(GEN6_WIZ_HASHING_MASK,
1924 				     GEN6_WIZ_HASHING_16x4),
1925 		       GEN6_WIZ_HASHING_16x4);
1926 	}
1927 
1928 	if (IS_GEN_RANGE(i915, 6, 7))
1929 		/*
1930 		 * We need to disable the AsyncFlip performance optimisations in
1931 		 * order to use MI_WAIT_FOR_EVENT within the CS. It should
1932 		 * already be programmed to '1' on all products.
1933 		 *
1934 		 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
1935 		 */
1936 		wa_masked_en(wal,
1937 			     MI_MODE,
1938 			     ASYNC_FLIP_PERF_DISABLE);
1939 
1940 	if (IS_GEN(i915, 6)) {
1941 		/*
1942 		 * Required for the hardware to program scanline values for
1943 		 * waiting
1944 		 * WaEnableFlushTlbInvalidationMode:snb
1945 		 */
1946 		wa_masked_en(wal,
1947 			     GFX_MODE,
1948 			     GFX_TLB_INVALIDATE_EXPLICIT);
1949 
1950 		/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
1951 		wa_masked_en(wal,
1952 			     _3D_CHICKEN,
1953 			     _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB);
1954 
1955 		wa_masked_en(wal,
1956 			     _3D_CHICKEN3,
1957 			     /* WaStripsFansDisableFastClipPerformanceFix:snb */
1958 			     _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL |
1959 			     /*
1960 			      * Bspec says:
1961 			      * "This bit must be set if 3DSTATE_CLIP clip mode is set
1962 			      * to normal and 3DSTATE_SF number of SF output attributes
1963 			      * is more than 16."
1964 			      */
1965 			     _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH);
1966 
1967 		/*
1968 		 * BSpec recommends 8x4 when MSAA is used,
1969 		 * however in practice 16x4 seems fastest.
1970 		 *
1971 		 * Note that PS/WM thread counts depend on the WIZ hashing
1972 		 * disable bit, which we don't touch here, but it's good
1973 		 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
1974 		 */
1975 		wa_add(wal,
1976 		       GEN6_GT_MODE, 0,
1977 		       _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
1978 		       GEN6_WIZ_HASHING_16x4);
1979 
1980 		/* WaDisable_RenderCache_OperationalFlush:snb */
1981 		wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
1982 
1983 		/*
1984 		 * From the Sandybridge PRM, volume 1 part 3, page 24:
1985 		 * "If this bit is set, STCunit will have LRA as replacement
1986 		 *  policy. [...] This bit must be reset. LRA replacement
1987 		 *  policy is not supported."
1988 		 */
1989 		wa_masked_dis(wal,
1990 			      CACHE_MODE_0,
1991 			      CM0_STC_EVICT_DISABLE_LRA_SNB);
1992 	}
1993 
1994 	if (IS_GEN_RANGE(i915, 4, 6))
1995 		/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
1996 		wa_add(wal, MI_MODE,
1997 		       0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
1998 		       /* XXX bit doesn't stick on Broadwater */
1999 		       IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH);
2000 
2001 	if (IS_GEN(i915, 4))
2002 		/*
2003 		 * Disable CONSTANT_BUFFER before it is loaded from the context
2004 		 * image. For as it is loaded, it is executed and the stored
2005 		 * address may no longer be valid, leading to a GPU hang.
2006 		 *
2007 		 * This imposes the requirement that userspace reload their
2008 		 * CONSTANT_BUFFER on every batch, fortunately a requirement
2009 		 * they are already accustomed to from before contexts were
2010 		 * enabled.
2011 		 */
2012 		wa_add(wal, ECOSKPD,
2013 		       0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE),
2014 		       0 /* XXX bit doesn't stick on Broadwater */);
2015 }
2016 
2017 static void
2018 xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2019 {
2020 	struct drm_i915_private *i915 = engine->i915;
2021 
2022 	/* WaKBLVECSSemaphoreWaitPoll:kbl */
2023 	if (IS_KBL_GT_STEP(i915, STEP_A0, STEP_E0)) {
2024 		wa_write(wal,
2025 			 RING_SEMA_WAIT_POLL(engine->mmio_base),
2026 			 1);
2027 	}
2028 }
2029 
2030 static void
2031 engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2032 {
2033 	if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 4))
2034 		return;
2035 
2036 	if (engine->class == RENDER_CLASS)
2037 		rcs_engine_wa_init(engine, wal);
2038 	else
2039 		xcs_engine_wa_init(engine, wal);
2040 }
2041 
2042 void intel_engine_init_workarounds(struct intel_engine_cs *engine)
2043 {
2044 	struct i915_wa_list *wal = &engine->wa_list;
2045 
2046 	if (INTEL_GEN(engine->i915) < 4)
2047 		return;
2048 
2049 	wa_init_start(wal, "engine", engine->name);
2050 	engine_init_workarounds(engine, wal);
2051 	wa_init_finish(wal);
2052 }
2053 
2054 void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
2055 {
2056 	wa_list_apply(engine->uncore, &engine->wa_list);
2057 }
2058 
2059 struct mcr_range {
2060 	u32 start;
2061 	u32 end;
2062 };
2063 
2064 static const struct mcr_range mcr_ranges_gen8[] = {
2065 	{ .start = 0x5500, .end = 0x55ff },
2066 	{ .start = 0x7000, .end = 0x7fff },
2067 	{ .start = 0x9400, .end = 0x97ff },
2068 	{ .start = 0xb000, .end = 0xb3ff },
2069 	{ .start = 0xe000, .end = 0xe7ff },
2070 	{},
2071 };
2072 
2073 static const struct mcr_range mcr_ranges_gen12[] = {
2074 	{ .start =  0x8150, .end =  0x815f },
2075 	{ .start =  0x9520, .end =  0x955f },
2076 	{ .start =  0xb100, .end =  0xb3ff },
2077 	{ .start =  0xde80, .end =  0xe8ff },
2078 	{ .start = 0x24a00, .end = 0x24a7f },
2079 	{},
2080 };
2081 
2082 static bool mcr_range(struct drm_i915_private *i915, u32 offset)
2083 {
2084 	const struct mcr_range *mcr_ranges;
2085 	int i;
2086 
2087 	if (INTEL_GEN(i915) >= 12)
2088 		mcr_ranges = mcr_ranges_gen12;
2089 	else if (INTEL_GEN(i915) >= 8)
2090 		mcr_ranges = mcr_ranges_gen8;
2091 	else
2092 		return false;
2093 
2094 	/*
2095 	 * Registers in these ranges are affected by the MCR selector
2096 	 * which only controls CPU initiated MMIO. Routing does not
2097 	 * work for CS access so we cannot verify them on this path.
2098 	 */
2099 	for (i = 0; mcr_ranges[i].start; i++)
2100 		if (offset >= mcr_ranges[i].start &&
2101 		    offset <= mcr_ranges[i].end)
2102 			return true;
2103 
2104 	return false;
2105 }
2106 
2107 static int
2108 wa_list_srm(struct i915_request *rq,
2109 	    const struct i915_wa_list *wal,
2110 	    struct i915_vma *vma)
2111 {
2112 	struct drm_i915_private *i915 = rq->engine->i915;
2113 	unsigned int i, count = 0;
2114 	const struct i915_wa *wa;
2115 	u32 srm, *cs;
2116 
2117 	srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
2118 	if (INTEL_GEN(i915) >= 8)
2119 		srm++;
2120 
2121 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
2122 		if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg)))
2123 			count++;
2124 	}
2125 
2126 	cs = intel_ring_begin(rq, 4 * count);
2127 	if (IS_ERR(cs))
2128 		return PTR_ERR(cs);
2129 
2130 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
2131 		u32 offset = i915_mmio_reg_offset(wa->reg);
2132 
2133 		if (mcr_range(i915, offset))
2134 			continue;
2135 
2136 		*cs++ = srm;
2137 		*cs++ = offset;
2138 		*cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
2139 		*cs++ = 0;
2140 	}
2141 	intel_ring_advance(rq, cs);
2142 
2143 	return 0;
2144 }
2145 
2146 static int engine_wa_list_verify(struct intel_context *ce,
2147 				 const struct i915_wa_list * const wal,
2148 				 const char *from)
2149 {
2150 	const struct i915_wa *wa;
2151 	struct i915_request *rq;
2152 	struct i915_vma *vma;
2153 	struct i915_gem_ww_ctx ww;
2154 	unsigned int i;
2155 	u32 *results;
2156 	int err;
2157 
2158 	if (!wal->count)
2159 		return 0;
2160 
2161 	vma = __vm_create_scratch_for_read(&ce->engine->gt->ggtt->vm,
2162 					   wal->count * sizeof(u32));
2163 	if (IS_ERR(vma))
2164 		return PTR_ERR(vma);
2165 
2166 	intel_engine_pm_get(ce->engine);
2167 	i915_gem_ww_ctx_init(&ww, false);
2168 retry:
2169 	err = i915_gem_object_lock(vma->obj, &ww);
2170 	if (err == 0)
2171 		err = intel_context_pin_ww(ce, &ww);
2172 	if (err)
2173 		goto err_pm;
2174 
2175 	err = i915_vma_pin_ww(vma, &ww, 0, 0,
2176 			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
2177 	if (err)
2178 		goto err_unpin;
2179 
2180 	rq = i915_request_create(ce);
2181 	if (IS_ERR(rq)) {
2182 		err = PTR_ERR(rq);
2183 		goto err_vma;
2184 	}
2185 
2186 	err = i915_request_await_object(rq, vma->obj, true);
2187 	if (err == 0)
2188 		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
2189 	if (err == 0)
2190 		err = wa_list_srm(rq, wal, vma);
2191 
2192 	i915_request_get(rq);
2193 	if (err)
2194 		i915_request_set_error_once(rq, err);
2195 	i915_request_add(rq);
2196 
2197 	if (err)
2198 		goto err_rq;
2199 
2200 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2201 		err = -ETIME;
2202 		goto err_rq;
2203 	}
2204 
2205 	results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
2206 	if (IS_ERR(results)) {
2207 		err = PTR_ERR(results);
2208 		goto err_rq;
2209 	}
2210 
2211 	err = 0;
2212 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
2213 		if (mcr_range(rq->engine->i915, i915_mmio_reg_offset(wa->reg)))
2214 			continue;
2215 
2216 		if (!wa_verify(wa, results[i], wal->name, from))
2217 			err = -ENXIO;
2218 	}
2219 
2220 	i915_gem_object_unpin_map(vma->obj);
2221 
2222 err_rq:
2223 	i915_request_put(rq);
2224 err_vma:
2225 	i915_vma_unpin(vma);
2226 err_unpin:
2227 	intel_context_unpin(ce);
2228 err_pm:
2229 	if (err == -EDEADLK) {
2230 		err = i915_gem_ww_ctx_backoff(&ww);
2231 		if (!err)
2232 			goto retry;
2233 	}
2234 	i915_gem_ww_ctx_fini(&ww);
2235 	intel_engine_pm_put(ce->engine);
2236 	i915_vma_put(vma);
2237 	return err;
2238 }
2239 
2240 int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
2241 				    const char *from)
2242 {
2243 	return engine_wa_list_verify(engine->kernel_context,
2244 				     &engine->wa_list,
2245 				     from);
2246 }
2247 
2248 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
2249 #include "selftest_workarounds.c"
2250 #endif
2251