xref: /openbmc/linux/drivers/gpu/drm/i915/gt/intel_workarounds.c (revision 4f727ecefefbd180de10e25b3e74c03dce3f1e75)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2014-2018 Intel Corporation
5  */
6 
7 #include "i915_drv.h"
8 #include "intel_context.h"
9 #include "intel_workarounds.h"
10 
11 /**
12  * DOC: Hardware workarounds
13  *
14  * This file is intended as a central place to implement most [1]_ of the
15  * required workarounds for hardware to work as originally intended. They fall
16  * in five basic categories depending on how/when they are applied:
17  *
18  * - Workarounds that touch registers that are saved/restored to/from the HW
19  *   context image. The list is emitted (via Load Register Immediate commands)
20  *   every time a new context is created.
21  * - GT workarounds. The list of these WAs is applied whenever these registers
22  *   revert to default values (on GPU reset, suspend/resume [2]_, etc..).
23  * - Display workarounds. The list is applied during display clock-gating
24  *   initialization.
25  * - Workarounds that whitelist a privileged register, so that UMDs can manage
26  *   them directly. This is just a special case of an MMIO workaround (as we
27  *   write the list of these to-be-whitelisted registers to some special HW
28  *   registers).
29  * - Workaround batchbuffers, which get executed automatically by the hardware
30  *   on every HW context restore.
31  *
32  * .. [1] Please notice that there are other WAs that, due to their nature,
33  *    cannot be applied from a central place. Those are peppered around the rest
34  *    of the code, as needed.
35  *
36  * .. [2] Technically, some registers are power-context saved & restored, so they
37  *    survive a suspend/resume. In practice, writing them again is not too
38  *    costly and simplifies things. We can revisit this in the future.
39  *
40  * Layout
41  * ~~~~~~
42  *
43  * Keep things in this file ordered by WA type, as per the above (context, GT,
44  * display, register whitelist, batchbuffer). Then, inside each type, keep the
45  * following order:
46  *
47  * - Infrastructure functions and macros
48  * - WAs per platform in standard gen/chrono order
49  * - Public functions to init or apply the given workaround type.
50  */
51 
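/*
 * Illustrative sketch only; the platform hook, register and WA names below
 * are hypothetical, not part of this driver. A per-platform init function
 * typically records a workaround into the list like so:
 *
 *	static void xyz_ctx_workarounds_init(struct intel_engine_cs *engine,
 *					     struct i915_wa_list *wal)
 *	{
 *		// WaFixSomething:xyz (hypothetical)
 *		WA_SET_BIT_MASKED(SOME_CHICKEN_REG, SOME_CHICKEN_BIT);
 *	}
 *
 * The recorded entries are later replayed either as Load Register Immediate
 * commands (context workarounds, see intel_engine_emit_ctx_wa()) or as MMIO
 * writes (GT and engine workarounds, see wa_list_apply()).
 */
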
52 static void wa_init_start(struct i915_wa_list *wal, const char *name)
53 {
54 	wal->name = name;
55 }
56 
57 #define WA_LIST_CHUNK (1 << 4)
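
/*
 * The backing array grows in WA_LIST_CHUNK-sized steps in _wa_add() and is
 * trimmed back to the exact entry count by wa_init_finish().
 */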
58 
59 static void wa_init_finish(struct i915_wa_list *wal)
60 {
61 	/* Trim unused entries. */
62 	if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
63 		struct i915_wa *list = kmemdup(wal->list,
64 					       wal->count * sizeof(*list),
65 					       GFP_KERNEL);
66 
67 		if (list) {
68 			kfree(wal->list);
69 			wal->list = list;
70 		}
71 	}
72 
73 	if (!wal->count)
74 		return;
75 
76 	DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n",
77 			 wal->wa_count, wal->name);
78 }
79 
80 static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
81 {
82 	unsigned int addr = i915_mmio_reg_offset(wa->reg);
83 	unsigned int start = 0, end = wal->count;
84 	const unsigned int grow = WA_LIST_CHUNK;
85 	struct i915_wa *wa_;
86 
87 	GEM_BUG_ON(!is_power_of_2(grow));
88 
89 	if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
90 		struct i915_wa *list;
91 
92 		list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
93 				     GFP_KERNEL);
94 		if (!list) {
95 			DRM_ERROR("No space for workaround init!\n");
96 			return;
97 		}
98 
99 		if (wal->list)
100 			memcpy(list, wal->list, sizeof(*wa) * wal->count);
101 
102 		wal->list = list;
103 	}
104 
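	/*
	 * The list is kept sorted by mmio offset: binary-search for an
	 * existing entry so that repeated workarounds against the same
	 * register are merged instead of duplicated.
	 */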
105 	while (start < end) {
106 		unsigned int mid = start + (end - start) / 2;
107 
108 		if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
109 			start = mid + 1;
110 		} else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
111 			end = mid;
112 		} else {
113 			wa_ = &wal->list[mid];
114 
115 			if ((wa->mask & ~wa_->mask) == 0) {
116 				DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
117 					  i915_mmio_reg_offset(wa_->reg),
118 					  wa_->mask, wa_->val);
119 
120 				wa_->val &= ~wa->mask;
121 			}
122 
123 			wal->wa_count++;
124 			wa_->val |= wa->val;
125 			wa_->mask |= wa->mask;
126 			wa_->read |= wa->read;
127 			return;
128 		}
129 	}
130 
131 	wal->wa_count++;
132 	wa_ = &wal->list[wal->count++];
133 	*wa_ = *wa;
134 
135 	while (wa_-- > wal->list) {
136 		GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
137 			   i915_mmio_reg_offset(wa_[1].reg));
138 		if (i915_mmio_reg_offset(wa_[1].reg) >
139 		    i915_mmio_reg_offset(wa_[0].reg))
140 			break;
141 
142 		swap(wa_[1], wa_[0]);
143 	}
144 }
145 
146 static void
147 wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
148 		   u32 val)
149 {
150 	struct i915_wa wa = {
151 		.reg  = reg,
152 		.mask = mask,
153 		.val  = val,
154 		.read = mask,
155 	};
156 
157 	_wa_add(wal, &wa);
158 }
159 
160 static void
161 wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
162 {
163 	wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
164 }
165 
166 static void
167 wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
168 {
169 	wa_write_masked_or(wal, reg, ~0, val);
170 }
171 
172 static void
173 wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
174 {
175 	wa_write_masked_or(wal, reg, val, val);
176 }
177 
178 static void
179 ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val)
180 {
181 	struct i915_wa wa = {
182 		.reg  = reg,
183 		.mask = mask,
184 		.val  = val,
185 		/* Bonkers HW, skip verifying */
186 	};
187 
188 	_wa_add(wal, &wa);
189 }
190 
191 #define WA_SET_BIT_MASKED(addr, mask) \
192 	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))
193 
194 #define WA_CLR_BIT_MASKED(addr, mask) \
195 	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))
196 
197 #define WA_SET_FIELD_MASKED(addr, mask, value) \
198 	wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))
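
/*
 * These helpers target "masked" registers, where the upper 16 bits of the
 * written value select which of the lower 16 bits actually get updated, so
 * single bits can be flipped without a read-modify-write. For example (with
 * illustrative values): _MASKED_BIT_ENABLE(0x0010) writes 0x00100010 and
 * _MASKED_BIT_DISABLE(0x0010) writes 0x00100000, each touching only bit 4.
 */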
199 
200 static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
201 				      struct i915_wa_list *wal)
202 {
203 	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
204 
205 	/* WaDisableAsyncFlipPerfMode:bdw,chv */
206 	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
207 
208 	/* WaDisablePartialInstShootdown:bdw,chv */
209 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
210 			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
211 
212 	/* Use Force Non-Coherent whenever executing a 3D context. This is a
213 	 * workaround for a possible hang in the unlikely event a TLB
214 	 * invalidation occurs during a PSD flush.
215 	 */
216 	/* WaForceEnableNonCoherent:bdw,chv */
217 	/* WaHdcDisableFetchWhenMasked:bdw,chv */
218 	WA_SET_BIT_MASKED(HDC_CHICKEN0,
219 			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
220 			  HDC_FORCE_NON_COHERENT);
221 
222 	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
223 	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
224 	 *  polygons in the same 8x4 pixel/sample area to be processed without
225 	 *  stalling waiting for the earlier ones to write to Hierarchical Z
226 	 *  buffer."
227 	 *
228 	 * This optimization is off by default for BDW and CHV; turn it on.
229 	 */
230 	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
231 
232 	/* Wa4x4STCOptimizationDisable:bdw,chv */
233 	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
234 
235 	/*
236 	 * BSpec recommends 8x4 when MSAA is used,
237 	 * however in practice 16x4 seems fastest.
238 	 *
239 	 * Note that PS/WM thread counts depend on the WIZ hashing
240 	 * disable bit, which we don't touch here, but it's good
241 	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
242 	 */
243 	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
244 			    GEN6_WIZ_HASHING_MASK,
245 			    GEN6_WIZ_HASHING_16x4);
246 }
247 
248 static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
249 				     struct i915_wa_list *wal)
250 {
251 	struct drm_i915_private *i915 = engine->i915;
252 
253 	gen8_ctx_workarounds_init(engine, wal);
254 
255 	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
256 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
257 
258 	/* WaDisableDopClockGating:bdw
259 	 *
260 	 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
261 	 * to disable EUTC clock gating.
262 	 */
263 	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
264 			  DOP_CLOCK_GATING_DISABLE);
265 
266 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
267 			  GEN8_SAMPLER_POWER_BYPASS_DIS);
268 
269 	WA_SET_BIT_MASKED(HDC_CHICKEN0,
270 			  /* WaForceContextSaveRestoreNonCoherent:bdw */
271 			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
272 			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
273 			  (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
274 }
275 
276 static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
277 				     struct i915_wa_list *wal)
278 {
279 	gen8_ctx_workarounds_init(engine, wal);
280 
281 	/* WaDisableThreadStallDopClockGating:chv */
282 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
283 
284 	/* Improve HiZ throughput on CHV. */
285 	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
286 }
287 
288 static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
289 				      struct i915_wa_list *wal)
290 {
291 	struct drm_i915_private *i915 = engine->i915;
292 
293 	if (HAS_LLC(i915)) {
294 		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
295 		 *
296 		 * Must match Display Engine. See
297 		 * WaCompressedResourceDisplayNewHashMode.
298 		 */
299 		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
300 				  GEN9_PBE_COMPRESSED_HASH_SELECTION);
301 		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
302 				  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
303 	}
304 
305 	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
306 	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
307 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
308 			  FLOW_CONTROL_ENABLE |
309 			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
310 
311 	/* Syncing dependencies between camera and graphics:skl,bxt,kbl */
312 	if (!IS_COFFEELAKE(i915))
313 		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
314 				  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
315 
316 	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
317 	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
318 	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
319 			  GEN9_ENABLE_YV12_BUGFIX |
320 			  GEN9_ENABLE_GPGPU_PREEMPTION);
321 
322 	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
323 	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
324 	WA_SET_BIT_MASKED(CACHE_MODE_1,
325 			  GEN8_4x4_STC_OPTIMIZATION_DISABLE |
326 			  GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
327 
328 	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
329 	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
330 			  GEN9_CCS_TLB_PREFETCH_ENABLE);
331 
332 	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
333 	WA_SET_BIT_MASKED(HDC_CHICKEN0,
334 			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
335 			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
336 
337 	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
338 	 * both tied to WaForceContextSaveRestoreNonCoherent
339 	 * in some hsds for skl. We keep the tie for all gen9. The
340 	 * documentation is a bit hazy and so we want to get common behaviour,
341 	 * even though there is no clear evidence we would need both on kbl/bxt.
342 	 * This area has been source of system hangs so we play it safe
343 	 * and mimic the skl regardless of what bspec says.
344 	 *
345 	 * Use Force Non-Coherent whenever executing a 3D context. This
346 	 * is a workaround for a possible hang in the unlikely event
347 	 * a TLB invalidation occurs during a PSD flush.
348 	 */
349 
350 	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
351 	WA_SET_BIT_MASKED(HDC_CHICKEN0,
352 			  HDC_FORCE_NON_COHERENT);
353 
354 	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
355 	if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
356 		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
357 				  GEN8_SAMPLER_POWER_BYPASS_DIS);
358 
359 	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
360 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
361 
362 	/*
363 	 * Supporting preemption with fine-granularity requires changes in the
364 	 * batch buffer programming. Since we can't break old userspace, we
365 	 * need to set our default preemption level to a safe value. Userspace is
366 	 * still able to use more fine-grained preemption levels, since in
367 	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
368 	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
369 	 * not real HW workarounds, but merely a way to start using preemption
370 	 * while maintaining old contract with userspace.
371 	 */
372 
373 	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
374 	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
375 
376 	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
377 	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
378 			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
379 			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
380 
381 	/* WaClearHIZ_WM_CHICKEN3:bxt,glk */
382 	if (IS_GEN9_LP(i915))
383 		WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
384 }
385 
386 static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
387 				struct i915_wa_list *wal)
388 {
389 	struct drm_i915_private *i915 = engine->i915;
390 	u8 vals[3] = { 0, 0, 0 };
391 	unsigned int i;
392 
393 	for (i = 0; i < 3; i++) {
394 		u8 ss;
395 
396 		/*
397 		 * Only consider slices where one, and only one, subslice has 7
398 		 * EUs
399 		 */
400 		if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]))
401 			continue;
402 
403 		/*
404 		 * subslice_7eu[i] != 0 (because of the check above) and
405 		 * ss_max == 4 (maximum number of subslices possible per slice)
406 		 *
407 		 * ->    0 <= ss <= 3;
408 		 */
409 		ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1;
410 		vals[i] = 3 - ss;
411 	}
412 
413 	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
414 		return;
415 
416 	/* Tune IZ hashing. See intel_device_info_runtime_init() */
417 	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
418 			    GEN9_IZ_HASHING_MASK(2) |
419 			    GEN9_IZ_HASHING_MASK(1) |
420 			    GEN9_IZ_HASHING_MASK(0),
421 			    GEN9_IZ_HASHING(2, vals[2]) |
422 			    GEN9_IZ_HASHING(1, vals[1]) |
423 			    GEN9_IZ_HASHING(0, vals[0]));
424 }
425 
426 static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
427 				     struct i915_wa_list *wal)
428 {
429 	gen9_ctx_workarounds_init(engine, wal);
430 	skl_tune_iz_hashing(engine, wal);
431 }
432 
433 static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
434 				     struct i915_wa_list *wal)
435 {
436 	gen9_ctx_workarounds_init(engine, wal);
437 
438 	/* WaDisableThreadStallDopClockGating:bxt */
439 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
440 			  STALL_DOP_GATING_DISABLE);
441 
442 	/* WaToEnableHwFixForPushConstHWBug:bxt */
443 	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
444 			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
445 }
446 
447 static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
448 				     struct i915_wa_list *wal)
449 {
450 	struct drm_i915_private *i915 = engine->i915;
451 
452 	gen9_ctx_workarounds_init(engine, wal);
453 
454 	/* WaToEnableHwFixForPushConstHWBug:kbl */
455 	if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
456 		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
457 				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
458 
459 	/* WaDisableSbeCacheDispatchPortSharing:kbl */
460 	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
461 			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
462 }
463 
464 static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
465 				     struct i915_wa_list *wal)
466 {
467 	gen9_ctx_workarounds_init(engine, wal);
468 
469 	/* WaToEnableHwFixForPushConstHWBug:glk */
470 	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
471 			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
472 }
473 
474 static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
475 				     struct i915_wa_list *wal)
476 {
477 	gen9_ctx_workarounds_init(engine, wal);
478 
479 	/* WaToEnableHwFixForPushConstHWBug:cfl */
480 	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
481 			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
482 
483 	/* WaDisableSbeCacheDispatchPortSharing:cfl */
484 	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
485 			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
486 }
487 
488 static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
489 				     struct i915_wa_list *wal)
490 {
491 	struct drm_i915_private *i915 = engine->i915;
492 
493 	/* WaForceContextSaveRestoreNonCoherent:cnl */
494 	WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
495 			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
496 
497 	/* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
498 	if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
499 		WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
500 
501 	/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
502 	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
503 			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
504 
505 	/* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
506 	if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
507 		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
508 				  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
509 
510 	/* WaPushConstantDereferenceHoldDisable:cnl */
511 	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
512 
513 	/* FtrEnableFastAnisoL1BankingFix:cnl */
514 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
515 
516 	/* WaDisable3DMidCmdPreemption:cnl */
517 	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
518 
519 	/* WaDisableGPGPUMidCmdPreemption:cnl */
520 	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
521 			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
522 			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
523 
524 	/* WaDisableEarlyEOT:cnl */
525 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
526 }
527 
528 static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
529 				     struct i915_wa_list *wal)
530 {
531 	struct drm_i915_private *i915 = engine->i915;
532 
533 	/* WaDisableBankHangMode:icl */
534 	wa_write(wal,
535 		 GEN8_L3CNTLREG,
536 		 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
537 		 GEN8_ERRDETBCTRL);
538 
545 	/* Wa_1604370585:icl (pre-prod)
546 	 * Formerly known as WaPushConstantDereferenceHoldDisable
547 	 */
548 	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
549 		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
550 				  PUSH_CONSTANT_DEREF_DISABLE);
551 
552 	/* WaForceEnableNonCoherent:icl
553 	 * This is not the same workaround as in early Gen9 platforms, where
554 	 * lacking this could cause system hangs, but coherency performance
555 	 * overhead is high and only a few compute workloads really need it
556 	 * (the register is whitelisted in hardware now, so UMDs can opt in
557 	 * for coherency if they have a good reason).
558 	 */
559 	WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
560 
561 	/* Wa_2006611047:icl (pre-prod)
562 	 * Formerly known as WaDisableImprovedTdlClkGating
563 	 */
564 	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
565 		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
566 				  GEN11_TDL_CLOCK_GATING_FIX_DISABLE);
567 
568 	/* Wa_2006665173:icl (pre-prod) */
569 	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
570 		WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
571 				  GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);
572 
573 	/* WaEnableFloatBlendOptimization:icl */
574 	wa_write_masked_or(wal,
575 			   GEN10_CACHE_MODE_SS,
576 			   0, /* write-only, so skip validation */
577 			   _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));
578 
579 	/* WaDisableGPGPUMidThreadPreemption:icl */
580 	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
581 			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
582 			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
583 
584 	/* allow headerless messages for preemptible GPGPU context */
585 	WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE,
586 			  GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
587 }
588 
589 static void
590 __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
591 			   struct i915_wa_list *wal,
592 			   const char *name)
593 {
594 	struct drm_i915_private *i915 = engine->i915;
595 
596 	if (engine->class != RENDER_CLASS)
597 		return;
598 
599 	wa_init_start(wal, name);
600 
601 	if (IS_GEN(i915, 11))
602 		icl_ctx_workarounds_init(engine, wal);
603 	else if (IS_CANNONLAKE(i915))
604 		cnl_ctx_workarounds_init(engine, wal);
605 	else if (IS_COFFEELAKE(i915))
606 		cfl_ctx_workarounds_init(engine, wal);
607 	else if (IS_GEMINILAKE(i915))
608 		glk_ctx_workarounds_init(engine, wal);
609 	else if (IS_KABYLAKE(i915))
610 		kbl_ctx_workarounds_init(engine, wal);
611 	else if (IS_BROXTON(i915))
612 		bxt_ctx_workarounds_init(engine, wal);
613 	else if (IS_SKYLAKE(i915))
614 		skl_ctx_workarounds_init(engine, wal);
615 	else if (IS_CHERRYVIEW(i915))
616 		chv_ctx_workarounds_init(engine, wal);
617 	else if (IS_BROADWELL(i915))
618 		bdw_ctx_workarounds_init(engine, wal);
619 	else if (INTEL_GEN(i915) < 8)
620 		return;
621 	else
622 		MISSING_CASE(INTEL_GEN(i915));
623 
624 	wa_init_finish(wal);
625 }
626 
627 void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
628 {
629 	__intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
630 }
631 
632 int intel_engine_emit_ctx_wa(struct i915_request *rq)
633 {
634 	struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
635 	struct i915_wa *wa;
636 	unsigned int i;
637 	u32 *cs;
638 	int ret;
639 
640 	if (wal->count == 0)
641 		return 0;
642 
643 	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
644 	if (ret)
645 		return ret;
646 
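	/*
	 * One LRI header, two dwords (offset, value) per workaround, plus a
	 * trailing MI_NOOP to keep the emitted dword count even.
	 */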
647 	cs = intel_ring_begin(rq, (wal->count * 2 + 2));
648 	if (IS_ERR(cs))
649 		return PTR_ERR(cs);
650 
651 	*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
652 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
653 		*cs++ = i915_mmio_reg_offset(wa->reg);
654 		*cs++ = wa->val;
655 	}
656 	*cs++ = MI_NOOP;
657 
658 	intel_ring_advance(rq, cs);
659 
660 	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
661 	if (ret)
662 		return ret;
663 
664 	return 0;
665 }
666 
667 static void
668 gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
669 {
670 	/* WaDisableKillLogic:bxt,skl,kbl */
671 	if (!IS_COFFEELAKE(i915))
672 		wa_write_or(wal,
673 			    GAM_ECOCHK,
674 			    ECOCHK_DIS_TLB);
675 
676 	if (HAS_LLC(i915)) {
677 		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
678 		 *
679 		 * Must match Display Engine. See
680 		 * WaCompressedResourceDisplayNewHashMode.
681 		 */
682 		wa_write_or(wal,
683 			    MMCD_MISC_CTRL,
684 			    MMCD_PCLA | MMCD_HOTSPOT_EN);
685 	}
686 
687 	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
688 	wa_write_or(wal,
689 		    GAM_ECOCHK,
690 		    BDW_DISABLE_HDC_INVALIDATION);
691 }
692 
693 static void
694 skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
695 {
696 	gen9_gt_workarounds_init(i915, wal);
697 
698 	/* WaDisableGafsUnitClkGating:skl */
699 	wa_write_or(wal,
700 		    GEN7_UCGCTL4,
701 		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
702 
703 	/* WaInPlaceDecompressionHang:skl */
704 	if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
705 		wa_write_or(wal,
706 			    GEN9_GAMT_ECO_REG_RW_IA,
707 			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
708 }
709 
710 static void
711 bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
712 {
713 	gen9_gt_workarounds_init(i915, wal);
714 
715 	/* WaInPlaceDecompressionHang:bxt */
716 	wa_write_or(wal,
717 		    GEN9_GAMT_ECO_REG_RW_IA,
718 		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
719 }
720 
721 static void
722 kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
723 {
724 	gen9_gt_workarounds_init(i915, wal);
725 
726 	/* WaDisableDynamicCreditSharing:kbl */
727 	if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
728 		wa_write_or(wal,
729 			    GAMT_CHKN_BIT_REG,
730 			    GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
731 
732 	/* WaDisableGafsUnitClkGating:kbl */
733 	wa_write_or(wal,
734 		    GEN7_UCGCTL4,
735 		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
736 
737 	/* WaInPlaceDecompressionHang:kbl */
738 	wa_write_or(wal,
739 		    GEN9_GAMT_ECO_REG_RW_IA,
740 		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
741 }
742 
743 static void
744 glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
745 {
746 	gen9_gt_workarounds_init(i915, wal);
747 }
748 
749 static void
750 cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
751 {
752 	gen9_gt_workarounds_init(i915, wal);
753 
754 	/* WaDisableGafsUnitClkGating:cfl */
755 	wa_write_or(wal,
756 		    GEN7_UCGCTL4,
757 		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
758 
759 	/* WaInPlaceDecompressionHang:cfl */
760 	wa_write_or(wal,
761 		    GEN9_GAMT_ECO_REG_RW_IA,
762 		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
763 }
764 
765 static void
766 wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
767 {
768 	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
769 	u32 mcr_slice_subslice_mask;
770 
771 	/*
772 	 * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
773 	 * L3Banks could be fused off in single slice scenario. If that is
774 	 * the case, we might need to program MCR select to a valid L3Bank
775 	 * by default, to make sure we correctly read certain registers
776 	 * later on (in the range 0xB100 - 0xB3FF).
777 	 * This might be incompatible with
778 	 * WaProgramMgsrForCorrectSliceSpecificMmioReads.
779 	 * Fortunately, this should not happen in production hardware, so
780 	 * we only assert that this is the case (instead of implementing
781 	 * something more complex that requires checking the range of every
782 	 * MMIO read).
783 	 */
784 	if (INTEL_GEN(i915) >= 10 &&
785 	    is_power_of_2(sseu->slice_mask)) {
786 		/*
787 		 * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches
788 		 * enabled subslice, no need to redirect MCR packet
789 		 */
790 		u32 slice = fls(sseu->slice_mask);
791 		u32 fuse3 =
792 			intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3);
793 		u8 ss_mask = sseu->subslice_mask[slice];
794 
795 		u8 enabled_mask = (ss_mask | ss_mask >>
796 				   GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
797 		u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK;
798 
799 		/*
800 		 * Production silicon should have matched L3Bank and
801 		 * subslice enabled
802 		 */
803 		WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
804 	}
805 
806 	if (INTEL_GEN(i915) >= 11)
807 		mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
808 					  GEN11_MCR_SUBSLICE_MASK;
809 	else
810 		mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
811 					  GEN8_MCR_SUBSLICE_MASK;
812 	/*
813 	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
814 	 * Before any MMIO read into slice/subslice specific registers, MCR
815 	 * packet control register needs to be programmed to point to any
816 	 * enabled s/ss pair. Otherwise, incorrect values will be returned.
817 	 * This means each subsequent MMIO read will be forwarded to a
818 	 * specific s/ss combination, but this is OK since these registers
819 	 * are consistent across s/ss in almost all cases. On the rare
820 	 * occasions, such as INSTDONE, where the value depends on the
821 	 * s/ss combo, the read should be done with read_subslice_reg.
822 	 */
823 	wa_write_masked_or(wal,
824 			   GEN8_MCR_SELECTOR,
825 			   mcr_slice_subslice_mask,
826 			   intel_calculate_mcr_s_ss_select(i915));
827 }
828 
829 static void
830 cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
831 {
832 	wa_init_mcr(i915, wal);
833 
834 	/* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
835 	if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
836 		wa_write_or(wal,
837 			    GAMT_CHKN_BIT_REG,
838 			    GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);
839 
840 	/* WaInPlaceDecompressionHang:cnl */
841 	wa_write_or(wal,
842 		    GEN9_GAMT_ECO_REG_RW_IA,
843 		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
844 }
845 
846 static void
847 icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
848 {
849 	wa_init_mcr(i915, wal);
850 
851 	/* WaInPlaceDecompressionHang:icl */
852 	wa_write_or(wal,
853 		    GEN9_GAMT_ECO_REG_RW_IA,
854 		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
855 
856 	/* WaModifyGamTlbPartitioning:icl */
857 	wa_write_masked_or(wal,
858 			   GEN11_GACB_PERF_CTRL,
859 			   GEN11_HASH_CTRL_MASK,
860 			   GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
861 
862 	/* Wa_1405766107:icl
863 	 * Formerly known as WaCL2SFHalfMaxAlloc
864 	 */
865 	wa_write_or(wal,
866 		    GEN11_LSN_UNSLCVC,
867 		    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
868 		    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
869 
870 	/* Wa_220166154:icl
871 	 * Formerly known as WaDisCtxReload
872 	 */
873 	wa_write_or(wal,
874 		    GEN8_GAMW_ECO_DEV_RW_IA,
875 		    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
876 
877 	/* Wa_1405779004:icl (pre-prod) */
878 	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
879 		wa_write_or(wal,
880 			    SLICE_UNIT_LEVEL_CLKGATE,
881 			    MSCUNIT_CLKGATE_DIS);
882 
883 	/* Wa_1406680159:icl */
884 	wa_write_or(wal,
885 		    SUBSLICE_UNIT_LEVEL_CLKGATE,
886 		    GWUNIT_CLKGATE_DIS);
887 
888 	/* Wa_1406838659:icl (pre-prod) */
889 	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
890 		wa_write_or(wal,
891 			    INF_UNIT_LEVEL_CLKGATE,
892 			    CGPSF_CLKGATE_DIS);
893 
894 	/* Wa_1406463099:icl
895 	 * Formerly known as WaGamTlbPendError
896 	 */
897 	wa_write_or(wal,
898 		    GAMT_CHKN_BIT_REG,
899 		    GAMT_CHKN_DISABLE_L3_COH_PIPE);
900 }
901 
902 static void
903 gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
904 {
905 	if (IS_GEN(i915, 11))
906 		icl_gt_workarounds_init(i915, wal);
907 	else if (IS_CANNONLAKE(i915))
908 		cnl_gt_workarounds_init(i915, wal);
909 	else if (IS_COFFEELAKE(i915))
910 		cfl_gt_workarounds_init(i915, wal);
911 	else if (IS_GEMINILAKE(i915))
912 		glk_gt_workarounds_init(i915, wal);
913 	else if (IS_KABYLAKE(i915))
914 		kbl_gt_workarounds_init(i915, wal);
915 	else if (IS_BROXTON(i915))
916 		bxt_gt_workarounds_init(i915, wal);
917 	else if (IS_SKYLAKE(i915))
918 		skl_gt_workarounds_init(i915, wal);
919 	else if (INTEL_GEN(i915) <= 8)
920 		return;
921 	else
922 		MISSING_CASE(INTEL_GEN(i915));
923 }
924 
925 void intel_gt_init_workarounds(struct drm_i915_private *i915)
926 {
927 	struct i915_wa_list *wal = &i915->gt_wa_list;
928 
929 	wa_init_start(wal, "GT");
930 	gt_init_workarounds(i915, wal);
931 	wa_init_finish(wal);
932 }
933 
934 static enum forcewake_domains
935 wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
936 {
937 	enum forcewake_domains fw = 0;
938 	struct i915_wa *wa;
939 	unsigned int i;
940 
941 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
942 		fw |= intel_uncore_forcewake_for_reg(uncore,
943 						     wa->reg,
944 						     FW_REG_READ |
945 						     FW_REG_WRITE);
946 
947 	return fw;
948 }
949 
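/*
 * Only the bits named in wa->read are compared: the workaround counts as
 * lost if any of those bits differ from the value we programmed.
 */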
950 static bool
951 wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
952 {
953 	if ((cur ^ wa->val) & wa->read) {
954 		DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
955 			  name, from, i915_mmio_reg_offset(wa->reg),
956 			  cur, cur & wa->read,
957 			  wa->val, wa->mask);
958 
959 		return false;
960 	}
961 
962 	return true;
963 }
964 
965 static void
966 wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
967 {
968 	enum forcewake_domains fw;
969 	unsigned long flags;
970 	struct i915_wa *wa;
971 	unsigned int i;
972 
973 	if (!wal->count)
974 		return;
975 
976 	fw = wal_get_fw_for_rmw(uncore, wal);
977 
978 	spin_lock_irqsave(&uncore->lock, flags);
979 	intel_uncore_forcewake_get__locked(uncore, fw);
980 
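	/*
	 * For each entry, read the register, clear the bits in wa->mask, OR
	 * in wa->val and write the result back, using the _fw accessors
	 * since the needed forcewake domains are already held above.
	 */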
981 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
982 		intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val);
983 		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
984 			wa_verify(wa,
985 				  intel_uncore_read_fw(uncore, wa->reg),
986 				  wal->name, "application");
987 	}
988 
989 	intel_uncore_forcewake_put__locked(uncore, fw);
990 	spin_unlock_irqrestore(&uncore->lock, flags);
991 }
992 
993 void intel_gt_apply_workarounds(struct drm_i915_private *i915)
994 {
995 	wa_list_apply(&i915->uncore, &i915->gt_wa_list);
996 }
997 
998 static bool wa_list_verify(struct intel_uncore *uncore,
999 			   const struct i915_wa_list *wal,
1000 			   const char *from)
1001 {
1002 	struct i915_wa *wa;
1003 	unsigned int i;
1004 	bool ok = true;
1005 
1006 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1007 		ok &= wa_verify(wa,
1008 				intel_uncore_read(uncore, wa->reg),
1009 				wal->name, from);
1010 
1011 	return ok;
1012 }
1013 
1014 bool intel_gt_verify_workarounds(struct drm_i915_private *i915,
1015 				 const char *from)
1016 {
1017 	return wa_list_verify(&i915->uncore, &i915->gt_wa_list, from);
1018 }
1019 
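/*
 * A whitelist entry is simply the register's mmio offset, with access
 * flags (e.g. RING_FORCE_TO_NONPRIV_RD vs _RW) OR'd into the value that
 * intel_engine_apply_whitelist() eventually writes into a
 * RING_FORCE_TO_NONPRIV slot.
 */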
1020 static void
1021 whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
1022 {
1023 	struct i915_wa wa = {
1024 		.reg = reg
1025 	};
1026 
1027 	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
1028 		return;
1029 
1030 	wa.reg.reg |= flags;
1031 	_wa_add(wal, &wa);
1032 }
1033 
1034 static void
1035 whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
1036 {
1037 	whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_RW);
1038 }
1039 
1040 static void gen9_whitelist_build(struct i915_wa_list *w)
1041 {
1042 	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
1043 	whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
1044 
1045 	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
1046 	whitelist_reg(w, GEN8_CS_CHICKEN1);
1047 
1048 	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
1049 	whitelist_reg(w, GEN8_HDC_CHICKEN1);
1050 }
1051 
1052 static void skl_whitelist_build(struct intel_engine_cs *engine)
1053 {
1054 	struct i915_wa_list *w = &engine->whitelist;
1055 
1056 	if (engine->class != RENDER_CLASS)
1057 		return;
1058 
1059 	gen9_whitelist_build(w);
1060 
1061 	/* WaDisableLSQCROPERFforOCL:skl */
1062 	whitelist_reg(w, GEN8_L3SQCREG4);
1063 }
1064 
1065 static void bxt_whitelist_build(struct intel_engine_cs *engine)
1066 {
1067 	if (engine->class != RENDER_CLASS)
1068 		return;
1069 
1070 	gen9_whitelist_build(&engine->whitelist);
1071 }
1072 
1073 static void kbl_whitelist_build(struct intel_engine_cs *engine)
1074 {
1075 	struct i915_wa_list *w = &engine->whitelist;
1076 
1077 	if (engine->class != RENDER_CLASS)
1078 		return;
1079 
1080 	gen9_whitelist_build(w);
1081 
1082 	/* WaDisableLSQCROPERFforOCL:kbl */
1083 	whitelist_reg(w, GEN8_L3SQCREG4);
1084 }
1085 
1086 static void glk_whitelist_build(struct intel_engine_cs *engine)
1087 {
1088 	struct i915_wa_list *w = &engine->whitelist;
1089 
1090 	if (engine->class != RENDER_CLASS)
1091 		return;
1092 
1093 	gen9_whitelist_build(w);
1094 
1095 	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
1096 	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1097 }
1098 
1099 static void cfl_whitelist_build(struct intel_engine_cs *engine)
1100 {
1101 	if (engine->class != RENDER_CLASS)
1102 		return;
1103 
1104 	gen9_whitelist_build(&engine->whitelist);
1105 }
1106 
1107 static void cnl_whitelist_build(struct intel_engine_cs *engine)
1108 {
1109 	struct i915_wa_list *w = &engine->whitelist;
1110 
1111 	if (engine->class != RENDER_CLASS)
1112 		return;
1113 
1114 	/* WaEnablePreemptionGranularityControlByUMD:cnl */
1115 	whitelist_reg(w, GEN8_CS_CHICKEN1);
1116 }
1117 
1118 static void icl_whitelist_build(struct intel_engine_cs *engine)
1119 {
1120 	struct i915_wa_list *w = &engine->whitelist;
1121 
1122 	switch (engine->class) {
1123 	case RENDER_CLASS:
1124 		/* WaAllowUMDToModifyHalfSliceChicken7:icl */
1125 		whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
1126 
1127 		/* WaAllowUMDToModifySamplerMode:icl */
1128 		whitelist_reg(w, GEN10_SAMPLER_MODE);
1129 
1130 		/* WaEnableStateCacheRedirectToCS:icl */
1131 		whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1132 		break;
1133 
1134 	case VIDEO_DECODE_CLASS:
1135 		/* hucStatusRegOffset */
1136 		whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
1137 				  RING_FORCE_TO_NONPRIV_RD);
1138 		/* hucUKernelHdrInfoRegOffset */
1139 		whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
1140 				  RING_FORCE_TO_NONPRIV_RD);
1141 		/* hucStatus2RegOffset */
1142 		whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
1143 				  RING_FORCE_TO_NONPRIV_RD);
1144 		break;
1145 
1146 	default:
1147 		break;
1148 	}
1149 }
1150 
1151 void intel_engine_init_whitelist(struct intel_engine_cs *engine)
1152 {
1153 	struct drm_i915_private *i915 = engine->i915;
1154 	struct i915_wa_list *w = &engine->whitelist;
1155 
1156 	wa_init_start(w, "whitelist");
1157 
1158 	if (IS_GEN(i915, 11))
1159 		icl_whitelist_build(engine);
1160 	else if (IS_CANNONLAKE(i915))
1161 		cnl_whitelist_build(engine);
1162 	else if (IS_COFFEELAKE(i915))
1163 		cfl_whitelist_build(engine);
1164 	else if (IS_GEMINILAKE(i915))
1165 		glk_whitelist_build(engine);
1166 	else if (IS_KABYLAKE(i915))
1167 		kbl_whitelist_build(engine);
1168 	else if (IS_BROXTON(i915))
1169 		bxt_whitelist_build(engine);
1170 	else if (IS_SKYLAKE(i915))
1171 		skl_whitelist_build(engine);
1172 	else if (INTEL_GEN(i915) <= 8)
1173 		return;
1174 	else
1175 		MISSING_CASE(INTEL_GEN(i915));
1176 
1177 	wa_init_finish(w);
1178 }
1179 
1180 void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
1181 {
1182 	const struct i915_wa_list *wal = &engine->whitelist;
1183 	struct intel_uncore *uncore = engine->uncore;
1184 	const u32 base = engine->mmio_base;
1185 	struct i915_wa *wa;
1186 	unsigned int i;
1187 
1188 	if (!wal->count)
1189 		return;
1190 
1191 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1192 		intel_uncore_write(uncore,
1193 				   RING_FORCE_TO_NONPRIV(base, i),
1194 				   i915_mmio_reg_offset(wa->reg));
1195 
1196 	/* And clear the rest just in case of garbage */
1197 	for (; i < RING_MAX_NONPRIV_SLOTS; i++)
1198 		intel_uncore_write(uncore,
1199 				   RING_FORCE_TO_NONPRIV(base, i),
1200 				   i915_mmio_reg_offset(RING_NOPID(base)));
1201 }
1202 
1203 static void
1204 rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1205 {
1206 	struct drm_i915_private *i915 = engine->i915;
1207 
1208 	if (IS_GEN(i915, 11)) {
1209 		/* This is not a Wa. Enable for better image quality */
1210 		wa_masked_en(wal,
1211 			     _3D_CHICKEN3,
1212 			     _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
1213 
1214 		/* WaPipelineFlushCoherentLines:icl */
1215 		ignore_wa_write_or(wal,
1216 				   GEN8_L3SQCREG4,
1217 				   GEN8_LQSC_FLUSH_COHERENT_LINES,
1218 				   GEN8_LQSC_FLUSH_COHERENT_LINES);
1219 
1220 		/*
1221 		 * Wa_1405543622:icl
1222 		 * Formerly known as WaGAPZPriorityScheme
1223 		 */
1224 		wa_write_or(wal,
1225 			    GEN8_GARBCNTL,
1226 			    GEN11_ARBITRATION_PRIO_ORDER_MASK);
1227 
1228 		/*
1229 		 * Wa_1604223664:icl
1230 		 * Formerly known as WaL3BankAddressHashing
1231 		 */
1232 		wa_write_masked_or(wal,
1233 				   GEN8_GARBCNTL,
1234 				   GEN11_HASH_CTRL_EXCL_MASK,
1235 				   GEN11_HASH_CTRL_EXCL_BIT0);
1236 		wa_write_masked_or(wal,
1237 				   GEN11_GLBLINVL,
1238 				   GEN11_BANK_HASH_ADDR_EXCL_MASK,
1239 				   GEN11_BANK_HASH_ADDR_EXCL_BIT0);
1240 
1241 		/*
1242 		 * Wa_1405733216:icl
1243 		 * Formerly known as WaDisableCleanEvicts
1244 		 */
1245 		ignore_wa_write_or(wal,
1246 				   GEN8_L3SQCREG4,
1247 				   GEN11_LQSC_CLEAN_EVICT_DISABLE,
1248 				   GEN11_LQSC_CLEAN_EVICT_DISABLE);
1249 
1250 		/* WaForwardProgressSoftReset:icl */
1251 		wa_write_or(wal,
1252 			    GEN10_SCRATCH_LNCF2,
1253 			    PMFLUSHDONE_LNICRSDROP |
1254 			    PMFLUSH_GAPL3UNBLOCK |
1255 			    PMFLUSHDONE_LNEBLK);
1256 
1257 		/* Wa_1406609255:icl (pre-prod) */
1258 		if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
1259 			wa_write_or(wal,
1260 				    GEN7_SARCHKMD,
1261 				    GEN7_DISABLE_DEMAND_PREFETCH |
1262 				    GEN7_DISABLE_SAMPLER_PREFETCH);
1263 	}
1264 
1265 	if (IS_GEN_RANGE(i915, 9, 11)) {
1266 		/* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */
1267 		wa_masked_en(wal,
1268 			     GEN7_FF_SLICE_CS_CHICKEN1,
1269 			     GEN9_FFSC_PERCTX_PREEMPT_CTRL);
1270 	}
1271 
1272 	if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
1273 		/* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
1274 		wa_write_or(wal,
1275 			    GEN8_GARBCNTL,
1276 			    GEN9_GAPS_TSV_CREDIT_DISABLE);
1277 	}
1278 
1279 	if (IS_BROXTON(i915)) {
1280 		/* WaDisablePooledEuLoadBalancingFix:bxt */
1281 		wa_masked_en(wal,
1282 			     FF_SLICE_CS_CHICKEN2,
1283 			     GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
1284 	}
1285 
1286 	if (IS_GEN(i915, 9)) {
1287 		/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
1288 		wa_masked_en(wal,
1289 			     GEN9_CSFE_CHICKEN1_RCS,
1290 			     GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
1291 
1292 		/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
1293 		wa_write_or(wal,
1294 			    BDW_SCRATCH1,
1295 			    GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
1296 
1297 		/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
1298 		if (IS_GEN9_LP(i915))
1299 			wa_write_masked_or(wal,
1300 					   GEN8_L3SQCREG1,
1301 					   L3_PRIO_CREDITS_MASK,
1302 					   L3_GENERAL_PRIO_CREDITS(62) |
1303 					   L3_HIGH_PRIO_CREDITS(2));
1304 
1305 		/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
1306 		wa_write_or(wal,
1307 			    GEN8_L3SQCREG4,
1308 			    GEN8_LQSC_FLUSH_COHERENT_LINES);
1309 	}
1310 }
1311 
1312 static void
1313 xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1314 {
1315 	struct drm_i915_private *i915 = engine->i915;
1316 
1317 	/* WaKBLVECSSemaphoreWaitPoll:kbl */
1318 	if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
1319 		wa_write(wal,
1320 			 RING_SEMA_WAIT_POLL(engine->mmio_base),
1321 			 1);
1322 	}
1323 }
1324 
1325 static void
1326 engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1327 {
1328 	if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8))
1329 		return;
1330 
1331 	if (engine->id == RCS0)
1332 		rcs_engine_wa_init(engine, wal);
1333 	else
1334 		xcs_engine_wa_init(engine, wal);
1335 }
1336 
1337 void intel_engine_init_workarounds(struct intel_engine_cs *engine)
1338 {
1339 	struct i915_wa_list *wal = &engine->wa_list;
1340 
1341 	if (GEM_WARN_ON(INTEL_GEN(engine->i915) < 8))
1342 		return;
1343 
1344 	wa_init_start(wal, engine->name);
1345 	engine_init_workarounds(engine, wal);
1346 	wa_init_finish(wal);
1347 }
1348 
1349 void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
1350 {
1351 	wa_list_apply(engine->uncore, &engine->wa_list);
1352 }
1353 
1354 static struct i915_vma *
1355 create_scratch(struct i915_address_space *vm, int count)
1356 {
1357 	struct drm_i915_gem_object *obj;
1358 	struct i915_vma *vma;
1359 	unsigned int size;
1360 	int err;
1361 
1362 	size = round_up(count * sizeof(u32), PAGE_SIZE);
1363 	obj = i915_gem_object_create_internal(vm->i915, size);
1364 	if (IS_ERR(obj))
1365 		return ERR_CAST(obj);
1366 
1367 	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
1368 
1369 	vma = i915_vma_instance(obj, vm, NULL);
1370 	if (IS_ERR(vma)) {
1371 		err = PTR_ERR(vma);
1372 		goto err_obj;
1373 	}
1374 
1375 	err = i915_vma_pin(vma, 0, 0,
1376 			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
1377 	if (err)
1378 		goto err_obj;
1379 
1380 	return vma;
1381 
1382 err_obj:
1383 	i915_gem_object_put(obj);
1384 	return ERR_PTR(err);
1385 }
1386 
1387 static int
1388 wa_list_srm(struct i915_request *rq,
1389 	    const struct i915_wa_list *wal,
1390 	    struct i915_vma *vma)
1391 {
1392 	const struct i915_wa *wa;
1393 	unsigned int i;
1394 	u32 srm, *cs;
1395 
1396 	srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
1397 	if (INTEL_GEN(rq->i915) >= 8)
1398 		srm++;
1399 
1400 	cs = intel_ring_begin(rq, 4 * wal->count);
1401 	if (IS_ERR(cs))
1402 		return PTR_ERR(cs);
1403 
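	/*
	 * Four dwords per register: the SRM command, the register offset and
	 * a 64-bit GGTT destination address. On pre-gen8 the SRM only takes
	 * a 32-bit address, so the zeroed high dword executes as an MI_NOOP.
	 */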
1404 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
1405 		*cs++ = srm;
1406 		*cs++ = i915_mmio_reg_offset(wa->reg);
1407 		*cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
1408 		*cs++ = 0;
1409 	}
1410 	intel_ring_advance(rq, cs);
1411 
1412 	return 0;
1413 }
1414 
1415 static int engine_wa_list_verify(struct intel_context *ce,
1416 				 const struct i915_wa_list * const wal,
1417 				 const char *from)
1418 {
1419 	const struct i915_wa *wa;
1420 	struct i915_request *rq;
1421 	struct i915_vma *vma;
1422 	unsigned int i;
1423 	u32 *results;
1424 	int err;
1425 
1426 	if (!wal->count)
1427 		return 0;
1428 
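	/*
	 * Read the workaround registers back from the GPU's point of view:
	 * allocate a scratch buffer, emit SRM commands from the target
	 * context, wait for the request to complete, then compare each
	 * stored value against the expected workaround value.
	 */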
1429 	vma = create_scratch(&ce->engine->i915->ggtt.vm, wal->count);
1430 	if (IS_ERR(vma))
1431 		return PTR_ERR(vma);
1432 
1433 	rq = intel_context_create_request(ce);
1434 	if (IS_ERR(rq)) {
1435 		err = PTR_ERR(rq);
1436 		goto err_vma;
1437 	}
1438 
1439 	err = wa_list_srm(rq, wal, vma);
1440 	if (err)
1441 		goto err_vma;
1442 
1443 	i915_request_add(rq);
1444 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1445 		err = -ETIME;
1446 		goto err_vma;
1447 	}
1448 
1449 	results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
1450 	if (IS_ERR(results)) {
1451 		err = PTR_ERR(results);
1452 		goto err_vma;
1453 	}
1454 
1455 	err = 0;
1456 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1457 		if (!wa_verify(wa, results[i], wal->name, from))
1458 			err = -ENXIO;
1459 
1460 	i915_gem_object_unpin_map(vma->obj);
1461 
1462 err_vma:
1463 	i915_vma_unpin(vma);
1464 	i915_vma_put(vma);
1465 	return err;
1466 }
1467 
1468 int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
1469 				    const char *from)
1470 {
1471 	return engine_wa_list_verify(engine->kernel_context,
1472 				     &engine->wa_list,
1473 				     from);
1474 }
1475 
1476 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1477 #include "selftest_workarounds.c"
1478 #endif
1479