1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2014-2018 Intel Corporation
5  */
6 
7 #include "i915_drv.h"
8 #include "intel_context.h"
9 #include "intel_workarounds.h"
10 
11 /**
12  * DOC: Hardware workarounds
13  *
14  * This file is intended as a central place to implement most [1]_ of the
15  * required workarounds for hardware to work as originally intended. They fall
16  * into five basic categories depending on how/when they are applied:
17  *
18  * - Workarounds that touch registers that are saved/restored to/from the HW
19  *   context image. The list is emitted (via Load Register Immediate commands)
20  *   every time a new context is created.
21  * - GT workarounds. The list of these WAs is applied whenever these registers
22  *   revert to default values (on GPU reset, suspend/resume [2]_, etc.).
23  * - Display workarounds. The list is applied during display clock-gating
24  *   initialization.
25  * - Workarounds that whitelist a privileged register, so that UMDs can manage
26  *   them directly. This is just a special case of an MMIO workaround (as we
27  *   write the list of these to-be-whitelisted registers to some special HW
28  *   registers).
29  * - Workaround batchbuffers, which get executed automatically by the hardware
30  *   on every HW context restore.
31  *
32  * .. [1] Please notice that there are other WAs that, due to their nature,
33  *    cannot be applied from a central place. Those are peppered around the rest
34  *    of the code, as needed.
35  *
36  * .. [2] Technically, some registers are power context saved & restored, so they
37  *    survive a suspend/resume. In practice, writing them again is not too
38  *    costly and simplifies things. We can revisit this in the future.
39  *
40  * Layout
41  * ~~~~~~
42  *
43  * Keep things in this file ordered by WA type, as per the above (context, GT,
44  * display, register whitelist, batchbuffer). Then, inside each type, keep the
45  * following order:
46  *
47  * - Infrastructure functions and macros
48  * - WAs per platform in standard gen/chrono order
49  * - Public functions to init or apply the given workaround type.
50  */
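
/*
 * Illustrative example only (not itself a workaround): a context workaround
 * below is typically recorded with one of the masked-write helpers, e.g.
 *
 *	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
 *			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 *
 * which appends an entry to the engine's ctx_wa_list; the whole list is later
 * emitted as a MI_LOAD_REGISTER_IMM sequence by intel_engine_emit_ctx_wa()
 * every time a new context is created.
 */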
51 
52 static void wa_init_start(struct i915_wa_list *wal, const char *name)
53 {
54 	wal->name = name;
55 }
56 
57 #define WA_LIST_CHUNK (1 << 4)
58 
59 static void wa_init_finish(struct i915_wa_list *wal)
60 {
61 	/* Trim unused entries. */
62 	if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
63 		struct i915_wa *list = kmemdup(wal->list,
64 					       wal->count * sizeof(*list),
65 					       GFP_KERNEL);
66 
67 		if (list) {
68 			kfree(wal->list);
69 			wal->list = list;
70 		}
71 	}
72 
73 	if (!wal->count)
74 		return;
75 
76 	DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n",
77 			 wal->wa_count, wal->name);
78 }
79 
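/*
 * Add @wa to @wal, keeping the list sorted by mmio offset. Storage grows in
 * WA_LIST_CHUNK increments; if the register is already present, the new
 * mask/value bits are merged into the existing entry and any conflicting
 * overwrite of previously programmed bits is reported.
 */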
80 static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
81 {
82 	unsigned int addr = i915_mmio_reg_offset(wa->reg);
83 	unsigned int start = 0, end = wal->count;
84 	const unsigned int grow = WA_LIST_CHUNK;
85 	struct i915_wa *wa_;
86 
87 	GEM_BUG_ON(!is_power_of_2(grow));
88 
89 	if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
90 		struct i915_wa *list;
91 
92 		list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
93 				     GFP_KERNEL);
94 		if (!list) {
95 			DRM_ERROR("No space for workaround init!\n");
96 			return;
97 		}
98 
99 		if (wal->list)
100 			memcpy(list, wal->list, sizeof(*wa) * wal->count);
101 
102 		wal->list = list;
103 	}
104 
105 	while (start < end) {
106 		unsigned int mid = start + (end - start) / 2;
107 
108 		if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
109 			start = mid + 1;
110 		} else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
111 			end = mid;
112 		} else {
113 			wa_ = &wal->list[mid];
114 
115 			if ((wa->mask & ~wa_->mask) == 0) {
116 				DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
117 					  i915_mmio_reg_offset(wa_->reg),
118 					  wa_->mask, wa_->val);
119 
120 				wa_->val &= ~wa->mask;
121 			}
122 
123 			wal->wa_count++;
124 			wa_->val |= wa->val;
125 			wa_->mask |= wa->mask;
126 			wa_->read |= wa->read;
127 			return;
128 		}
129 	}
130 
131 	wal->wa_count++;
132 	wa_ = &wal->list[wal->count++];
133 	*wa_ = *wa;
134 
135 	while (wa_-- > wal->list) {
136 		GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
137 			   i915_mmio_reg_offset(wa_[1].reg));
138 		if (i915_mmio_reg_offset(wa_[1].reg) >
139 		    i915_mmio_reg_offset(wa_[0].reg))
140 			break;
141 
142 		swap(wa_[1], wa_[0]);
143 	}
144 }
145 
146 static void
147 wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
148 		   u32 val)
149 {
150 	struct i915_wa wa = {
151 		.reg  = reg,
152 		.mask = mask,
153 		.val  = val,
154 		.read = mask,
155 	};
156 
157 	_wa_add(wal, &wa);
158 }
159 
160 static void
161 wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
162 {
163 	wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
164 }
165 
166 static void
167 wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
168 {
169 	wa_write_masked_or(wal, reg, ~0, val);
170 }
171 
172 static void
173 wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
174 {
175 	wa_write_masked_or(wal, reg, val, val);
176 }
177 
178 static void
179 ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val)
180 {
181 	struct i915_wa wa = {
182 		.reg  = reg,
183 		.mask = mask,
184 		.val  = val,
185 		/* Bonkers HW, skip verifying */
186 	};
187 
188 	_wa_add(wal, &wa);
189 }
190 
191 #define WA_SET_BIT_MASKED(addr, mask) \
192 	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))
193 
194 #define WA_CLR_BIT_MASKED(addr, mask) \
195 	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))
196 
197 #define WA_SET_FIELD_MASKED(addr, mask, value) \
198 	wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))
199 
200 static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
201 				      struct i915_wa_list *wal)
202 {
203 	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
204 
205 	/* WaDisableAsyncFlipPerfMode:bdw,chv */
206 	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
207 
208 	/* WaDisablePartialInstShootdown:bdw,chv */
209 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
210 			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
211 
212 	/* Use Force Non-Coherent whenever executing a 3D context. This is a
213 	 * workaround for a possible hang in the unlikely event a TLB
214 	 * invalidation occurs during a PSD flush.
215 	 */
216 	/* WaForceEnableNonCoherent:bdw,chv */
217 	/* WaHdcDisableFetchWhenMasked:bdw,chv */
218 	WA_SET_BIT_MASKED(HDC_CHICKEN0,
219 			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
220 			  HDC_FORCE_NON_COHERENT);
221 
222 	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
223 	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
224 	 *  polygons in the same 8x4 pixel/sample area to be processed without
225 	 *  stalling waiting for the earlier ones to write to Hierarchical Z
226 	 *  buffer."
227 	 *
228 	 * This optimization is off by default for BDW and CHV; turn it on.
229 	 */
230 	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
231 
232 	/* Wa4x4STCOptimizationDisable:bdw,chv */
233 	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
234 
235 	/*
236 	 * BSpec recommends 8x4 when MSAA is used,
237 	 * however in practice 16x4 seems fastest.
238 	 *
239 	 * Note that PS/WM thread counts depend on the WIZ hashing
240 	 * disable bit, which we don't touch here, but it's good
241 	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
242 	 */
243 	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
244 			    GEN6_WIZ_HASHING_MASK,
245 			    GEN6_WIZ_HASHING_16x4);
246 }
247 
248 static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
249 				     struct i915_wa_list *wal)
250 {
251 	struct drm_i915_private *i915 = engine->i915;
252 
253 	gen8_ctx_workarounds_init(engine, wal);
254 
255 	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
256 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
257 
258 	/* WaDisableDopClockGating:bdw
259 	 *
260 	 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
261 	 * to disable EUTC clock gating.
262 	 */
263 	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
264 			  DOP_CLOCK_GATING_DISABLE);
265 
266 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
267 			  GEN8_SAMPLER_POWER_BYPASS_DIS);
268 
269 	WA_SET_BIT_MASKED(HDC_CHICKEN0,
270 			  /* WaForceContextSaveRestoreNonCoherent:bdw */
271 			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
272 			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
273 			  (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
274 }
275 
276 static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
277 				     struct i915_wa_list *wal)
278 {
279 	gen8_ctx_workarounds_init(engine, wal);
280 
281 	/* WaDisableThreadStallDopClockGating:chv */
282 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
283 
284 	/* Improve HiZ throughput on CHV. */
285 	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
286 }
287 
288 static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
289 				      struct i915_wa_list *wal)
290 {
291 	struct drm_i915_private *i915 = engine->i915;
292 
293 	if (HAS_LLC(i915)) {
294 		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
295 		 *
296 		 * Must match Display Engine. See
297 		 * WaCompressedResourceDisplayNewHashMode.
298 		 */
299 		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
300 				  GEN9_PBE_COMPRESSED_HASH_SELECTION);
301 		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
302 				  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
303 	}
304 
305 	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
306 	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
307 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
308 			  FLOW_CONTROL_ENABLE |
309 			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
310 
311 	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
312 	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
313 	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
314 			  GEN9_ENABLE_YV12_BUGFIX |
315 			  GEN9_ENABLE_GPGPU_PREEMPTION);
316 
317 	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
318 	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
319 	WA_SET_BIT_MASKED(CACHE_MODE_1,
320 			  GEN8_4x4_STC_OPTIMIZATION_DISABLE |
321 			  GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
322 
323 	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
324 	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
325 			  GEN9_CCS_TLB_PREFETCH_ENABLE);
326 
327 	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
328 	WA_SET_BIT_MASKED(HDC_CHICKEN0,
329 			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
330 			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
331 
332 	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
333 	 * both tied to WaForceContextSaveRestoreNonCoherent
334 	 * in some hsds for skl. We keep the tie for all gen9. The
335 	 * documentation is a bit hazy and so we want to get common behaviour,
336 	 * even though there is no clear evidence we would need both on kbl/bxt.
337 	 * This area has been a source of system hangs so we play it safe
338 	 * and mimic the skl regardless of what bspec says.
339 	 *
340 	 * Use Force Non-Coherent whenever executing a 3D context. This
341 	 * is a workaround for a possible hang in the unlikely event
342 	 * a TLB invalidation occurs during a PSD flush.
343 	 */
344 
345 	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
346 	WA_SET_BIT_MASKED(HDC_CHICKEN0,
347 			  HDC_FORCE_NON_COHERENT);
348 
349 	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
350 	if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
351 		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
352 				  GEN8_SAMPLER_POWER_BYPASS_DIS);
353 
354 	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
355 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
356 
357 	/*
358 	 * Supporting preemption with fine-granularity requires changes in the
359 	 * batch buffer programming. Since we can't break old userspace, we
360 	 * need to set our default preemption level to safe value. Userspace is
361 	 * still able to use more fine-grained preemption levels, since in
362 	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
363 	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
364 	 * not real HW workarounds, but merely a way to start using preemption
365 	 * while maintaining old contract with userspace.
366 	 */
367 
368 	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
369 	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
370 
371 	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
372 	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
373 			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
374 			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
375 
376 	/* WaClearHIZ_WM_CHICKEN3:bxt,glk */
377 	if (IS_GEN9_LP(i915))
378 		WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
379 }
380 
381 static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
382 				struct i915_wa_list *wal)
383 {
384 	struct drm_i915_private *i915 = engine->i915;
385 	u8 vals[3] = { 0, 0, 0 };
386 	unsigned int i;
387 
388 	for (i = 0; i < 3; i++) {
389 		u8 ss;
390 
391 		/*
392 		 * Only consider slices where one, and only one, subslice has 7
393 		 * EUs
394 		 */
395 		if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]))
396 			continue;
397 
398 		/*
399 		 * subslice_7eu[i] != 0 (because of the check above) and
400 		 * ss_max == 4 (maximum number of subslices possible per slice)
401 		 *
402 		 * ->    0 <= ss <= 3;
403 		 */
404 		ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1;
405 		vals[i] = 3 - ss;
406 	}
407 
408 	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
409 		return;
410 
411 	/* Tune IZ hashing. See intel_device_info_runtime_init() */
412 	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
413 			    GEN9_IZ_HASHING_MASK(2) |
414 			    GEN9_IZ_HASHING_MASK(1) |
415 			    GEN9_IZ_HASHING_MASK(0),
416 			    GEN9_IZ_HASHING(2, vals[2]) |
417 			    GEN9_IZ_HASHING(1, vals[1]) |
418 			    GEN9_IZ_HASHING(0, vals[0]));
419 }
420 
421 static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
422 				     struct i915_wa_list *wal)
423 {
424 	gen9_ctx_workarounds_init(engine, wal);
425 	skl_tune_iz_hashing(engine, wal);
426 }
427 
428 static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
429 				     struct i915_wa_list *wal)
430 {
431 	gen9_ctx_workarounds_init(engine, wal);
432 
433 	/* WaDisableThreadStallDopClockGating:bxt */
434 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
435 			  STALL_DOP_GATING_DISABLE);
436 
437 	/* WaToEnableHwFixForPushConstHWBug:bxt */
438 	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
439 			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
440 }
441 
442 static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
443 				     struct i915_wa_list *wal)
444 {
445 	struct drm_i915_private *i915 = engine->i915;
446 
447 	gen9_ctx_workarounds_init(engine, wal);
448 
449 	/* WaToEnableHwFixForPushConstHWBug:kbl */
450 	if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
451 		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
452 				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
453 
454 	/* WaDisableSbeCacheDispatchPortSharing:kbl */
455 	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
456 			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
457 }
458 
459 static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
460 				     struct i915_wa_list *wal)
461 {
462 	gen9_ctx_workarounds_init(engine, wal);
463 
464 	/* WaToEnableHwFixForPushConstHWBug:glk */
465 	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
466 			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
467 }
468 
469 static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
470 				     struct i915_wa_list *wal)
471 {
472 	gen9_ctx_workarounds_init(engine, wal);
473 
474 	/* WaToEnableHwFixForPushConstHWBug:cfl */
475 	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
476 			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
477 
478 	/* WaDisableSbeCacheDispatchPortSharing:cfl */
479 	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
480 			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
481 }
482 
483 static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
484 				     struct i915_wa_list *wal)
485 {
486 	struct drm_i915_private *i915 = engine->i915;
487 
488 	/* WaForceContextSaveRestoreNonCoherent:cnl */
489 	WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
490 			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
491 
492 	/* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
493 	if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
494 		WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
495 
496 	/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
497 	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
498 			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
499 
500 	/* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
501 	if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
502 		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
503 				  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
504 
505 	/* WaPushConstantDereferenceHoldDisable:cnl */
506 	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
507 
508 	/* FtrEnableFastAnisoL1BankingFix:cnl */
509 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
510 
511 	/* WaDisable3DMidCmdPreemption:cnl */
512 	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
513 
514 	/* WaDisableGPGPUMidCmdPreemption:cnl */
515 	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
516 			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
517 			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
518 
519 	/* WaDisableEarlyEOT:cnl */
520 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
521 }
522 
523 static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
524 				     struct i915_wa_list *wal)
525 {
526 	struct drm_i915_private *i915 = engine->i915;
527 
528 	/* WaDisableBankHangMode:icl */
529 	wa_write(wal,
530 		 GEN8_L3CNTLREG,
531 		 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
532 		 GEN8_ERRDETBCTRL);
533 
540 	/* Wa_1604370585:icl (pre-prod)
541 	 * Formerly known as WaPushConstantDereferenceHoldDisable
542 	 */
543 	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
544 		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
545 				  PUSH_CONSTANT_DEREF_DISABLE);
546 
547 	/* WaForceEnableNonCoherent:icl
548 	 * This is not the same workaround as in early Gen9 platforms, where
549 	 * lacking this could cause system hangs, but coherency performance
550 	 * overhead is high and only a few compute workloads really need it
551 	 * (the register is whitelisted in hardware now, so UMDs can opt in
552 	 * for coherency if they have a good reason).
553 	 */
554 	WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
555 
556 	/* Wa_2006611047:icl (pre-prod)
557 	 * Formerly known as WaDisableImprovedTdlClkGating
558 	 */
559 	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
560 		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
561 				  GEN11_TDL_CLOCK_GATING_FIX_DISABLE);
562 
563 	/* Wa_2006665173:icl (pre-prod) */
564 	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
565 		WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
566 				  GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);
567 
568 	/* WaEnableFloatBlendOptimization:icl */
569 	wa_write_masked_or(wal,
570 			   GEN10_CACHE_MODE_SS,
571 			   0, /* write-only, so skip validation */
572 			   _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));
573 
574 	/* WaDisableGPGPUMidThreadPreemption:icl */
575 	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
576 			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
577 			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
578 
579 	/* allow headerless messages for preemptible GPGPU context */
580 	WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE,
581 			  GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
582 }
583 
584 static void
585 __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
586 			   struct i915_wa_list *wal,
587 			   const char *name)
588 {
589 	struct drm_i915_private *i915 = engine->i915;
590 
591 	if (engine->class != RENDER_CLASS)
592 		return;
593 
594 	wa_init_start(wal, name);
595 
596 	if (IS_GEN(i915, 11))
597 		icl_ctx_workarounds_init(engine, wal);
598 	else if (IS_CANNONLAKE(i915))
599 		cnl_ctx_workarounds_init(engine, wal);
600 	else if (IS_COFFEELAKE(i915))
601 		cfl_ctx_workarounds_init(engine, wal);
602 	else if (IS_GEMINILAKE(i915))
603 		glk_ctx_workarounds_init(engine, wal);
604 	else if (IS_KABYLAKE(i915))
605 		kbl_ctx_workarounds_init(engine, wal);
606 	else if (IS_BROXTON(i915))
607 		bxt_ctx_workarounds_init(engine, wal);
608 	else if (IS_SKYLAKE(i915))
609 		skl_ctx_workarounds_init(engine, wal);
610 	else if (IS_CHERRYVIEW(i915))
611 		chv_ctx_workarounds_init(engine, wal);
612 	else if (IS_BROADWELL(i915))
613 		bdw_ctx_workarounds_init(engine, wal);
614 	else if (INTEL_GEN(i915) < 8)
615 		return;
616 	else
617 		MISSING_CASE(INTEL_GEN(i915));
618 
619 	wa_init_finish(wal);
620 }
621 
622 void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
623 {
624 	__intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
625 }
626 
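/*
 * Emit the engine's context workaround list into @rq as a single
 * MI_LOAD_REGISTER_IMM sequence, bracketed by flushes, so that the values are
 * captured in the new context image.
 */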
627 int intel_engine_emit_ctx_wa(struct i915_request *rq)
628 {
629 	struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
630 	struct i915_wa *wa;
631 	unsigned int i;
632 	u32 *cs;
633 	int ret;
634 
635 	if (wal->count == 0)
636 		return 0;
637 
638 	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
639 	if (ret)
640 		return ret;
641 
642 	cs = intel_ring_begin(rq, (wal->count * 2 + 2));
643 	if (IS_ERR(cs))
644 		return PTR_ERR(cs);
645 
646 	*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
647 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
648 		*cs++ = i915_mmio_reg_offset(wa->reg);
649 		*cs++ = wa->val;
650 	}
651 	*cs++ = MI_NOOP;
652 
653 	intel_ring_advance(rq, cs);
654 
655 	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
656 	if (ret)
657 		return ret;
658 
659 	return 0;
660 }
661 
662 static void
663 gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
664 {
665 	/* WaDisableKillLogic:bxt,skl,kbl */
666 	if (!IS_COFFEELAKE(i915))
667 		wa_write_or(wal,
668 			    GAM_ECOCHK,
669 			    ECOCHK_DIS_TLB);
670 
671 	if (HAS_LLC(i915)) {
672 		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
673 		 *
674 		 * Must match Display Engine. See
675 		 * WaCompressedResourceDisplayNewHashMode.
676 		 */
677 		wa_write_or(wal,
678 			    MMCD_MISC_CTRL,
679 			    MMCD_PCLA | MMCD_HOTSPOT_EN);
680 	}
681 
682 	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
683 	wa_write_or(wal,
684 		    GAM_ECOCHK,
685 		    BDW_DISABLE_HDC_INVALIDATION);
686 }
687 
688 static void
689 skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
690 {
691 	gen9_gt_workarounds_init(i915, wal);
692 
693 	/* WaDisableGafsUnitClkGating:skl */
694 	wa_write_or(wal,
695 		    GEN7_UCGCTL4,
696 		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
697 
698 	/* WaInPlaceDecompressionHang:skl */
699 	if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
700 		wa_write_or(wal,
701 			    GEN9_GAMT_ECO_REG_RW_IA,
702 			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
703 }
704 
705 static void
706 bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
707 {
708 	gen9_gt_workarounds_init(i915, wal);
709 
710 	/* WaInPlaceDecompressionHang:bxt */
711 	wa_write_or(wal,
712 		    GEN9_GAMT_ECO_REG_RW_IA,
713 		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
714 }
715 
716 static void
717 kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
718 {
719 	gen9_gt_workarounds_init(i915, wal);
720 
721 	/* WaDisableDynamicCreditSharing:kbl */
722 	if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
723 		wa_write_or(wal,
724 			    GAMT_CHKN_BIT_REG,
725 			    GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
726 
727 	/* WaDisableGafsUnitClkGating:kbl */
728 	wa_write_or(wal,
729 		    GEN7_UCGCTL4,
730 		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
731 
732 	/* WaInPlaceDecompressionHang:kbl */
733 	wa_write_or(wal,
734 		    GEN9_GAMT_ECO_REG_RW_IA,
735 		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
736 }
737 
738 static void
739 glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
740 {
741 	gen9_gt_workarounds_init(i915, wal);
742 }
743 
744 static void
745 cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
746 {
747 	gen9_gt_workarounds_init(i915, wal);
748 
749 	/* WaDisableGafsUnitClkGating:cfl */
750 	wa_write_or(wal,
751 		    GEN7_UCGCTL4,
752 		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
753 
754 	/* WaInPlaceDecompressionHang:cfl */
755 	wa_write_or(wal,
756 		    GEN9_GAMT_ECO_REG_RW_IA,
757 		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
758 }
759 
760 static void
761 wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
762 {
763 	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
764 	u32 mcr_slice_subslice_mask;
765 
766 	/*
767 	 * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
768 	 * L3Banks could be fused off in single slice scenario. If that is
769 	 * the case, we might need to program MCR select to a valid L3Bank
770 	 * by default, to make sure we correctly read certain registers
771 	 * later on (in the range 0xB100 - 0xB3FF).
772 	 * This might be incompatible with
773 	 * WaProgramMgsrForCorrectSliceSpecificMmioReads.
774 	 * Fortunately, this should not happen in production hardware, so
775 	 * we only assert that this is the case (instead of implementing
776 	 * something more complex that requires checking the range of every
777 	 * MMIO read).
778 	 */
779 	if (INTEL_GEN(i915) >= 10 &&
780 	    is_power_of_2(sseu->slice_mask)) {
781 		/*
782 		 * Read FUSE3 for the enabled L3 Bank IDs; if the L3 Bank matches an
783 		 * enabled subslice, there is no need to redirect the MCR packet.
784 		 */
785 		u32 slice = fls(sseu->slice_mask);
786 		u32 fuse3 =
787 			intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3);
788 		u8 ss_mask = sseu->subslice_mask[slice];
789 
790 		u8 enabled_mask = (ss_mask | ss_mask >>
791 				   GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
792 		u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK;
793 
794 		/*
795 		 * Production silicon should have a matching L3 Bank and
796 		 * subslice enabled.
797 		 */
798 		WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
799 	}
800 
801 	if (INTEL_GEN(i915) >= 11)
802 		mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
803 					  GEN11_MCR_SUBSLICE_MASK;
804 	else
805 		mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
806 					  GEN8_MCR_SUBSLICE_MASK;
807 	/*
808 	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
809 	 * Before any MMIO read into slice/subslice specific registers, MCR
810 	 * packet control register needs to be programmed to point to any
811 	 * enabled s/ss pair. Otherwise, incorrect values will be returned.
812 	 * This means each subsequent MMIO read will be forwarded to a
813 	 * specific s/ss combination, but this is OK since these registers
814 	 * are consistent across s/ss in almost all cases. On the rare
815 	 * occasions, such as INSTDONE, where the value depends on the
816 	 * s/ss combo, the read should be done with read_subslice_reg.
817 	 */
818 	wa_write_masked_or(wal,
819 			   GEN8_MCR_SELECTOR,
820 			   mcr_slice_subslice_mask,
821 			   intel_calculate_mcr_s_ss_select(i915));
822 }
823 
824 static void
825 cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
826 {
827 	wa_init_mcr(i915, wal);
828 
829 	/* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
830 	if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
831 		wa_write_or(wal,
832 			    GAMT_CHKN_BIT_REG,
833 			    GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);
834 
835 	/* WaInPlaceDecompressionHang:cnl */
836 	wa_write_or(wal,
837 		    GEN9_GAMT_ECO_REG_RW_IA,
838 		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
839 }
840 
841 static void
842 icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
843 {
844 	wa_init_mcr(i915, wal);
845 
846 	/* WaInPlaceDecompressionHang:icl */
847 	wa_write_or(wal,
848 		    GEN9_GAMT_ECO_REG_RW_IA,
849 		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
850 
851 	/* WaModifyGamTlbPartitioning:icl */
852 	wa_write_masked_or(wal,
853 			   GEN11_GACB_PERF_CTRL,
854 			   GEN11_HASH_CTRL_MASK,
855 			   GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
856 
857 	/* Wa_1405766107:icl
858 	 * Formerly known as WaCL2SFHalfMaxAlloc
859 	 */
860 	wa_write_or(wal,
861 		    GEN11_LSN_UNSLCVC,
862 		    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
863 		    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
864 
865 	/* Wa_220166154:icl
866 	 * Formerly known as WaDisCtxReload
867 	 */
868 	wa_write_or(wal,
869 		    GEN8_GAMW_ECO_DEV_RW_IA,
870 		    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
871 
872 	/* Wa_1405779004:icl (pre-prod) */
873 	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
874 		wa_write_or(wal,
875 			    SLICE_UNIT_LEVEL_CLKGATE,
876 			    MSCUNIT_CLKGATE_DIS);
877 
878 	/* Wa_1406680159:icl */
879 	wa_write_or(wal,
880 		    SUBSLICE_UNIT_LEVEL_CLKGATE,
881 		    GWUNIT_CLKGATE_DIS);
882 
883 	/* Wa_1406838659:icl (pre-prod) */
884 	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
885 		wa_write_or(wal,
886 			    INF_UNIT_LEVEL_CLKGATE,
887 			    CGPSF_CLKGATE_DIS);
888 
889 	/* Wa_1406463099:icl
890 	 * Formerly known as WaGamTlbPendError
891 	 */
892 	wa_write_or(wal,
893 		    GAMT_CHKN_BIT_REG,
894 		    GAMT_CHKN_DISABLE_L3_COH_PIPE);
895 }
896 
897 static void
898 gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
899 {
900 	if (IS_GEN(i915, 11))
901 		icl_gt_workarounds_init(i915, wal);
902 	else if (IS_CANNONLAKE(i915))
903 		cnl_gt_workarounds_init(i915, wal);
904 	else if (IS_COFFEELAKE(i915))
905 		cfl_gt_workarounds_init(i915, wal);
906 	else if (IS_GEMINILAKE(i915))
907 		glk_gt_workarounds_init(i915, wal);
908 	else if (IS_KABYLAKE(i915))
909 		kbl_gt_workarounds_init(i915, wal);
910 	else if (IS_BROXTON(i915))
911 		bxt_gt_workarounds_init(i915, wal);
912 	else if (IS_SKYLAKE(i915))
913 		skl_gt_workarounds_init(i915, wal);
914 	else if (INTEL_GEN(i915) <= 8)
915 		return;
916 	else
917 		MISSING_CASE(INTEL_GEN(i915));
918 }
919 
920 void intel_gt_init_workarounds(struct drm_i915_private *i915)
921 {
922 	struct i915_wa_list *wal = &i915->gt_wa_list;
923 
924 	wa_init_start(wal, "GT");
925 	gt_init_workarounds(i915, wal);
926 	wa_init_finish(wal);
927 }
928 
929 static enum forcewake_domains
930 wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
931 {
932 	enum forcewake_domains fw = 0;
933 	struct i915_wa *wa;
934 	unsigned int i;
935 
936 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
937 		fw |= intel_uncore_forcewake_for_reg(uncore,
938 						     wa->reg,
939 						     FW_REG_READ |
940 						     FW_REG_WRITE);
941 
942 	return fw;
943 }
944 
945 static bool
946 wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
947 {
948 	if ((cur ^ wa->val) & wa->read) {
949 		DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
950 			  name, from, i915_mmio_reg_offset(wa->reg),
951 			  cur, cur & wa->read,
952 			  wa->val, wa->mask);
953 
954 		return false;
955 	}
956 
957 	return true;
958 }
959 
960 static void
961 wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
962 {
963 	enum forcewake_domains fw;
964 	unsigned long flags;
965 	struct i915_wa *wa;
966 	unsigned int i;
967 
968 	if (!wal->count)
969 		return;
970 
971 	fw = wal_get_fw_for_rmw(uncore, wal);
972 
973 	spin_lock_irqsave(&uncore->lock, flags);
974 	intel_uncore_forcewake_get__locked(uncore, fw);
975 
976 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
977 		intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val);
978 		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
979 			wa_verify(wa,
980 				  intel_uncore_read_fw(uncore, wa->reg),
981 				  wal->name, "application");
982 	}
983 
984 	intel_uncore_forcewake_put__locked(uncore, fw);
985 	spin_unlock_irqrestore(&uncore->lock, flags);
986 }
987 
988 void intel_gt_apply_workarounds(struct drm_i915_private *i915)
989 {
990 	wa_list_apply(&i915->uncore, &i915->gt_wa_list);
991 }
992 
993 static bool wa_list_verify(struct intel_uncore *uncore,
994 			   const struct i915_wa_list *wal,
995 			   const char *from)
996 {
997 	struct i915_wa *wa;
998 	unsigned int i;
999 	bool ok = true;
1000 
1001 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1002 		ok &= wa_verify(wa,
1003 				intel_uncore_read(uncore, wa->reg),
1004 				wal->name, from);
1005 
1006 	return ok;
1007 }
1008 
1009 bool intel_gt_verify_workarounds(struct drm_i915_private *i915,
1010 				 const char *from)
1011 {
1012 	return wa_list_verify(&i915->uncore, &i915->gt_wa_list, from);
1013 }
1014 
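/*
 * Record a register in the engine's whitelist. Each entry is later written
 * into a RING_FORCE_TO_NONPRIV slot by intel_engine_apply_whitelist(); at
 * most RING_MAX_NONPRIV_SLOTS registers can be whitelisted per engine.
 */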
1015 static void
1016 whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
1017 {
1018 	struct i915_wa wa = {
1019 		.reg = reg
1020 	};
1021 
1022 	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
1023 		return;
1024 
1025 	wa.reg.reg |= flags;
1026 	_wa_add(wal, &wa);
1027 }
1028 
1029 static void
1030 whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
1031 {
1032 	whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_RW);
1033 }
1034 
1035 static void gen9_whitelist_build(struct i915_wa_list *w)
1036 {
1037 	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
1038 	whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
1039 
1040 	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
1041 	whitelist_reg(w, GEN8_CS_CHICKEN1);
1042 
1043 	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
1044 	whitelist_reg(w, GEN8_HDC_CHICKEN1);
1045 }
1046 
1047 static void skl_whitelist_build(struct intel_engine_cs *engine)
1048 {
1049 	struct i915_wa_list *w = &engine->whitelist;
1050 
1051 	if (engine->class != RENDER_CLASS)
1052 		return;
1053 
1054 	gen9_whitelist_build(w);
1055 
1056 	/* WaDisableLSQCROPERFforOCL:skl */
1057 	whitelist_reg(w, GEN8_L3SQCREG4);
1058 }
1059 
1060 static void bxt_whitelist_build(struct intel_engine_cs *engine)
1061 {
1062 	if (engine->class != RENDER_CLASS)
1063 		return;
1064 
1065 	gen9_whitelist_build(&engine->whitelist);
1066 }
1067 
1068 static void kbl_whitelist_build(struct intel_engine_cs *engine)
1069 {
1070 	struct i915_wa_list *w = &engine->whitelist;
1071 
1072 	if (engine->class != RENDER_CLASS)
1073 		return;
1074 
1075 	gen9_whitelist_build(w);
1076 
1077 	/* WaDisableLSQCROPERFforOCL:kbl */
1078 	whitelist_reg(w, GEN8_L3SQCREG4);
1079 }
1080 
1081 static void glk_whitelist_build(struct intel_engine_cs *engine)
1082 {
1083 	struct i915_wa_list *w = &engine->whitelist;
1084 
1085 	if (engine->class != RENDER_CLASS)
1086 		return;
1087 
1088 	gen9_whitelist_build(w);
1089 
1090 	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
1091 	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1092 }
1093 
1094 static void cfl_whitelist_build(struct intel_engine_cs *engine)
1095 {
1096 	struct i915_wa_list *w = &engine->whitelist;
1097 
1098 	if (engine->class != RENDER_CLASS)
1099 		return;
1100 
1101 	gen9_whitelist_build(w);
1102 
1103 	/*
1104 	 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
1105 	 *
1106 	 * This covers 4 registers which are next to one another:
1107 	 *   - PS_INVOCATION_COUNT
1108 	 *   - PS_INVOCATION_COUNT_UDW
1109 	 *   - PS_DEPTH_COUNT
1110 	 *   - PS_DEPTH_COUNT_UDW
1111 	 */
1112 	whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1113 			  RING_FORCE_TO_NONPRIV_RD |
1114 			  RING_FORCE_TO_NONPRIV_RANGE_4);
1115 }
1116 
1117 static void cnl_whitelist_build(struct intel_engine_cs *engine)
1118 {
1119 	struct i915_wa_list *w = &engine->whitelist;
1120 
1121 	if (engine->class != RENDER_CLASS)
1122 		return;
1123 
1124 	/* WaEnablePreemptionGranularityControlByUMD:cnl */
1125 	whitelist_reg(w, GEN8_CS_CHICKEN1);
1126 }
1127 
1128 static void icl_whitelist_build(struct intel_engine_cs *engine)
1129 {
1130 	struct i915_wa_list *w = &engine->whitelist;
1131 
1132 	switch (engine->class) {
1133 	case RENDER_CLASS:
1134 		/* WaAllowUMDToModifyHalfSliceChicken7:icl */
1135 		whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
1136 
1137 		/* WaAllowUMDToModifySamplerMode:icl */
1138 		whitelist_reg(w, GEN10_SAMPLER_MODE);
1139 
1140 		/* WaEnableStateCacheRedirectToCS:icl */
1141 		whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1142 
1143 		/*
1144 		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
1145 		 *
1146 		 * This covers 4 registers which are next to one another:
1147 		 *   - PS_INVOCATION_COUNT
1148 		 *   - PS_INVOCATION_COUNT_UDW
1149 		 *   - PS_DEPTH_COUNT
1150 		 *   - PS_DEPTH_COUNT_UDW
1151 		 */
1152 		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1153 				  RING_FORCE_TO_NONPRIV_RD |
1154 				  RING_FORCE_TO_NONPRIV_RANGE_4);
1155 		break;
1156 
1157 	case VIDEO_DECODE_CLASS:
1158 		/* hucStatusRegOffset */
1159 		whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
1160 				  RING_FORCE_TO_NONPRIV_RD);
1161 		/* hucUKernelHdrInfoRegOffset */
1162 		whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
1163 				  RING_FORCE_TO_NONPRIV_RD);
1164 		/* hucStatus2RegOffset */
1165 		whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
1166 				  RING_FORCE_TO_NONPRIV_RD);
1167 		break;
1168 
1169 	default:
1170 		break;
1171 	}
1172 }
1173 
1174 void intel_engine_init_whitelist(struct intel_engine_cs *engine)
1175 {
1176 	struct drm_i915_private *i915 = engine->i915;
1177 	struct i915_wa_list *w = &engine->whitelist;
1178 
1179 	wa_init_start(w, "whitelist");
1180 
1181 	if (IS_GEN(i915, 11))
1182 		icl_whitelist_build(engine);
1183 	else if (IS_CANNONLAKE(i915))
1184 		cnl_whitelist_build(engine);
1185 	else if (IS_COFFEELAKE(i915))
1186 		cfl_whitelist_build(engine);
1187 	else if (IS_GEMINILAKE(i915))
1188 		glk_whitelist_build(engine);
1189 	else if (IS_KABYLAKE(i915))
1190 		kbl_whitelist_build(engine);
1191 	else if (IS_BROXTON(i915))
1192 		bxt_whitelist_build(engine);
1193 	else if (IS_SKYLAKE(i915))
1194 		skl_whitelist_build(engine);
1195 	else if (INTEL_GEN(i915) <= 8)
1196 		return;
1197 	else
1198 		MISSING_CASE(INTEL_GEN(i915));
1199 
1200 	wa_init_finish(w);
1201 }
1202 
1203 void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
1204 {
1205 	const struct i915_wa_list *wal = &engine->whitelist;
1206 	struct intel_uncore *uncore = engine->uncore;
1207 	const u32 base = engine->mmio_base;
1208 	struct i915_wa *wa;
1209 	unsigned int i;
1210 
1211 	if (!wal->count)
1212 		return;
1213 
1214 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1215 		intel_uncore_write(uncore,
1216 				   RING_FORCE_TO_NONPRIV(base, i),
1217 				   i915_mmio_reg_offset(wa->reg));
1218 
1219 	/* And clear the rest just in case of garbage */
1220 	for (; i < RING_MAX_NONPRIV_SLOTS; i++)
1221 		intel_uncore_write(uncore,
1222 				   RING_FORCE_TO_NONPRIV(base, i),
1223 				   i915_mmio_reg_offset(RING_NOPID(base)));
1224 }
1225 
1226 static void
1227 rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1228 {
1229 	struct drm_i915_private *i915 = engine->i915;
1230 
1231 	if (IS_GEN(i915, 11)) {
1232 		/* This is not a Wa. Enable for better image quality. */
1233 		wa_masked_en(wal,
1234 			     _3D_CHICKEN3,
1235 			     _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
1236 
1237 		/* WaPipelineFlushCoherentLines:icl */
1238 		ignore_wa_write_or(wal,
1239 				   GEN8_L3SQCREG4,
1240 				   GEN8_LQSC_FLUSH_COHERENT_LINES,
1241 				   GEN8_LQSC_FLUSH_COHERENT_LINES);
1242 
1243 		/*
1244 		 * Wa_1405543622:icl
1245 		 * Formerly known as WaGAPZPriorityScheme
1246 		 */
1247 		wa_write_or(wal,
1248 			    GEN8_GARBCNTL,
1249 			    GEN11_ARBITRATION_PRIO_ORDER_MASK);
1250 
1251 		/*
1252 		 * Wa_1604223664:icl
1253 		 * Formerly known as WaL3BankAddressHashing
1254 		 */
1255 		wa_write_masked_or(wal,
1256 				   GEN8_GARBCNTL,
1257 				   GEN11_HASH_CTRL_EXCL_MASK,
1258 				   GEN11_HASH_CTRL_EXCL_BIT0);
1259 		wa_write_masked_or(wal,
1260 				   GEN11_GLBLINVL,
1261 				   GEN11_BANK_HASH_ADDR_EXCL_MASK,
1262 				   GEN11_BANK_HASH_ADDR_EXCL_BIT0);
1263 
1264 		/*
1265 		 * Wa_1405733216:icl
1266 		 * Formerly known as WaDisableCleanEvicts
1267 		 */
1268 		ignore_wa_write_or(wal,
1269 				   GEN8_L3SQCREG4,
1270 				   GEN11_LQSC_CLEAN_EVICT_DISABLE,
1271 				   GEN11_LQSC_CLEAN_EVICT_DISABLE);
1272 
1273 		/* WaForwardProgressSoftReset:icl */
1274 		wa_write_or(wal,
1275 			    GEN10_SCRATCH_LNCF2,
1276 			    PMFLUSHDONE_LNICRSDROP |
1277 			    PMFLUSH_GAPL3UNBLOCK |
1278 			    PMFLUSHDONE_LNEBLK);
1279 
1280 		/* Wa_1406609255:icl (pre-prod) */
1281 		if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
1282 			wa_write_or(wal,
1283 				    GEN7_SARCHKMD,
1284 				    GEN7_DISABLE_DEMAND_PREFETCH);
1285 
1286 		/* Wa_1606682166:icl */
1287 		wa_write_or(wal,
1288 			    GEN7_SARCHKMD,
1289 			    GEN7_DISABLE_SAMPLER_PREFETCH);
1290 	}
1291 
1292 	if (IS_GEN_RANGE(i915, 9, 11)) {
1293 		/* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */
1294 		wa_masked_en(wal,
1295 			     GEN7_FF_SLICE_CS_CHICKEN1,
1296 			     GEN9_FFSC_PERCTX_PREEMPT_CTRL);
1297 	}
1298 
1299 	if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
1300 		/* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
1301 		wa_write_or(wal,
1302 			    GEN8_GARBCNTL,
1303 			    GEN9_GAPS_TSV_CREDIT_DISABLE);
1304 	}
1305 
1306 	if (IS_BROXTON(i915)) {
1307 		/* WaDisablePooledEuLoadBalancingFix:bxt */
1308 		wa_masked_en(wal,
1309 			     FF_SLICE_CS_CHICKEN2,
1310 			     GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
1311 	}
1312 
1313 	if (IS_GEN(i915, 9)) {
1314 		/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
1315 		wa_masked_en(wal,
1316 			     GEN9_CSFE_CHICKEN1_RCS,
1317 			     GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
1318 
1319 		/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
1320 		wa_write_or(wal,
1321 			    BDW_SCRATCH1,
1322 			    GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
1323 
1324 		/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
1325 		if (IS_GEN9_LP(i915))
1326 			wa_write_masked_or(wal,
1327 					   GEN8_L3SQCREG1,
1328 					   L3_PRIO_CREDITS_MASK,
1329 					   L3_GENERAL_PRIO_CREDITS(62) |
1330 					   L3_HIGH_PRIO_CREDITS(2));
1331 
1332 		/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
1333 		wa_write_or(wal,
1334 			    GEN8_L3SQCREG4,
1335 			    GEN8_LQSC_FLUSH_COHERENT_LINES);
1336 	}
1337 }
1338 
1339 static void
1340 xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1341 {
1342 	struct drm_i915_private *i915 = engine->i915;
1343 
1344 	/* WaKBLVECSSemaphoreWaitPoll:kbl */
1345 	if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
1346 		wa_write(wal,
1347 			 RING_SEMA_WAIT_POLL(engine->mmio_base),
1348 			 1);
1349 	}
1350 }
1351 
1352 static void
1353 engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1354 {
1355 	if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8))
1356 		return;
1357 
1358 	if (engine->id == RCS0)
1359 		rcs_engine_wa_init(engine, wal);
1360 	else
1361 		xcs_engine_wa_init(engine, wal);
1362 }
1363 
1364 void intel_engine_init_workarounds(struct intel_engine_cs *engine)
1365 {
1366 	struct i915_wa_list *wal = &engine->wa_list;
1367 
1368 	if (GEM_WARN_ON(INTEL_GEN(engine->i915) < 8))
1369 		return;
1370 
1371 	wa_init_start(wal, engine->name);
1372 	engine_init_workarounds(engine, wal);
1373 	wa_init_finish(wal);
1374 }
1375 
1376 void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
1377 {
1378 	wa_list_apply(engine->uncore, &engine->wa_list);
1379 }
1380 
1381 static struct i915_vma *
1382 create_scratch(struct i915_address_space *vm, int count)
1383 {
1384 	struct drm_i915_gem_object *obj;
1385 	struct i915_vma *vma;
1386 	unsigned int size;
1387 	int err;
1388 
1389 	size = round_up(count * sizeof(u32), PAGE_SIZE);
1390 	obj = i915_gem_object_create_internal(vm->i915, size);
1391 	if (IS_ERR(obj))
1392 		return ERR_CAST(obj);
1393 
1394 	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
1395 
1396 	vma = i915_vma_instance(obj, vm, NULL);
1397 	if (IS_ERR(vma)) {
1398 		err = PTR_ERR(vma);
1399 		goto err_obj;
1400 	}
1401 
1402 	err = i915_vma_pin(vma, 0, 0,
1403 			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
1404 	if (err)
1405 		goto err_obj;
1406 
1407 	return vma;
1408 
1409 err_obj:
1410 	i915_gem_object_put(obj);
1411 	return ERR_PTR(err);
1412 }
1413 
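/*
 * Emit one MI_STORE_REGISTER_MEM per workaround so that the current value of
 * each register is captured into the scratch buffer at @vma, for later
 * comparison by engine_wa_list_verify().
 */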
1414 static int
1415 wa_list_srm(struct i915_request *rq,
1416 	    const struct i915_wa_list *wal,
1417 	    struct i915_vma *vma)
1418 {
1419 	const struct i915_wa *wa;
1420 	unsigned int i;
1421 	u32 srm, *cs;
1422 
1423 	srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
1424 	if (INTEL_GEN(rq->i915) >= 8)
1425 		srm++;
1426 
1427 	cs = intel_ring_begin(rq, 4 * wal->count);
1428 	if (IS_ERR(cs))
1429 		return PTR_ERR(cs);
1430 
1431 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
1432 		*cs++ = srm;
1433 		*cs++ = i915_mmio_reg_offset(wa->reg);
1434 		*cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
1435 		*cs++ = 0;
1436 	}
1437 	intel_ring_advance(rq, cs);
1438 
1439 	return 0;
1440 }
1441 
1442 static int engine_wa_list_verify(struct intel_context *ce,
1443 				 const struct i915_wa_list * const wal,
1444 				 const char *from)
1445 {
1446 	const struct i915_wa *wa;
1447 	struct i915_request *rq;
1448 	struct i915_vma *vma;
1449 	unsigned int i;
1450 	u32 *results;
1451 	int err;
1452 
1453 	if (!wal->count)
1454 		return 0;
1455 
1456 	vma = create_scratch(&ce->engine->i915->ggtt.vm, wal->count);
1457 	if (IS_ERR(vma))
1458 		return PTR_ERR(vma);
1459 
1460 	rq = intel_context_create_request(ce);
1461 	if (IS_ERR(rq)) {
1462 		err = PTR_ERR(rq);
1463 		goto err_vma;
1464 	}
1465 
1466 	err = wa_list_srm(rq, wal, vma);
1467 	if (err)
1468 		goto err_vma;
1469 
1470 	i915_request_add(rq);
1471 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1472 		err = -ETIME;
1473 		goto err_vma;
1474 	}
1475 
1476 	results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
1477 	if (IS_ERR(results)) {
1478 		err = PTR_ERR(results);
1479 		goto err_vma;
1480 	}
1481 
1482 	err = 0;
1483 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1484 		if (!wa_verify(wa, results[i], wal->name, from))
1485 			err = -ENXIO;
1486 
1487 	i915_gem_object_unpin_map(vma->obj);
1488 
1489 err_vma:
1490 	i915_vma_unpin(vma);
1491 	i915_vma_put(vma);
1492 	return err;
1493 }
1494 
1495 int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
1496 				    const char *from)
1497 {
1498 	return engine_wa_list_verify(engine->kernel_context,
1499 				     &engine->wa_list,
1500 				     from);
1501 }
1502 
1503 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1504 #include "selftest_workarounds.c"
1505 #endif
1506