1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2014-2018 Intel Corporation
5  */
6 
7 #include "i915_drv.h"
8 #include "intel_context.h"
9 #include "intel_workarounds.h"
10 
11 /**
12  * DOC: Hardware workarounds
13  *
14  * This file is intended as a central place to implement most [1]_ of the
15  * required workarounds for hardware to work as originally intended. They fall
 * into five basic categories depending on how/when they are applied:
17  *
18  * - Workarounds that touch registers that are saved/restored to/from the HW
19  *   context image. The list is emitted (via Load Register Immediate commands)
 *   every time a new context is created.
21  * - GT workarounds. The list of these WAs is applied whenever these registers
22  *   revert to default values (on GPU reset, suspend/resume [2]_, etc..).
23  * - Display workarounds. The list is applied during display clock-gating
24  *   initialization.
25  * - Workarounds that whitelist a privileged register, so that UMDs can manage
 *   them directly. This is just a special case of an MMIO workaround (as we
 *   write the list of to-be-whitelisted registers to some special HW
 *   registers).
29  * - Workaround batchbuffers, that get executed automatically by the hardware
30  *   on every HW context restore.
31  *
32  * .. [1] Please notice that there are other WAs that, due to their nature,
33  *    cannot be applied from a central place. Those are peppered around the rest
34  *    of the code, as needed.
35  *
36  * .. [2] Technically, some registers are powercontext saved & restored, so they
37  *    survive a suspend/resume. In practice, writing them again is not too
38  *    costly and simplifies things. We can revisit this in the future.
39  *
40  * Layout
41  * ~~~~~~
42  *
43  * Keep things in this file ordered by WA type, as per the above (context, GT,
44  * display, register whitelist, batchbuffer). Then, inside each type, keep the
45  * following order:
46  *
47  * - Infrastructure functions and macros
48  * - WAs per platform in standard gen/chrono order
49  * - Public functions to init or apply the given workaround type.
50  */
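
/*
 * Each entry in an i915_wa_list is a struct i915_wa. As used throughout this
 * file the fields are (a sketch for orientation only; see the definition of
 * struct i915_wa for the authoritative layout):
 *
 *	.reg  - the MMIO register the workaround touches
 *	.mask - the bits owned by this workaround (used for read-modify-write)
 *	.val  - the value to write into those bits
 *	.read - the bits compared when the workaround is later verified
 */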
51 
52 static void wa_init_start(struct i915_wa_list *wal, const char *name)
53 {
54 	wal->name = name;
55 }
56 
57 #define WA_LIST_CHUNK (1 << 4)
58 
59 static void wa_init_finish(struct i915_wa_list *wal)
60 {
61 	/* Trim unused entries. */
62 	if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
63 		struct i915_wa *list = kmemdup(wal->list,
64 					       wal->count * sizeof(*list),
65 					       GFP_KERNEL);
66 
67 		if (list) {
68 			kfree(wal->list);
69 			wal->list = list;
70 		}
71 	}
72 
73 	if (!wal->count)
74 		return;
75 
76 	DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n",
77 			 wal->wa_count, wal->name);
78 }
79 
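/*
 * Add one workaround to the list, growing the storage in WA_LIST_CHUNK
 * steps. The list is kept sorted by register offset (binary search above
 * the insertion point, bubble-down swap below), and a second workaround
 * for a register that is already listed is merged into the existing entry
 * instead of being appended.
 */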
80 static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
81 {
82 	unsigned int addr = i915_mmio_reg_offset(wa->reg);
83 	unsigned int start = 0, end = wal->count;
84 	const unsigned int grow = WA_LIST_CHUNK;
85 	struct i915_wa *wa_;
86 
87 	GEM_BUG_ON(!is_power_of_2(grow));
88 
89 	if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
90 		struct i915_wa *list;
91 
92 		list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
93 				     GFP_KERNEL);
94 		if (!list) {
95 			DRM_ERROR("No space for workaround init!\n");
96 			return;
97 		}
98 
		if (wal->list) {
			memcpy(list, wal->list, sizeof(*wa) * wal->count);
			kfree(wal->list);
		}
101 
102 		wal->list = list;
103 	}
104 
105 	while (start < end) {
106 		unsigned int mid = start + (end - start) / 2;
107 
108 		if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
109 			start = mid + 1;
110 		} else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
111 			end = mid;
112 		} else {
113 			wa_ = &wal->list[mid];
114 
115 			if ((wa->mask & ~wa_->mask) == 0) {
116 				DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
117 					  i915_mmio_reg_offset(wa_->reg),
118 					  wa_->mask, wa_->val);
119 
120 				wa_->val &= ~wa->mask;
121 			}
122 
123 			wal->wa_count++;
124 			wa_->val |= wa->val;
125 			wa_->mask |= wa->mask;
126 			wa_->read |= wa->read;
127 			return;
128 		}
129 	}
130 
131 	wal->wa_count++;
132 	wa_ = &wal->list[wal->count++];
133 	*wa_ = *wa;
134 
135 	while (wa_-- > wal->list) {
136 		GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
137 			   i915_mmio_reg_offset(wa_[1].reg));
138 		if (i915_mmio_reg_offset(wa_[1].reg) >
139 		    i915_mmio_reg_offset(wa_[0].reg))
140 			break;
141 
142 		swap(wa_[1], wa_[0]);
143 	}
144 }
145 
146 static void
147 wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
148 		   u32 val)
149 {
150 	struct i915_wa wa = {
151 		.reg  = reg,
152 		.mask = mask,
153 		.val  = val,
154 		.read = mask,
155 	};
156 
157 	_wa_add(wal, &wa);
158 }
159 
160 static void
161 wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
162 {
163 	wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
164 }
165 
166 static void
167 wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
168 {
169 	wa_write_masked_or(wal, reg, ~0, val);
170 }
171 
172 static void
173 wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
174 {
175 	wa_write_masked_or(wal, reg, val, val);
176 }
177 
178 static void
179 ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val)
180 {
181 	struct i915_wa wa = {
182 		.reg  = reg,
183 		.mask = mask,
184 		.val  = val,
185 		/* Bonkers HW, skip verifying */
186 	};
187 
188 	_wa_add(wal, &wa);
189 }
190 
191 #define WA_SET_BIT_MASKED(addr, mask) \
192 	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))
193 
194 #define WA_CLR_BIT_MASKED(addr, mask) \
195 	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))
196 
197 #define WA_SET_FIELD_MASKED(addr, mask, value) \
198 	wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))
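
/*
 * The WA_*_MASKED helpers above target "masked" registers, where the upper
 * 16 bits act as a per-bit write enable for the lower 16. As an
 * illustration (not a verbatim copy of the macro definitions), enabling
 * bit 3 of such a register amounts to emitting:
 *
 *	_MASKED_BIT_ENABLE(BIT(3)) == (BIT(3) << 16) | BIT(3)
 *
 * which is why these values can be blindly (re)applied via LRI without a
 * read-modify-write cycle.
 */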
199 
200 static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
201 				      struct i915_wa_list *wal)
202 {
203 	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
204 
205 	/* WaDisableAsyncFlipPerfMode:bdw,chv */
206 	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
207 
208 	/* WaDisablePartialInstShootdown:bdw,chv */
209 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
210 			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
211 
212 	/* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
214 	 * invalidation occurs during a PSD flush.
215 	 */
216 	/* WaForceEnableNonCoherent:bdw,chv */
217 	/* WaHdcDisableFetchWhenMasked:bdw,chv */
218 	WA_SET_BIT_MASKED(HDC_CHICKEN0,
219 			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
220 			  HDC_FORCE_NON_COHERENT);
221 
222 	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
223 	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
224 	 *  polygons in the same 8x4 pixel/sample area to be processed without
225 	 *  stalling waiting for the earlier ones to write to Hierarchical Z
226 	 *  buffer."
227 	 *
228 	 * This optimization is off by default for BDW and CHV; turn it on.
229 	 */
230 	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
231 
232 	/* Wa4x4STCOptimizationDisable:bdw,chv */
233 	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
234 
235 	/*
236 	 * BSpec recommends 8x4 when MSAA is used,
237 	 * however in practice 16x4 seems fastest.
238 	 *
239 	 * Note that PS/WM thread counts depend on the WIZ hashing
240 	 * disable bit, which we don't touch here, but it's good
241 	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
242 	 */
243 	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
244 			    GEN6_WIZ_HASHING_MASK,
245 			    GEN6_WIZ_HASHING_16x4);
246 }
247 
248 static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
249 				     struct i915_wa_list *wal)
250 {
251 	struct drm_i915_private *i915 = engine->i915;
252 
253 	gen8_ctx_workarounds_init(engine, wal);
254 
255 	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
256 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
257 
258 	/* WaDisableDopClockGating:bdw
259 	 *
260 	 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
261 	 * to disable EUTC clock gating.
262 	 */
263 	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
264 			  DOP_CLOCK_GATING_DISABLE);
265 
266 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
267 			  GEN8_SAMPLER_POWER_BYPASS_DIS);
268 
269 	WA_SET_BIT_MASKED(HDC_CHICKEN0,
270 			  /* WaForceContextSaveRestoreNonCoherent:bdw */
271 			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
272 			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
273 			  (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
274 }
275 
276 static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
277 				     struct i915_wa_list *wal)
278 {
279 	gen8_ctx_workarounds_init(engine, wal);
280 
281 	/* WaDisableThreadStallDopClockGating:chv */
282 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
283 
284 	/* Improve HiZ throughput on CHV. */
285 	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
286 }
287 
288 static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
289 				      struct i915_wa_list *wal)
290 {
291 	struct drm_i915_private *i915 = engine->i915;
292 
293 	if (HAS_LLC(i915)) {
294 		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
295 		 *
296 		 * Must match Display Engine. See
297 		 * WaCompressedResourceDisplayNewHashMode.
298 		 */
299 		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
300 				  GEN9_PBE_COMPRESSED_HASH_SELECTION);
301 		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
302 				  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
303 	}
304 
305 	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
306 	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
307 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
308 			  FLOW_CONTROL_ENABLE |
309 			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
310 
311 	/* Syncing dependencies between camera and graphics:skl,bxt,kbl */
312 	if (!IS_COFFEELAKE(i915))
313 		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
314 				  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
315 
316 	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
317 	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
318 	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
319 			  GEN9_ENABLE_YV12_BUGFIX |
320 			  GEN9_ENABLE_GPGPU_PREEMPTION);
321 
322 	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
323 	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
324 	WA_SET_BIT_MASKED(CACHE_MODE_1,
325 			  GEN8_4x4_STC_OPTIMIZATION_DISABLE |
326 			  GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
327 
328 	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
329 	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
330 			  GEN9_CCS_TLB_PREFETCH_ENABLE);
331 
332 	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
333 	WA_SET_BIT_MASKED(HDC_CHICKEN0,
334 			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
335 			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
336 
337 	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
338 	 * both tied to WaForceContextSaveRestoreNonCoherent
339 	 * in some hsds for skl. We keep the tie for all gen9. The
340 	 * documentation is a bit hazy and so we want to get common behaviour,
341 	 * even though there is no clear evidence we would need both on kbl/bxt.
342 	 * This area has been source of system hangs so we play it safe
343 	 * and mimic the skl regardless of what bspec says.
344 	 *
345 	 * Use Force Non-Coherent whenever executing a 3D context. This
346 	 * is a workaround for a possible hang in the unlikely event
347 	 * a TLB invalidation occurs during a PSD flush.
348 	 */
349 
350 	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
351 	WA_SET_BIT_MASKED(HDC_CHICKEN0,
352 			  HDC_FORCE_NON_COHERENT);
353 
354 	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
355 	if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
356 		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
357 				  GEN8_SAMPLER_POWER_BYPASS_DIS);
358 
359 	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
360 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
361 
362 	/*
363 	 * Supporting preemption with fine-granularity requires changes in the
364 	 * batch buffer programming. Since we can't break old userspace, we
	 * need to set our default preemption level to a safe value. Userspace is
366 	 * still able to use more fine-grained preemption levels, since in
367 	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
368 	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
369 	 * not real HW workarounds, but merely a way to start using preemption
	 * while maintaining the old contract with userspace.
371 	 */
372 
373 	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
374 	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
375 
	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
377 	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
378 			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
379 			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
380 
381 	/* WaClearHIZ_WM_CHICKEN3:bxt,glk */
382 	if (IS_GEN9_LP(i915))
383 		WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
384 }
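
/*
 * Tune the IZ hashing (see intel_device_info_runtime_init()): for every
 * slice where exactly one subslice still has all 7 EUs enabled, derive a
 * per-slice hashing value from that subslice's index and program it via
 * GEN7_GT_MODE below. If no slice qualifies, nothing is emitted.
 */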
385 
386 static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
387 				struct i915_wa_list *wal)
388 {
389 	struct drm_i915_private *i915 = engine->i915;
390 	u8 vals[3] = { 0, 0, 0 };
391 	unsigned int i;
392 
393 	for (i = 0; i < 3; i++) {
394 		u8 ss;
395 
396 		/*
397 		 * Only consider slices where one, and only one, subslice has 7
398 		 * EUs
399 		 */
400 		if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]))
401 			continue;
402 
403 		/*
404 		 * subslice_7eu[i] != 0 (because of the check above) and
405 		 * ss_max == 4 (maximum number of subslices possible per slice)
406 		 *
407 		 * ->    0 <= ss <= 3;
408 		 */
409 		ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1;
410 		vals[i] = 3 - ss;
411 	}
412 
413 	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
414 		return;
415 
416 	/* Tune IZ hashing. See intel_device_info_runtime_init() */
417 	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
418 			    GEN9_IZ_HASHING_MASK(2) |
419 			    GEN9_IZ_HASHING_MASK(1) |
420 			    GEN9_IZ_HASHING_MASK(0),
421 			    GEN9_IZ_HASHING(2, vals[2]) |
422 			    GEN9_IZ_HASHING(1, vals[1]) |
423 			    GEN9_IZ_HASHING(0, vals[0]));
424 }
425 
426 static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
427 				     struct i915_wa_list *wal)
428 {
429 	gen9_ctx_workarounds_init(engine, wal);
430 	skl_tune_iz_hashing(engine, wal);
431 }
432 
433 static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
434 				     struct i915_wa_list *wal)
435 {
436 	gen9_ctx_workarounds_init(engine, wal);
437 
438 	/* WaDisableThreadStallDopClockGating:bxt */
439 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
440 			  STALL_DOP_GATING_DISABLE);
441 
442 	/* WaToEnableHwFixForPushConstHWBug:bxt */
443 	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
444 			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
445 }
446 
447 static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
448 				     struct i915_wa_list *wal)
449 {
450 	struct drm_i915_private *i915 = engine->i915;
451 
452 	gen9_ctx_workarounds_init(engine, wal);
453 
454 	/* WaToEnableHwFixForPushConstHWBug:kbl */
455 	if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
456 		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
457 				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
458 
459 	/* WaDisableSbeCacheDispatchPortSharing:kbl */
460 	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
461 			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
462 }
463 
464 static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
465 				     struct i915_wa_list *wal)
466 {
467 	gen9_ctx_workarounds_init(engine, wal);
468 
469 	/* WaToEnableHwFixForPushConstHWBug:glk */
470 	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
471 			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
472 }
473 
474 static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
475 				     struct i915_wa_list *wal)
476 {
477 	gen9_ctx_workarounds_init(engine, wal);
478 
479 	/* WaToEnableHwFixForPushConstHWBug:cfl */
480 	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
481 			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
482 
483 	/* WaDisableSbeCacheDispatchPortSharing:cfl */
484 	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
485 			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
486 }
487 
488 static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
489 				     struct i915_wa_list *wal)
490 {
491 	struct drm_i915_private *i915 = engine->i915;
492 
493 	/* WaForceContextSaveRestoreNonCoherent:cnl */
494 	WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
495 			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
496 
497 	/* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
498 	if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
499 		WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
500 
501 	/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
502 	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
503 			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
504 
505 	/* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
506 	if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
507 		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
508 				  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
509 
510 	/* WaPushConstantDereferenceHoldDisable:cnl */
511 	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
512 
513 	/* FtrEnableFastAnisoL1BankingFix:cnl */
514 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
515 
516 	/* WaDisable3DMidCmdPreemption:cnl */
517 	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
518 
519 	/* WaDisableGPGPUMidCmdPreemption:cnl */
520 	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
521 			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
522 			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
523 
524 	/* WaDisableEarlyEOT:cnl */
525 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
526 }
527 
528 static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
529 				     struct i915_wa_list *wal)
530 {
531 	struct drm_i915_private *i915 = engine->i915;
532 
533 	/* WaDisableBankHangMode:icl */
534 	wa_write(wal,
535 		 GEN8_L3CNTLREG,
536 		 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
537 		 GEN8_ERRDETBCTRL);
538 
545 	/* Wa_1604370585:icl (pre-prod)
546 	 * Formerly known as WaPushConstantDereferenceHoldDisable
547 	 */
548 	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
549 		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
550 				  PUSH_CONSTANT_DEREF_DISABLE);
551 
552 	/* WaForceEnableNonCoherent:icl
553 	 * This is not the same workaround as in early Gen9 platforms, where
554 	 * lacking this could cause system hangs, but coherency performance
555 	 * overhead is high and only a few compute workloads really need it
556 	 * (the register is whitelisted in hardware now, so UMDs can opt in
557 	 * for coherency if they have a good reason).
558 	 */
559 	WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
560 
561 	/* Wa_2006611047:icl (pre-prod)
562 	 * Formerly known as WaDisableImprovedTdlClkGating
563 	 */
564 	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
565 		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
566 				  GEN11_TDL_CLOCK_GATING_FIX_DISABLE);
567 
568 	/* Wa_2006665173:icl (pre-prod) */
569 	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
570 		WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
571 				  GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);
572 
573 	/* WaEnableFloatBlendOptimization:icl */
574 	wa_write_masked_or(wal,
575 			   GEN10_CACHE_MODE_SS,
576 			   0, /* write-only, so skip validation */
577 			   _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));
578 
579 	/* WaDisableGPGPUMidThreadPreemption:icl */
580 	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
581 			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
582 			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
583 
584 	/* allow headerless messages for preemptible GPGPU context */
585 	WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE,
586 			  GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
587 }
588 
589 static void
590 __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
591 			   struct i915_wa_list *wal,
592 			   const char *name)
593 {
594 	struct drm_i915_private *i915 = engine->i915;
595 
596 	if (engine->class != RENDER_CLASS)
597 		return;
598 
599 	wa_init_start(wal, name);
600 
601 	if (IS_GEN(i915, 11))
602 		icl_ctx_workarounds_init(engine, wal);
603 	else if (IS_CANNONLAKE(i915))
604 		cnl_ctx_workarounds_init(engine, wal);
605 	else if (IS_COFFEELAKE(i915))
606 		cfl_ctx_workarounds_init(engine, wal);
607 	else if (IS_GEMINILAKE(i915))
608 		glk_ctx_workarounds_init(engine, wal);
609 	else if (IS_KABYLAKE(i915))
610 		kbl_ctx_workarounds_init(engine, wal);
611 	else if (IS_BROXTON(i915))
612 		bxt_ctx_workarounds_init(engine, wal);
613 	else if (IS_SKYLAKE(i915))
614 		skl_ctx_workarounds_init(engine, wal);
615 	else if (IS_CHERRYVIEW(i915))
616 		chv_ctx_workarounds_init(engine, wal);
617 	else if (IS_BROADWELL(i915))
618 		bdw_ctx_workarounds_init(engine, wal);
619 	else if (INTEL_GEN(i915) < 8)
620 		return;
621 	else
622 		MISSING_CASE(INTEL_GEN(i915));
623 
624 	wa_init_finish(wal);
625 }
626 
627 void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
628 {
629 	__intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
630 }
631 
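/*
 * Emit the engine's context workaround list into @rq as a single
 * MI_LOAD_REGISTER_IMM packet. The resulting command stream is, roughly:
 *
 *	MI_LOAD_REGISTER_IMM(count)
 *	    { register offset, value } x count
 *	MI_NOOP
 *
 * bracketed by EMIT_BARRIER flushes (the emit_flush() calls) so the
 * register writes are ordered against the surrounding commands.
 */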
632 int intel_engine_emit_ctx_wa(struct i915_request *rq)
633 {
634 	struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
635 	struct i915_wa *wa;
636 	unsigned int i;
637 	u32 *cs;
638 	int ret;
639 
640 	if (wal->count == 0)
641 		return 0;
642 
643 	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
644 	if (ret)
645 		return ret;
646 
647 	cs = intel_ring_begin(rq, (wal->count * 2 + 2));
648 	if (IS_ERR(cs))
649 		return PTR_ERR(cs);
650 
651 	*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
652 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
653 		*cs++ = i915_mmio_reg_offset(wa->reg);
654 		*cs++ = wa->val;
655 	}
656 	*cs++ = MI_NOOP;
657 
658 	intel_ring_advance(rq, cs);
659 
660 	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
661 	if (ret)
662 		return ret;
663 
664 	return 0;
665 }
666 
667 static void
668 gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
669 {
670 	/* WaDisableKillLogic:bxt,skl,kbl */
671 	if (!IS_COFFEELAKE(i915))
672 		wa_write_or(wal,
673 			    GAM_ECOCHK,
674 			    ECOCHK_DIS_TLB);
675 
676 	if (HAS_LLC(i915)) {
677 		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
678 		 *
679 		 * Must match Display Engine. See
680 		 * WaCompressedResourceDisplayNewHashMode.
681 		 */
682 		wa_write_or(wal,
683 			    MMCD_MISC_CTRL,
684 			    MMCD_PCLA | MMCD_HOTSPOT_EN);
685 	}
686 
687 	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
688 	wa_write_or(wal,
689 		    GAM_ECOCHK,
690 		    BDW_DISABLE_HDC_INVALIDATION);
691 }
692 
693 static void
694 skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
695 {
696 	gen9_gt_workarounds_init(i915, wal);
697 
698 	/* WaDisableGafsUnitClkGating:skl */
699 	wa_write_or(wal,
700 		    GEN7_UCGCTL4,
701 		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
702 
703 	/* WaInPlaceDecompressionHang:skl */
704 	if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
705 		wa_write_or(wal,
706 			    GEN9_GAMT_ECO_REG_RW_IA,
707 			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
708 }
709 
710 static void
711 bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
712 {
713 	gen9_gt_workarounds_init(i915, wal);
714 
715 	/* WaInPlaceDecompressionHang:bxt */
716 	wa_write_or(wal,
717 		    GEN9_GAMT_ECO_REG_RW_IA,
718 		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
719 }
720 
721 static void
722 kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
723 {
724 	gen9_gt_workarounds_init(i915, wal);
725 
726 	/* WaDisableDynamicCreditSharing:kbl */
727 	if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
728 		wa_write_or(wal,
729 			    GAMT_CHKN_BIT_REG,
730 			    GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
731 
732 	/* WaDisableGafsUnitClkGating:kbl */
733 	wa_write_or(wal,
734 		    GEN7_UCGCTL4,
735 		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
736 
737 	/* WaInPlaceDecompressionHang:kbl */
738 	wa_write_or(wal,
739 		    GEN9_GAMT_ECO_REG_RW_IA,
740 		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
741 }
742 
743 static void
744 glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
745 {
746 	gen9_gt_workarounds_init(i915, wal);
747 }
748 
749 static void
750 cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
751 {
752 	gen9_gt_workarounds_init(i915, wal);
753 
754 	/* WaDisableGafsUnitClkGating:cfl */
755 	wa_write_or(wal,
756 		    GEN7_UCGCTL4,
757 		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
758 
759 	/* WaInPlaceDecompressionHang:cfl */
760 	wa_write_or(wal,
761 		    GEN9_GAMT_ECO_REG_RW_IA,
762 		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
763 }
764 
765 static void
766 wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
767 {
768 	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
769 	u32 mcr_slice_subslice_mask;
770 
771 	/*
	 * WaProgramMgsrForL3BankSpecificMmioReads:cnl,icl
773 	 * L3Banks could be fused off in single slice scenario. If that is
774 	 * the case, we might need to program MCR select to a valid L3Bank
775 	 * by default, to make sure we correctly read certain registers
776 	 * later on (in the range 0xB100 - 0xB3FF).
777 	 * This might be incompatible with
778 	 * WaProgramMgsrForCorrectSliceSpecificMmioReads.
779 	 * Fortunately, this should not happen in production hardware, so
780 	 * we only assert that this is the case (instead of implementing
781 	 * something more complex that requires checking the range of every
782 	 * MMIO read).
783 	 */
784 	if (INTEL_GEN(i915) >= 10 &&
785 	    is_power_of_2(sseu->slice_mask)) {
786 		/*
787 		 * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches
788 		 * enabled subslice, no need to redirect MCR packet
789 		 */
790 		u32 slice = fls(sseu->slice_mask);
791 		u32 fuse3 =
792 			intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3);
793 		u8 ss_mask = sseu->subslice_mask[slice];
794 
795 		u8 enabled_mask = (ss_mask | ss_mask >>
796 				   GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
797 		u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK;
798 
799 		/*
800 		 * Production silicon should have matched L3Bank and
801 		 * subslice enabled
802 		 */
803 		WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
804 	}
805 
806 	if (INTEL_GEN(i915) >= 11)
807 		mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
808 					  GEN11_MCR_SUBSLICE_MASK;
809 	else
810 		mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
811 					  GEN8_MCR_SUBSLICE_MASK;
812 	/*
813 	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
814 	 * Before any MMIO read into slice/subslice specific registers, MCR
815 	 * packet control register needs to be programmed to point to any
816 	 * enabled s/ss pair. Otherwise, incorrect values will be returned.
	 * This means each subsequent MMIO read will be forwarded to a
	 * specific s/ss combination, but this is OK since these registers
	 * are consistent across s/ss in almost all cases. On the rare
	 * occasions, such as INSTDONE, where the value depends on the
	 * s/ss combo, the read should be done with read_subslice_reg.
822 	 */
823 	wa_write_masked_or(wal,
824 			   GEN8_MCR_SELECTOR,
825 			   mcr_slice_subslice_mask,
826 			   intel_calculate_mcr_s_ss_select(i915));
827 }
828 
829 static void
830 cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
831 {
832 	wa_init_mcr(i915, wal);
833 
834 	/* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
835 	if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
836 		wa_write_or(wal,
837 			    GAMT_CHKN_BIT_REG,
838 			    GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);
839 
840 	/* WaInPlaceDecompressionHang:cnl */
841 	wa_write_or(wal,
842 		    GEN9_GAMT_ECO_REG_RW_IA,
843 		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
844 }
845 
846 static void
847 icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
848 {
849 	wa_init_mcr(i915, wal);
850 
851 	/* WaInPlaceDecompressionHang:icl */
852 	wa_write_or(wal,
853 		    GEN9_GAMT_ECO_REG_RW_IA,
854 		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
855 
856 	/* WaModifyGamTlbPartitioning:icl */
857 	wa_write_masked_or(wal,
858 			   GEN11_GACB_PERF_CTRL,
859 			   GEN11_HASH_CTRL_MASK,
860 			   GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
861 
862 	/* Wa_1405766107:icl
863 	 * Formerly known as WaCL2SFHalfMaxAlloc
864 	 */
865 	wa_write_or(wal,
866 		    GEN11_LSN_UNSLCVC,
867 		    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
868 		    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
869 
870 	/* Wa_220166154:icl
871 	 * Formerly known as WaDisCtxReload
872 	 */
873 	wa_write_or(wal,
874 		    GEN8_GAMW_ECO_DEV_RW_IA,
875 		    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
876 
877 	/* Wa_1405779004:icl (pre-prod) */
878 	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
879 		wa_write_or(wal,
880 			    SLICE_UNIT_LEVEL_CLKGATE,
881 			    MSCUNIT_CLKGATE_DIS);
882 
883 	/* Wa_1406680159:icl */
884 	wa_write_or(wal,
885 		    SUBSLICE_UNIT_LEVEL_CLKGATE,
886 		    GWUNIT_CLKGATE_DIS);
887 
888 	/* Wa_1406838659:icl (pre-prod) */
889 	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
890 		wa_write_or(wal,
891 			    INF_UNIT_LEVEL_CLKGATE,
892 			    CGPSF_CLKGATE_DIS);
893 
894 	/* Wa_1406463099:icl
895 	 * Formerly known as WaGamTlbPendError
896 	 */
897 	wa_write_or(wal,
898 		    GAMT_CHKN_BIT_REG,
899 		    GAMT_CHKN_DISABLE_L3_COH_PIPE);
900 }
901 
902 static void
903 gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
904 {
905 	if (IS_GEN(i915, 11))
906 		icl_gt_workarounds_init(i915, wal);
907 	else if (IS_CANNONLAKE(i915))
908 		cnl_gt_workarounds_init(i915, wal);
909 	else if (IS_COFFEELAKE(i915))
910 		cfl_gt_workarounds_init(i915, wal);
911 	else if (IS_GEMINILAKE(i915))
912 		glk_gt_workarounds_init(i915, wal);
913 	else if (IS_KABYLAKE(i915))
914 		kbl_gt_workarounds_init(i915, wal);
915 	else if (IS_BROXTON(i915))
916 		bxt_gt_workarounds_init(i915, wal);
917 	else if (IS_SKYLAKE(i915))
918 		skl_gt_workarounds_init(i915, wal);
919 	else if (INTEL_GEN(i915) <= 8)
920 		return;
921 	else
922 		MISSING_CASE(INTEL_GEN(i915));
923 }
924 
925 void intel_gt_init_workarounds(struct drm_i915_private *i915)
926 {
927 	struct i915_wa_list *wal = &i915->gt_wa_list;
928 
929 	wa_init_start(wal, "GT");
930 	gt_init_workarounds(i915, wal);
931 	wa_init_finish(wal);
932 }
933 
934 static enum forcewake_domains
935 wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
936 {
937 	enum forcewake_domains fw = 0;
938 	struct i915_wa *wa;
939 	unsigned int i;
940 
941 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
942 		fw |= intel_uncore_forcewake_for_reg(uncore,
943 						     wa->reg,
944 						     FW_REG_READ |
945 						     FW_REG_WRITE);
946 
947 	return fw;
948 }
949 
950 static bool
951 wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
952 {
953 	if ((cur ^ wa->val) & wa->read) {
954 		DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
955 			  name, from, i915_mmio_reg_offset(wa->reg),
956 			  cur, cur & wa->read,
957 			  wa->val, wa->mask);
958 
959 		return false;
960 	}
961 
962 	return true;
963 }
964 
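/*
 * Apply every entry in @wal, taking forcewake once for the whole list:
 * each register is updated with a read-modify-write under uncore->lock
 * (only the bits in ->mask are touched) and, on CONFIG_DRM_I915_DEBUG_GEM
 * builds, immediately read back to check that the value stuck.
 */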
965 static void
966 wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
967 {
968 	enum forcewake_domains fw;
969 	unsigned long flags;
970 	struct i915_wa *wa;
971 	unsigned int i;
972 
973 	if (!wal->count)
974 		return;
975 
976 	fw = wal_get_fw_for_rmw(uncore, wal);
977 
978 	spin_lock_irqsave(&uncore->lock, flags);
979 	intel_uncore_forcewake_get__locked(uncore, fw);
980 
981 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
982 		intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val);
983 		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
984 			wa_verify(wa,
985 				  intel_uncore_read_fw(uncore, wa->reg),
986 				  wal->name, "application");
987 	}
988 
989 	intel_uncore_forcewake_put__locked(uncore, fw);
990 	spin_unlock_irqrestore(&uncore->lock, flags);
991 }
992 
993 void intel_gt_apply_workarounds(struct drm_i915_private *i915)
994 {
995 	wa_list_apply(&i915->uncore, &i915->gt_wa_list);
996 }
997 
998 static bool wa_list_verify(struct intel_uncore *uncore,
999 			   const struct i915_wa_list *wal,
1000 			   const char *from)
1001 {
1002 	struct i915_wa *wa;
1003 	unsigned int i;
1004 	bool ok = true;
1005 
1006 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1007 		ok &= wa_verify(wa,
1008 				intel_uncore_read(uncore, wa->reg),
1009 				wal->name, from);
1010 
1011 	return ok;
1012 }
1013 
1014 bool intel_gt_verify_workarounds(struct drm_i915_private *i915,
1015 				 const char *from)
1016 {
1017 	return wa_list_verify(&i915->uncore, &i915->gt_wa_list, from);
1018 }
1019 
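/*
 * Whitelist entries are plain i915_wa entries whose register offset is
 * later written into a RING_FORCE_TO_NONPRIV slot; any access flags are
 * OR'ed into the register offset before the entry is added. Each engine
 * only has RING_MAX_NONPRIV_SLOTS slots, hence the check below.
 */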
1020 static void
1021 whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
1022 {
1023 	struct i915_wa wa = {
1024 		.reg = reg
1025 	};
1026 
1027 	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
1028 		return;
1029 
1030 	wa.reg.reg |= flags;
1031 	_wa_add(wal, &wa);
1032 }
1033 
1034 static void
1035 whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
1036 {
1037 	whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_RW);
1038 }
1039 
1040 static void gen9_whitelist_build(struct i915_wa_list *w)
1041 {
1042 	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
1043 	whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
1044 
1045 	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
1046 	whitelist_reg(w, GEN8_CS_CHICKEN1);
1047 
1048 	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
1049 	whitelist_reg(w, GEN8_HDC_CHICKEN1);
1050 }
1051 
1052 static void skl_whitelist_build(struct intel_engine_cs *engine)
1053 {
1054 	struct i915_wa_list *w = &engine->whitelist;
1055 
1056 	if (engine->class != RENDER_CLASS)
1057 		return;
1058 
1059 	gen9_whitelist_build(w);
1060 
1061 	/* WaDisableLSQCROPERFforOCL:skl */
1062 	whitelist_reg(w, GEN8_L3SQCREG4);
1063 }
1064 
1065 static void bxt_whitelist_build(struct intel_engine_cs *engine)
1066 {
1067 	if (engine->class != RENDER_CLASS)
1068 		return;
1069 
1070 	gen9_whitelist_build(&engine->whitelist);
1071 }
1072 
1073 static void kbl_whitelist_build(struct intel_engine_cs *engine)
1074 {
1075 	struct i915_wa_list *w = &engine->whitelist;
1076 
1077 	if (engine->class != RENDER_CLASS)
1078 		return;
1079 
1080 	gen9_whitelist_build(w);
1081 
1082 	/* WaDisableLSQCROPERFforOCL:kbl */
1083 	whitelist_reg(w, GEN8_L3SQCREG4);
1084 }
1085 
1086 static void glk_whitelist_build(struct intel_engine_cs *engine)
1087 {
1088 	struct i915_wa_list *w = &engine->whitelist;
1089 
1090 	if (engine->class != RENDER_CLASS)
1091 		return;
1092 
1093 	gen9_whitelist_build(w);
1094 
1095 	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
1096 	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1097 }
1098 
1099 static void cfl_whitelist_build(struct intel_engine_cs *engine)
1100 {
1101 	struct i915_wa_list *w = &engine->whitelist;
1102 
1103 	if (engine->class != RENDER_CLASS)
1104 		return;
1105 
1106 	gen9_whitelist_build(w);
1107 
1108 	/*
1109 	 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
1110 	 *
	 * This covers 4 registers which are next to one another:
1112 	 *   - PS_INVOCATION_COUNT
1113 	 *   - PS_INVOCATION_COUNT_UDW
1114 	 *   - PS_DEPTH_COUNT
1115 	 *   - PS_DEPTH_COUNT_UDW
1116 	 */
1117 	whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1118 			  RING_FORCE_TO_NONPRIV_RD |
1119 			  RING_FORCE_TO_NONPRIV_RANGE_4);
1120 }
1121 
1122 static void cnl_whitelist_build(struct intel_engine_cs *engine)
1123 {
1124 	struct i915_wa_list *w = &engine->whitelist;
1125 
1126 	if (engine->class != RENDER_CLASS)
1127 		return;
1128 
1129 	/* WaEnablePreemptionGranularityControlByUMD:cnl */
1130 	whitelist_reg(w, GEN8_CS_CHICKEN1);
1131 }
1132 
1133 static void icl_whitelist_build(struct intel_engine_cs *engine)
1134 {
1135 	struct i915_wa_list *w = &engine->whitelist;
1136 
1137 	switch (engine->class) {
1138 	case RENDER_CLASS:
1139 		/* WaAllowUMDToModifyHalfSliceChicken7:icl */
1140 		whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
1141 
1142 		/* WaAllowUMDToModifySamplerMode:icl */
1143 		whitelist_reg(w, GEN10_SAMPLER_MODE);
1144 
1145 		/* WaEnableStateCacheRedirectToCS:icl */
1146 		whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1147 
1148 		/*
1149 		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
1150 		 *
		 * This covers 4 registers which are next to one another:
1152 		 *   - PS_INVOCATION_COUNT
1153 		 *   - PS_INVOCATION_COUNT_UDW
1154 		 *   - PS_DEPTH_COUNT
1155 		 *   - PS_DEPTH_COUNT_UDW
1156 		 */
1157 		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1158 				  RING_FORCE_TO_NONPRIV_RD |
1159 				  RING_FORCE_TO_NONPRIV_RANGE_4);
1160 		break;
1161 
1162 	case VIDEO_DECODE_CLASS:
1163 		/* hucStatusRegOffset */
1164 		whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
1165 				  RING_FORCE_TO_NONPRIV_RD);
1166 		/* hucUKernelHdrInfoRegOffset */
1167 		whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
1168 				  RING_FORCE_TO_NONPRIV_RD);
1169 		/* hucStatus2RegOffset */
1170 		whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
1171 				  RING_FORCE_TO_NONPRIV_RD);
1172 		break;
1173 
1174 	default:
1175 		break;
1176 	}
1177 }
1178 
1179 void intel_engine_init_whitelist(struct intel_engine_cs *engine)
1180 {
1181 	struct drm_i915_private *i915 = engine->i915;
1182 	struct i915_wa_list *w = &engine->whitelist;
1183 
1184 	wa_init_start(w, "whitelist");
1185 
1186 	if (IS_GEN(i915, 11))
1187 		icl_whitelist_build(engine);
1188 	else if (IS_CANNONLAKE(i915))
1189 		cnl_whitelist_build(engine);
1190 	else if (IS_COFFEELAKE(i915))
1191 		cfl_whitelist_build(engine);
1192 	else if (IS_GEMINILAKE(i915))
1193 		glk_whitelist_build(engine);
1194 	else if (IS_KABYLAKE(i915))
1195 		kbl_whitelist_build(engine);
1196 	else if (IS_BROXTON(i915))
1197 		bxt_whitelist_build(engine);
1198 	else if (IS_SKYLAKE(i915))
1199 		skl_whitelist_build(engine);
1200 	else if (INTEL_GEN(i915) <= 8)
1201 		return;
1202 	else
1203 		MISSING_CASE(INTEL_GEN(i915));
1204 
1205 	wa_init_finish(w);
1206 }
1207 
1208 void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
1209 {
1210 	const struct i915_wa_list *wal = &engine->whitelist;
1211 	struct intel_uncore *uncore = engine->uncore;
1212 	const u32 base = engine->mmio_base;
1213 	struct i915_wa *wa;
1214 	unsigned int i;
1215 
1216 	if (!wal->count)
1217 		return;
1218 
1219 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1220 		intel_uncore_write(uncore,
1221 				   RING_FORCE_TO_NONPRIV(base, i),
1222 				   i915_mmio_reg_offset(wa->reg));
1223 
1224 	/* And clear the rest just in case of garbage */
1225 	for (; i < RING_MAX_NONPRIV_SLOTS; i++)
1226 		intel_uncore_write(uncore,
1227 				   RING_FORCE_TO_NONPRIV(base, i),
1228 				   i915_mmio_reg_offset(RING_NOPID(base)));
1229 }
1230 
1231 static void
1232 rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1233 {
1234 	struct drm_i915_private *i915 = engine->i915;
1235 
1236 	if (IS_GEN(i915, 11)) {
		/* This is not a WA. Enable for better image quality */
1238 		wa_masked_en(wal,
1239 			     _3D_CHICKEN3,
1240 			     _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
1241 
1242 		/* WaPipelineFlushCoherentLines:icl */
1243 		ignore_wa_write_or(wal,
1244 				   GEN8_L3SQCREG4,
1245 				   GEN8_LQSC_FLUSH_COHERENT_LINES,
1246 				   GEN8_LQSC_FLUSH_COHERENT_LINES);
1247 
1248 		/*
1249 		 * Wa_1405543622:icl
1250 		 * Formerly known as WaGAPZPriorityScheme
1251 		 */
1252 		wa_write_or(wal,
1253 			    GEN8_GARBCNTL,
1254 			    GEN11_ARBITRATION_PRIO_ORDER_MASK);
1255 
1256 		/*
1257 		 * Wa_1604223664:icl
1258 		 * Formerly known as WaL3BankAddressHashing
1259 		 */
1260 		wa_write_masked_or(wal,
1261 				   GEN8_GARBCNTL,
1262 				   GEN11_HASH_CTRL_EXCL_MASK,
1263 				   GEN11_HASH_CTRL_EXCL_BIT0);
1264 		wa_write_masked_or(wal,
1265 				   GEN11_GLBLINVL,
1266 				   GEN11_BANK_HASH_ADDR_EXCL_MASK,
1267 				   GEN11_BANK_HASH_ADDR_EXCL_BIT0);
1268 
1269 		/*
1270 		 * Wa_1405733216:icl
1271 		 * Formerly known as WaDisableCleanEvicts
1272 		 */
1273 		ignore_wa_write_or(wal,
1274 				   GEN8_L3SQCREG4,
1275 				   GEN11_LQSC_CLEAN_EVICT_DISABLE,
1276 				   GEN11_LQSC_CLEAN_EVICT_DISABLE);
1277 
1278 		/* WaForwardProgressSoftReset:icl */
1279 		wa_write_or(wal,
1280 			    GEN10_SCRATCH_LNCF2,
1281 			    PMFLUSHDONE_LNICRSDROP |
1282 			    PMFLUSH_GAPL3UNBLOCK |
1283 			    PMFLUSHDONE_LNEBLK);
1284 
1285 		/* Wa_1406609255:icl (pre-prod) */
1286 		if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
1287 			wa_write_or(wal,
1288 				    GEN7_SARCHKMD,
1289 				    GEN7_DISABLE_DEMAND_PREFETCH);
1290 
1291 		/* Wa_1606682166:icl */
1292 		wa_write_or(wal,
1293 			    GEN7_SARCHKMD,
1294 			    GEN7_DISABLE_SAMPLER_PREFETCH);
1295 	}
1296 
1297 	if (IS_GEN_RANGE(i915, 9, 11)) {
1298 		/* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */
1299 		wa_masked_en(wal,
1300 			     GEN7_FF_SLICE_CS_CHICKEN1,
1301 			     GEN9_FFSC_PERCTX_PREEMPT_CTRL);
1302 	}
1303 
1304 	if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
1305 		/* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
1306 		wa_write_or(wal,
1307 			    GEN8_GARBCNTL,
1308 			    GEN9_GAPS_TSV_CREDIT_DISABLE);
1309 	}
1310 
1311 	if (IS_BROXTON(i915)) {
1312 		/* WaDisablePooledEuLoadBalancingFix:bxt */
1313 		wa_masked_en(wal,
1314 			     FF_SLICE_CS_CHICKEN2,
1315 			     GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
1316 	}
1317 
1318 	if (IS_GEN(i915, 9)) {
1319 		/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
1320 		wa_masked_en(wal,
1321 			     GEN9_CSFE_CHICKEN1_RCS,
1322 			     GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
1323 
1324 		/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
1325 		wa_write_or(wal,
1326 			    BDW_SCRATCH1,
1327 			    GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
1328 
1329 		/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
1330 		if (IS_GEN9_LP(i915))
1331 			wa_write_masked_or(wal,
1332 					   GEN8_L3SQCREG1,
1333 					   L3_PRIO_CREDITS_MASK,
1334 					   L3_GENERAL_PRIO_CREDITS(62) |
1335 					   L3_HIGH_PRIO_CREDITS(2));
1336 
1337 		/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
1338 		wa_write_or(wal,
1339 			    GEN8_L3SQCREG4,
1340 			    GEN8_LQSC_FLUSH_COHERENT_LINES);
1341 	}
1342 }
1343 
1344 static void
1345 xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1346 {
1347 	struct drm_i915_private *i915 = engine->i915;
1348 
1349 	/* WaKBLVECSSemaphoreWaitPoll:kbl */
1350 	if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
1351 		wa_write(wal,
1352 			 RING_SEMA_WAIT_POLL(engine->mmio_base),
1353 			 1);
1354 	}
1355 }
1356 
1357 static void
1358 engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1359 {
1360 	if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8))
1361 		return;
1362 
1363 	if (engine->id == RCS0)
1364 		rcs_engine_wa_init(engine, wal);
1365 	else
1366 		xcs_engine_wa_init(engine, wal);
1367 }
1368 
1369 void intel_engine_init_workarounds(struct intel_engine_cs *engine)
1370 {
1371 	struct i915_wa_list *wal = &engine->wa_list;
1372 
1373 	if (GEM_WARN_ON(INTEL_GEN(engine->i915) < 8))
1374 		return;
1375 
1376 	wa_init_start(wal, engine->name);
1377 	engine_init_workarounds(engine, wal);
1378 	wa_init_finish(wal);
1379 }
1380 
1381 void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
1382 {
1383 	wa_list_apply(engine->uncore, &engine->wa_list);
1384 }
1385 
1386 static struct i915_vma *
1387 create_scratch(struct i915_address_space *vm, int count)
1388 {
1389 	struct drm_i915_gem_object *obj;
1390 	struct i915_vma *vma;
1391 	unsigned int size;
1392 	int err;
1393 
1394 	size = round_up(count * sizeof(u32), PAGE_SIZE);
1395 	obj = i915_gem_object_create_internal(vm->i915, size);
1396 	if (IS_ERR(obj))
1397 		return ERR_CAST(obj);
1398 
1399 	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
1400 
1401 	vma = i915_vma_instance(obj, vm, NULL);
1402 	if (IS_ERR(vma)) {
1403 		err = PTR_ERR(vma);
1404 		goto err_obj;
1405 	}
1406 
1407 	err = i915_vma_pin(vma, 0, 0,
1408 			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
1409 	if (err)
1410 		goto err_obj;
1411 
1412 	return vma;
1413 
1414 err_obj:
1415 	i915_gem_object_put(obj);
1416 	return ERR_PTR(err);
1417 }
1418 
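/*
 * Emit one MI_STORE_REGISTER_MEM per workaround, capturing the current
 * value of each register into consecutive u32 slots of @vma so that the
 * caller can compare them against the expected values. The extra length
 * dword (srm++) accounts for the 64-bit address form used on gen8+, with
 * the trailing zero supplying the upper address dword.
 */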
1419 static int
1420 wa_list_srm(struct i915_request *rq,
1421 	    const struct i915_wa_list *wal,
1422 	    struct i915_vma *vma)
1423 {
1424 	const struct i915_wa *wa;
1425 	unsigned int i;
1426 	u32 srm, *cs;
1427 
1428 	srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
1429 	if (INTEL_GEN(rq->i915) >= 8)
1430 		srm++;
1431 
1432 	cs = intel_ring_begin(rq, 4 * wal->count);
1433 	if (IS_ERR(cs))
1434 		return PTR_ERR(cs);
1435 
1436 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
1437 		*cs++ = srm;
1438 		*cs++ = i915_mmio_reg_offset(wa->reg);
1439 		*cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
1440 		*cs++ = 0;
1441 	}
1442 	intel_ring_advance(rq, cs);
1443 
1444 	return 0;
1445 }
1446 
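/*
 * Verify a workaround list from the GPU's point of view: allocate a
 * scratch buffer, submit a request on @ce that SRMs every listed register
 * into it, wait for completion, and then run wa_verify() on each result.
 * Returns -ETIME if the request does not complete and -ENXIO if any value
 * does not match.
 */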
1447 static int engine_wa_list_verify(struct intel_context *ce,
1448 				 const struct i915_wa_list * const wal,
1449 				 const char *from)
1450 {
1451 	const struct i915_wa *wa;
1452 	struct i915_request *rq;
1453 	struct i915_vma *vma;
1454 	unsigned int i;
1455 	u32 *results;
1456 	int err;
1457 
1458 	if (!wal->count)
1459 		return 0;
1460 
1461 	vma = create_scratch(&ce->engine->i915->ggtt.vm, wal->count);
1462 	if (IS_ERR(vma))
1463 		return PTR_ERR(vma);
1464 
1465 	rq = intel_context_create_request(ce);
1466 	if (IS_ERR(rq)) {
1467 		err = PTR_ERR(rq);
1468 		goto err_vma;
1469 	}
1470 
1471 	err = wa_list_srm(rq, wal, vma);
1472 	if (err)
1473 		goto err_vma;
1474 
1475 	i915_request_add(rq);
1476 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1477 		err = -ETIME;
1478 		goto err_vma;
1479 	}
1480 
1481 	results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
1482 	if (IS_ERR(results)) {
1483 		err = PTR_ERR(results);
1484 		goto err_vma;
1485 	}
1486 
1487 	err = 0;
1488 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1489 		if (!wa_verify(wa, results[i], wal->name, from))
1490 			err = -ENXIO;
1491 
1492 	i915_gem_object_unpin_map(vma->obj);
1493 
1494 err_vma:
1495 	i915_vma_unpin(vma);
1496 	i915_vma_put(vma);
1497 	return err;
1498 }
1499 
1500 int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
1501 				    const char *from)
1502 {
1503 	return engine_wa_list_verify(engine->kernel_context,
1504 				     &engine->wa_list,
1505 				     from);
1506 }
1507 
1508 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1509 #include "selftest_workarounds.c"
1510 #endif
1511