/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2018 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_workarounds.h"

/**
 * DOC: Hardware workarounds
 *
 * This file is intended as a central place to implement most [1]_ of the
 * required workarounds for hardware to work as originally intended. They fall
 * in five basic categories depending on how/when they are applied:
 *
 * - Workarounds that touch registers that are saved/restored to/from the HW
 *   context image. The list is emitted (via Load Register Immediate commands)
 *   every time a new context is created.
 * - GT workarounds. The list of these WAs is applied whenever these registers
 *   revert to default values (on GPU reset, suspend/resume [2]_, etc.).
 * - Display workarounds. The list is applied during display clock-gating
 *   initialization.
 * - Workarounds that whitelist a privileged register, so that UMDs can manage
 *   them directly. This is just a special case of an MMIO workaround (as we
 *   write the list of these to-be-whitelisted registers to some special HW
 *   registers).
 * - Workaround batchbuffers, which get executed automatically by the hardware
 *   on every HW context restore.
 *
 * .. [1] Please notice that there are other WAs that, due to their nature,
 *    cannot be applied from a central place. Those are peppered around the rest
 *    of the code, as needed.
 *
 * .. [2] Technically, some registers are power-context saved & restored, so they
 *    survive a suspend/resume. In practice, writing them again is not too
 *    costly and simplifies things. We can revisit this in the future.
 *
 * Layout
 * ~~~~~~
 *
 * Keep things in this file ordered by WA type, as per the above (context, GT,
 * display, register whitelist, batchbuffer). Then, inside each type, keep the
 * following order:
 *
 * - Infrastructure functions and macros
 * - WAs per platform in standard gen/chrono order
 * - Public functions to init or apply the given workaround type.
 */

static void wa_init_start(struct i915_wa_list *wal, const char *name, const char *engine_name)
{
	wal->name = name;
	wal->engine_name = engine_name;
}

#define WA_LIST_CHUNK (1 << 4)

static void wa_init_finish(struct i915_wa_list *wal)
{
	/* Trim unused entries. */
	if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
		struct i915_wa *list = kmemdup(wal->list,
					       wal->count * sizeof(*list),
					       GFP_KERNEL);

		if (list) {
			kfree(wal->list);
			wal->list = list;
		}
	}

	if (!wal->count)
		return;

	DRM_DEBUG_DRIVER("Initialized %u %s workarounds on %s\n",
			 wal->wa_count, wal->name, wal->engine_name);
}
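/*
 * A minimal usage sketch of this infrastructure (mirroring
 * intel_gt_init_workarounds() and gen9_gt_workarounds_init() later in this
 * file, not a new list): entries are recorded between wa_init_start() and
 * wa_init_finish() using the wa_*() helpers below, e.g.
 *
 *	wa_init_start(wal, "GT", "global");
 *	wa_write_or(wal, GAM_ECOCHK, ECOCHK_DIS_TLB);
 *	wa_init_finish(wal);
 *
 * Nothing touches the hardware at this point; each entry only records the
 * register, the bits to clear/set and a read-back mask, and is applied later
 * via wa_list_apply() or emitted as LRI commands into the context image.
 */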
static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
{
	unsigned int addr = i915_mmio_reg_offset(wa->reg);
	unsigned int start = 0, end = wal->count;
	const unsigned int grow = WA_LIST_CHUNK;
	struct i915_wa *wa_;

	GEM_BUG_ON(!is_power_of_2(grow));

	if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
		struct i915_wa *list;

		list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
				     GFP_KERNEL);
		if (!list) {
			DRM_ERROR("No space for workaround init!\n");
			return;
		}

		if (wal->list)
			memcpy(list, wal->list, sizeof(*wa) * wal->count);

		wal->list = list;
	}

	while (start < end) {
		unsigned int mid = start + (end - start) / 2;

		if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
			start = mid + 1;
		} else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
			end = mid;
		} else {
			wa_ = &wal->list[mid];

			if ((wa->clr | wa_->clr) && !(wa->clr & ~wa_->clr)) {
				DRM_ERROR("Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n",
					  i915_mmio_reg_offset(wa_->reg),
					  wa_->clr, wa_->set);

				wa_->set &= ~wa->clr;
			}

			wal->wa_count++;
			wa_->set |= wa->set;
			wa_->clr |= wa->clr;
			wa_->read |= wa->read;
			return;
		}
	}

	wal->wa_count++;
	wa_ = &wal->list[wal->count++];
	*wa_ = *wa;

	while (wa_-- > wal->list) {
		GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
			   i915_mmio_reg_offset(wa_[1].reg));
		if (i915_mmio_reg_offset(wa_[1].reg) >
		    i915_mmio_reg_offset(wa_[0].reg))
			break;

		swap(wa_[1], wa_[0]);
	}
}

static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
		   u32 clear, u32 set, u32 read_mask)
{
	struct i915_wa wa = {
		.reg  = reg,
		.clr  = clear,
		.set  = set,
		.read = read_mask,
	};

	_wa_add(wal, &wa);
}

static void
wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
{
	wa_add(wal, reg, clear, set, clear);
}

static void
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{
	wa_write_masked_or(wal, reg, ~0, set);
}

static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{
	wa_write_masked_or(wal, reg, set, set);
}

static void
wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
{
	wa_write_masked_or(wal, reg, clr, 0);
}

static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
	wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val);
}

static void
wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
	wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val);
}

#define WA_SET_BIT_MASKED(addr, mask) \
	wa_masked_en(wal, (addr), (mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
	wa_masked_dis(wal, (addr), (mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
	wa_write_masked_or(wal, (addr), 0, _MASKED_FIELD((mask), (value)))
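/*
 * Note on the masked helpers and macros above: "masked" registers expect the
 * per-bit write-enable mask in the upper 16 bits of the written value (see
 * _MASKED_BIT_ENABLE()/_MASKED_FIELD()). For example,
 *
 *	wa_masked_en(wal, MI_MODE, ASYNC_FLIP_PERF_DISABLE);
 *
 * records a set value of ASYNC_FLIP_PERF_DISABLE << 16 | ASYNC_FLIP_PERF_DISABLE,
 * while only the plain bit is handed to wa_add() as the read-back mask used
 * for verification.
 */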
static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
}

static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
}

static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);

	/* WaDisableAsyncFlipPerfMode:bdw,chv */
	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);

	/* WaDisablePartialInstShootdown:bdw,chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
	 * invalidation occurs during a PSD flush.
	 */
	/* WaForceEnableNonCoherent:bdw,chv */
	/* WaHdcDisableFetchWhenMasked:bdw,chv */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
			  HDC_FORCE_NON_COHERENT);

	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
	 * polygons in the same 8x4 pixel/sample area to be processed without
	 * stalling waiting for the earlier ones to write to Hierarchical Z
	 * buffer."
	 *
	 * This optimization is off by default for BDW and CHV; turn it on.
	 */
	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

	/* Wa4x4STCOptimizationDisable:bdw,chv */
	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN6_WIZ_HASHING_MASK,
			    GEN6_WIZ_HASHING_16x4);
}

static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	gen8_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* WaDisableDopClockGating:bdw
	 *
	 * Also see the related UCGTCL1 write in bdw_init_clock_gating()
	 * to disable EUTC clock gating.
	 */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
			  DOP_CLOCK_GATING_DISABLE);

	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
			  GEN8_SAMPLER_POWER_BYPASS_DIS);

	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  /* WaForceContextSaveRestoreNonCoherent:bdw */
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
			  (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
}

static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen8_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* Improve HiZ throughput on CHV. */
	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
}

static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	if (HAS_LLC(i915)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
318 */ 319 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, 320 GEN9_PBE_COMPRESSED_HASH_SELECTION); 321 WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, 322 GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); 323 } 324 325 /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */ 326 /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */ 327 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, 328 FLOW_CONTROL_ENABLE | 329 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); 330 331 /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */ 332 /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */ 333 WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, 334 GEN9_ENABLE_YV12_BUGFIX | 335 GEN9_ENABLE_GPGPU_PREEMPTION); 336 337 /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */ 338 /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */ 339 WA_SET_BIT_MASKED(CACHE_MODE_1, 340 GEN8_4x4_STC_OPTIMIZATION_DISABLE | 341 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE); 342 343 /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */ 344 WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, 345 GEN9_CCS_TLB_PREFETCH_ENABLE); 346 347 /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */ 348 WA_SET_BIT_MASKED(HDC_CHICKEN0, 349 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | 350 HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); 351 352 /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are 353 * both tied to WaForceContextSaveRestoreNonCoherent 354 * in some hsds for skl. We keep the tie for all gen9. The 355 * documentation is a bit hazy and so we want to get common behaviour, 356 * even though there is no clear evidence we would need both on kbl/bxt. 357 * This area has been source of system hangs so we play it safe 358 * and mimic the skl regardless of what bspec says. 359 * 360 * Use Force Non-Coherent whenever executing a 3D context. This 361 * is a workaround for a possible hang in the unlikely event 362 * a TLB invalidation occurs during a PSD flush. 363 */ 364 365 /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */ 366 WA_SET_BIT_MASKED(HDC_CHICKEN0, 367 HDC_FORCE_NON_COHERENT); 368 369 /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */ 370 if (IS_SKYLAKE(i915) || 371 IS_KABYLAKE(i915) || 372 IS_COFFEELAKE(i915) || 373 IS_COMETLAKE(i915)) 374 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, 375 GEN8_SAMPLER_POWER_BYPASS_DIS); 376 377 /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */ 378 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); 379 380 /* 381 * Supporting preemption with fine-granularity requires changes in the 382 * batch buffer programming. Since we can't break old userspace, we 383 * need to set our default preemption level to safe value. Userspace is 384 * still able to use more fine-grained preemption levels, since in 385 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the 386 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are 387 * not real HW workarounds, but merely a way to start using preemption 388 * while maintaining old contract with userspace. 
389 */ 390 391 /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */ 392 WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); 393 394 /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */ 395 WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, 396 GEN9_PREEMPT_GPGPU_LEVEL_MASK, 397 GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); 398 399 /* WaClearHIZ_WM_CHICKEN3:bxt,glk */ 400 if (IS_GEN9_LP(i915)) 401 WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ); 402 } 403 404 static void skl_tune_iz_hashing(struct intel_engine_cs *engine, 405 struct i915_wa_list *wal) 406 { 407 struct intel_gt *gt = engine->gt; 408 u8 vals[3] = { 0, 0, 0 }; 409 unsigned int i; 410 411 for (i = 0; i < 3; i++) { 412 u8 ss; 413 414 /* 415 * Only consider slices where one, and only one, subslice has 7 416 * EUs 417 */ 418 if (!is_power_of_2(gt->info.sseu.subslice_7eu[i])) 419 continue; 420 421 /* 422 * subslice_7eu[i] != 0 (because of the check above) and 423 * ss_max == 4 (maximum number of subslices possible per slice) 424 * 425 * -> 0 <= ss <= 3; 426 */ 427 ss = ffs(gt->info.sseu.subslice_7eu[i]) - 1; 428 vals[i] = 3 - ss; 429 } 430 431 if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) 432 return; 433 434 /* Tune IZ hashing. See intel_device_info_runtime_init() */ 435 WA_SET_FIELD_MASKED(GEN7_GT_MODE, 436 GEN9_IZ_HASHING_MASK(2) | 437 GEN9_IZ_HASHING_MASK(1) | 438 GEN9_IZ_HASHING_MASK(0), 439 GEN9_IZ_HASHING(2, vals[2]) | 440 GEN9_IZ_HASHING(1, vals[1]) | 441 GEN9_IZ_HASHING(0, vals[0])); 442 } 443 444 static void skl_ctx_workarounds_init(struct intel_engine_cs *engine, 445 struct i915_wa_list *wal) 446 { 447 gen9_ctx_workarounds_init(engine, wal); 448 skl_tune_iz_hashing(engine, wal); 449 } 450 451 static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine, 452 struct i915_wa_list *wal) 453 { 454 gen9_ctx_workarounds_init(engine, wal); 455 456 /* WaDisableThreadStallDopClockGating:bxt */ 457 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, 458 STALL_DOP_GATING_DISABLE); 459 460 /* WaToEnableHwFixForPushConstHWBug:bxt */ 461 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, 462 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); 463 } 464 465 static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine, 466 struct i915_wa_list *wal) 467 { 468 struct drm_i915_private *i915 = engine->i915; 469 470 gen9_ctx_workarounds_init(engine, wal); 471 472 /* WaToEnableHwFixForPushConstHWBug:kbl */ 473 if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER)) 474 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, 475 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); 476 477 /* WaDisableSbeCacheDispatchPortSharing:kbl */ 478 WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1, 479 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); 480 } 481 482 static void glk_ctx_workarounds_init(struct intel_engine_cs *engine, 483 struct i915_wa_list *wal) 484 { 485 gen9_ctx_workarounds_init(engine, wal); 486 487 /* WaToEnableHwFixForPushConstHWBug:glk */ 488 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, 489 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); 490 } 491 492 static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, 493 struct i915_wa_list *wal) 494 { 495 gen9_ctx_workarounds_init(engine, wal); 496 497 /* WaToEnableHwFixForPushConstHWBug:cfl */ 498 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, 499 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); 500 501 /* WaDisableSbeCacheDispatchPortSharing:cfl */ 502 WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1, 503 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); 504 } 505 506 static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine, 507 
struct i915_wa_list *wal) 508 { 509 /* WaForceContextSaveRestoreNonCoherent:cnl */ 510 WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0, 511 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT); 512 513 /* WaDisableReplayBufferBankArbitrationOptimization:cnl */ 514 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, 515 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); 516 517 /* WaPushConstantDereferenceHoldDisable:cnl */ 518 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE); 519 520 /* FtrEnableFastAnisoL1BankingFix:cnl */ 521 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX); 522 523 /* WaDisable3DMidCmdPreemption:cnl */ 524 WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); 525 526 /* WaDisableGPGPUMidCmdPreemption:cnl */ 527 WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, 528 GEN9_PREEMPT_GPGPU_LEVEL_MASK, 529 GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); 530 531 /* WaDisableEarlyEOT:cnl */ 532 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT); 533 } 534 535 static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, 536 struct i915_wa_list *wal) 537 { 538 struct drm_i915_private *i915 = engine->i915; 539 540 /* WaDisableBankHangMode:icl */ 541 wa_write(wal, 542 GEN8_L3CNTLREG, 543 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) | 544 GEN8_ERRDETBCTRL); 545 546 /* Wa_1604370585:icl (pre-prod) 547 * Formerly known as WaPushConstantDereferenceHoldDisable 548 */ 549 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) 550 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, 551 PUSH_CONSTANT_DEREF_DISABLE); 552 553 /* WaForceEnableNonCoherent:icl 554 * This is not the same workaround as in early Gen9 platforms, where 555 * lacking this could cause system hangs, but coherency performance 556 * overhead is high and only a few compute workloads really need it 557 * (the register is whitelisted in hardware now, so UMDs can opt in 558 * for coherency if they have a good reason). 
559 */ 560 WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); 561 562 /* Wa_2006611047:icl (pre-prod) 563 * Formerly known as WaDisableImprovedTdlClkGating 564 */ 565 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) 566 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, 567 GEN11_TDL_CLOCK_GATING_FIX_DISABLE); 568 569 /* Wa_2006665173:icl (pre-prod) */ 570 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) 571 WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, 572 GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC); 573 574 /* WaEnableFloatBlendOptimization:icl */ 575 wa_write_masked_or(wal, 576 GEN10_CACHE_MODE_SS, 577 0, /* write-only, so skip validation */ 578 _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE)); 579 580 /* WaDisableGPGPUMidThreadPreemption:icl */ 581 WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, 582 GEN9_PREEMPT_GPGPU_LEVEL_MASK, 583 GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); 584 585 /* allow headerless messages for preemptible GPGPU context */ 586 WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE, 587 GEN11_SAMPLER_ENABLE_HEADLESS_MSG); 588 589 /* Wa_1604278689:icl,ehl */ 590 wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID); 591 wa_write_masked_or(wal, IVB_FBC_RT_BASE_UPPER, 592 0, /* write-only register; skip validation */ 593 0xFFFFFFFF); 594 595 /* Wa_1406306137:icl,ehl */ 596 wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU); 597 } 598 599 static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, 600 struct i915_wa_list *wal) 601 { 602 /* 603 * Wa_1409142259:tgl 604 * Wa_1409347922:tgl 605 * Wa_1409252684:tgl 606 * Wa_1409217633:tgl 607 * Wa_1409207793:tgl 608 * Wa_1409178076:tgl 609 * Wa_1408979724:tgl 610 */ 611 WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, 612 GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); 613 614 /* 615 * Wa_1604555607:gen12 and Wa_1608008084:gen12 616 * FF_MODE2 register will return the wrong value when read. The default 617 * value for this register is zero for all fields and there are no bit 618 * masks. So instead of doing a RMW we should just write the GS Timer 619 * and TDS timer values for Wa_1604555607 and Wa_16011163337. 
620 */ 621 wa_add(wal, 622 FF_MODE2, 623 FF_MODE2_GS_TIMER_MASK | FF_MODE2_TDS_TIMER_MASK, 624 FF_MODE2_GS_TIMER_224 | FF_MODE2_TDS_TIMER_128, 625 0); 626 627 /* WaDisableGPGPUMidThreadPreemption:tgl */ 628 WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, 629 GEN9_PREEMPT_GPGPU_LEVEL_MASK, 630 GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); 631 } 632 633 static void 634 __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, 635 struct i915_wa_list *wal, 636 const char *name) 637 { 638 struct drm_i915_private *i915 = engine->i915; 639 640 if (engine->class != RENDER_CLASS) 641 return; 642 643 wa_init_start(wal, name, engine->name); 644 645 if (IS_GEN(i915, 12)) 646 tgl_ctx_workarounds_init(engine, wal); 647 else if (IS_GEN(i915, 11)) 648 icl_ctx_workarounds_init(engine, wal); 649 else if (IS_CANNONLAKE(i915)) 650 cnl_ctx_workarounds_init(engine, wal); 651 else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) 652 cfl_ctx_workarounds_init(engine, wal); 653 else if (IS_GEMINILAKE(i915)) 654 glk_ctx_workarounds_init(engine, wal); 655 else if (IS_KABYLAKE(i915)) 656 kbl_ctx_workarounds_init(engine, wal); 657 else if (IS_BROXTON(i915)) 658 bxt_ctx_workarounds_init(engine, wal); 659 else if (IS_SKYLAKE(i915)) 660 skl_ctx_workarounds_init(engine, wal); 661 else if (IS_CHERRYVIEW(i915)) 662 chv_ctx_workarounds_init(engine, wal); 663 else if (IS_BROADWELL(i915)) 664 bdw_ctx_workarounds_init(engine, wal); 665 else if (IS_GEN(i915, 7)) 666 gen7_ctx_workarounds_init(engine, wal); 667 else if (IS_GEN(i915, 6)) 668 gen6_ctx_workarounds_init(engine, wal); 669 else if (INTEL_GEN(i915) < 8) 670 return; 671 else 672 MISSING_CASE(INTEL_GEN(i915)); 673 674 wa_init_finish(wal); 675 } 676 677 void intel_engine_init_ctx_wa(struct intel_engine_cs *engine) 678 { 679 __intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context"); 680 } 681 682 int intel_engine_emit_ctx_wa(struct i915_request *rq) 683 { 684 struct i915_wa_list *wal = &rq->engine->ctx_wa_list; 685 struct i915_wa *wa; 686 unsigned int i; 687 u32 *cs; 688 int ret; 689 690 if (wal->count == 0) 691 return 0; 692 693 ret = rq->engine->emit_flush(rq, EMIT_BARRIER); 694 if (ret) 695 return ret; 696 697 cs = intel_ring_begin(rq, (wal->count * 2 + 2)); 698 if (IS_ERR(cs)) 699 return PTR_ERR(cs); 700 701 *cs++ = MI_LOAD_REGISTER_IMM(wal->count); 702 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { 703 *cs++ = i915_mmio_reg_offset(wa->reg); 704 *cs++ = wa->set; 705 } 706 *cs++ = MI_NOOP; 707 708 intel_ring_advance(rq, cs); 709 710 ret = rq->engine->emit_flush(rq, EMIT_BARRIER); 711 if (ret) 712 return ret; 713 714 return 0; 715 } 716 717 static void 718 gen4_gt_workarounds_init(struct drm_i915_private *i915, 719 struct i915_wa_list *wal) 720 { 721 /* WaDisable_RenderCache_OperationalFlush:gen4,ilk */ 722 wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); 723 } 724 725 static void 726 g4x_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 727 { 728 gen4_gt_workarounds_init(i915, wal); 729 730 /* WaDisableRenderCachePipelinedFlush:g4x,ilk */ 731 wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE); 732 } 733 734 static void 735 ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 736 { 737 g4x_gt_workarounds_init(i915, wal); 738 739 wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED); 740 } 741 742 static void 743 snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 744 { 745 /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ 746 wa_masked_en(wal, 747 _3D_CHICKEN, 
748 _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB); 749 750 /* WaDisable_RenderCache_OperationalFlush:snb */ 751 wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); 752 753 /* 754 * BSpec recommends 8x4 when MSAA is used, 755 * however in practice 16x4 seems fastest. 756 * 757 * Note that PS/WM thread counts depend on the WIZ hashing 758 * disable bit, which we don't touch here, but it's good 759 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 760 */ 761 wa_add(wal, 762 GEN6_GT_MODE, 0, 763 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), 764 GEN6_WIZ_HASHING_16x4); 765 766 wa_masked_dis(wal, CACHE_MODE_0, CM0_STC_EVICT_DISABLE_LRA_SNB); 767 768 wa_masked_en(wal, 769 _3D_CHICKEN3, 770 /* WaStripsFansDisableFastClipPerformanceFix:snb */ 771 _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL | 772 /* 773 * Bspec says: 774 * "This bit must be set if 3DSTATE_CLIP clip mode is set 775 * to normal and 3DSTATE_SF number of SF output attributes 776 * is more than 16." 777 */ 778 _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH); 779 } 780 781 static void 782 ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 783 { 784 /* WaDisableEarlyCull:ivb */ 785 wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL); 786 787 /* WaDisablePSDDualDispatchEnable:ivb */ 788 if (IS_IVB_GT1(i915)) 789 wa_masked_en(wal, 790 GEN7_HALF_SLICE_CHICKEN1, 791 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); 792 793 /* WaDisable_RenderCache_OperationalFlush:ivb */ 794 wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); 795 796 /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */ 797 wa_masked_dis(wal, 798 GEN7_COMMON_SLICE_CHICKEN1, 799 GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); 800 801 /* WaApplyL3ControlAndL3ChickenMode:ivb */ 802 wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL); 803 wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE); 804 805 /* WaForceL3Serialization:ivb */ 806 wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE); 807 808 /* 809 * WaVSThreadDispatchOverride:ivb,vlv 810 * 811 * This actually overrides the dispatch 812 * mode for all thread types. 813 */ 814 wa_write_masked_or(wal, GEN7_FF_THREAD_MODE, 815 GEN7_FF_SCHED_MASK, 816 GEN7_FF_TS_SCHED_HW | 817 GEN7_FF_VS_SCHED_HW | 818 GEN7_FF_DS_SCHED_HW); 819 820 if (0) { /* causes HiZ corruption on ivb:gt1 */ 821 /* enable HiZ Raw Stall Optimization */ 822 wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); 823 } 824 825 /* WaDisable4x2SubspanOptimization:ivb */ 826 wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); 827 828 /* 829 * BSpec recommends 8x4 when MSAA is used, 830 * however in practice 16x4 seems fastest. 831 * 832 * Note that PS/WM thread counts depend on the WIZ hashing 833 * disable bit, which we don't touch here, but it's good 834 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
835 */ 836 wa_add(wal, GEN7_GT_MODE, 0, 837 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), 838 GEN6_WIZ_HASHING_16x4); 839 } 840 841 static void 842 vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 843 { 844 /* WaDisableEarlyCull:vlv */ 845 wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL); 846 847 /* WaPsdDispatchEnable:vlv */ 848 /* WaDisablePSDDualDispatchEnable:vlv */ 849 wa_masked_en(wal, 850 GEN7_HALF_SLICE_CHICKEN1, 851 GEN7_MAX_PS_THREAD_DEP | 852 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); 853 854 /* WaDisable_RenderCache_OperationalFlush:vlv */ 855 wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); 856 857 /* WaForceL3Serialization:vlv */ 858 wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE); 859 860 /* 861 * WaVSThreadDispatchOverride:ivb,vlv 862 * 863 * This actually overrides the dispatch 864 * mode for all thread types. 865 */ 866 wa_write_masked_or(wal, 867 GEN7_FF_THREAD_MODE, 868 GEN7_FF_SCHED_MASK, 869 GEN7_FF_TS_SCHED_HW | 870 GEN7_FF_VS_SCHED_HW | 871 GEN7_FF_DS_SCHED_HW); 872 873 /* 874 * BSpec says this must be set, even though 875 * WaDisable4x2SubspanOptimization isn't listed for VLV. 876 */ 877 wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); 878 879 /* 880 * BSpec recommends 8x4 when MSAA is used, 881 * however in practice 16x4 seems fastest. 882 * 883 * Note that PS/WM thread counts depend on the WIZ hashing 884 * disable bit, which we don't touch here, but it's good 885 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 886 */ 887 wa_add(wal, GEN7_GT_MODE, 0, 888 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), 889 GEN6_WIZ_HASHING_16x4); 890 891 /* 892 * WaIncreaseL3CreditsForVLVB0:vlv 893 * This is the hardware default actually. 894 */ 895 wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE); 896 } 897 898 static void 899 hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 900 { 901 /* L3 caching of data atomics doesn't work -- disable it. */ 902 wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); 903 904 wa_add(wal, 905 HSW_ROW_CHICKEN3, 0, 906 _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE), 907 0 /* XXX does this reg exist? */); 908 909 /* WaVSRefCountFullforceMissDisable:hsw */ 910 wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME); 911 912 wa_masked_dis(wal, 913 CACHE_MODE_0_GEN7, 914 /* WaDisable_RenderCache_OperationalFlush:hsw */ 915 RC_OP_FLUSH_ENABLE | 916 /* enable HiZ Raw Stall Optimization */ 917 HIZ_RAW_STALL_OPT_DISABLE); 918 919 /* WaDisable4x2SubspanOptimization:hsw */ 920 wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); 921 922 /* 923 * BSpec recommends 8x4 when MSAA is used, 924 * however in practice 16x4 seems fastest. 925 * 926 * Note that PS/WM thread counts depend on the WIZ hashing 927 * disable bit, which we don't touch here, but it's good 928 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
929 */ 930 wa_add(wal, GEN7_GT_MODE, 0, 931 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), 932 GEN6_WIZ_HASHING_16x4); 933 934 /* WaSampleCChickenBitEnable:hsw */ 935 wa_masked_en(wal, HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE); 936 } 937 938 static void 939 gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 940 { 941 /* WaDisableKillLogic:bxt,skl,kbl */ 942 if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915)) 943 wa_write_or(wal, 944 GAM_ECOCHK, 945 ECOCHK_DIS_TLB); 946 947 if (HAS_LLC(i915)) { 948 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl 949 * 950 * Must match Display Engine. See 951 * WaCompressedResourceDisplayNewHashMode. 952 */ 953 wa_write_or(wal, 954 MMCD_MISC_CTRL, 955 MMCD_PCLA | MMCD_HOTSPOT_EN); 956 } 957 958 /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */ 959 wa_write_or(wal, 960 GAM_ECOCHK, 961 BDW_DISABLE_HDC_INVALIDATION); 962 } 963 964 static void 965 skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 966 { 967 gen9_gt_workarounds_init(i915, wal); 968 969 /* WaDisableGafsUnitClkGating:skl */ 970 wa_write_or(wal, 971 GEN7_UCGCTL4, 972 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); 973 974 /* WaInPlaceDecompressionHang:skl */ 975 if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER)) 976 wa_write_or(wal, 977 GEN9_GAMT_ECO_REG_RW_IA, 978 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); 979 } 980 981 static void 982 bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 983 { 984 gen9_gt_workarounds_init(i915, wal); 985 986 /* WaInPlaceDecompressionHang:bxt */ 987 wa_write_or(wal, 988 GEN9_GAMT_ECO_REG_RW_IA, 989 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); 990 } 991 992 static void 993 kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 994 { 995 gen9_gt_workarounds_init(i915, wal); 996 997 /* WaDisableDynamicCreditSharing:kbl */ 998 if (IS_KBL_REVID(i915, 0, KBL_REVID_B0)) 999 wa_write_or(wal, 1000 GAMT_CHKN_BIT_REG, 1001 GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); 1002 1003 /* WaDisableGafsUnitClkGating:kbl */ 1004 wa_write_or(wal, 1005 GEN7_UCGCTL4, 1006 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); 1007 1008 /* WaInPlaceDecompressionHang:kbl */ 1009 wa_write_or(wal, 1010 GEN9_GAMT_ECO_REG_RW_IA, 1011 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); 1012 } 1013 1014 static void 1015 glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 1016 { 1017 gen9_gt_workarounds_init(i915, wal); 1018 } 1019 1020 static void 1021 cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 1022 { 1023 gen9_gt_workarounds_init(i915, wal); 1024 1025 /* WaDisableGafsUnitClkGating:cfl */ 1026 wa_write_or(wal, 1027 GEN7_UCGCTL4, 1028 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); 1029 1030 /* WaInPlaceDecompressionHang:cfl */ 1031 wa_write_or(wal, 1032 GEN9_GAMT_ECO_REG_RW_IA, 1033 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); 1034 } 1035 1036 static void 1037 wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) 1038 { 1039 const struct sseu_dev_info *sseu = &i915->gt.info.sseu; 1040 unsigned int slice, subslice; 1041 u32 l3_en, mcr, mcr_mask; 1042 1043 GEM_BUG_ON(INTEL_GEN(i915) < 10); 1044 1045 /* 1046 * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl 1047 * L3Banks could be fused off in single slice scenario. If that is 1048 * the case, we might need to program MCR select to a valid L3Bank 1049 * by default, to make sure we correctly read certain registers 1050 * later on (in the range 0xB100 - 0xB3FF). 
1051 * 1052 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl 1053 * Before any MMIO read into slice/subslice specific registers, MCR 1054 * packet control register needs to be programmed to point to any 1055 * enabled s/ss pair. Otherwise, incorrect values will be returned. 1056 * This means each subsequent MMIO read will be forwarded to an 1057 * specific s/ss combination, but this is OK since these registers 1058 * are consistent across s/ss in almost all cases. In the rare 1059 * occasions, such as INSTDONE, where this value is dependent 1060 * on s/ss combo, the read should be done with read_subslice_reg. 1061 * 1062 * Since GEN8_MCR_SELECTOR contains dual-purpose bits which select both 1063 * to which subslice, or to which L3 bank, the respective mmio reads 1064 * will go, we have to find a common index which works for both 1065 * accesses. 1066 * 1067 * Case where we cannot find a common index fortunately should not 1068 * happen in production hardware, so we only emit a warning instead of 1069 * implementing something more complex that requires checking the range 1070 * of every MMIO read. 1071 */ 1072 1073 if (INTEL_GEN(i915) >= 10 && is_power_of_2(sseu->slice_mask)) { 1074 u32 l3_fuse = 1075 intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) & 1076 GEN10_L3BANK_MASK; 1077 1078 drm_dbg(&i915->drm, "L3 fuse = %x\n", l3_fuse); 1079 l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse); 1080 } else { 1081 l3_en = ~0; 1082 } 1083 1084 slice = fls(sseu->slice_mask) - 1; 1085 subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice)); 1086 if (!subslice) { 1087 drm_warn(&i915->drm, 1088 "No common index found between subslice mask %x and L3 bank mask %x!\n", 1089 intel_sseu_get_subslices(sseu, slice), l3_en); 1090 subslice = fls(l3_en); 1091 drm_WARN_ON(&i915->drm, !subslice); 1092 } 1093 subslice--; 1094 1095 if (INTEL_GEN(i915) >= 11) { 1096 mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); 1097 mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; 1098 } else { 1099 mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); 1100 mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; 1101 } 1102 1103 drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr); 1104 1105 wa_write_masked_or(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr); 1106 } 1107 1108 static void 1109 cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 1110 { 1111 wa_init_mcr(i915, wal); 1112 1113 /* WaInPlaceDecompressionHang:cnl */ 1114 wa_write_or(wal, 1115 GEN9_GAMT_ECO_REG_RW_IA, 1116 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); 1117 } 1118 1119 static void 1120 icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 1121 { 1122 wa_init_mcr(i915, wal); 1123 1124 /* WaInPlaceDecompressionHang:icl */ 1125 wa_write_or(wal, 1126 GEN9_GAMT_ECO_REG_RW_IA, 1127 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); 1128 1129 /* WaModifyGamTlbPartitioning:icl */ 1130 wa_write_masked_or(wal, 1131 GEN11_GACB_PERF_CTRL, 1132 GEN11_HASH_CTRL_MASK, 1133 GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4); 1134 1135 /* Wa_1405766107:icl 1136 * Formerly known as WaCL2SFHalfMaxAlloc 1137 */ 1138 wa_write_or(wal, 1139 GEN11_LSN_UNSLCVC, 1140 GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC | 1141 GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC); 1142 1143 /* Wa_220166154:icl 1144 * Formerly known as WaDisCtxReload 1145 */ 1146 wa_write_or(wal, 1147 GEN8_GAMW_ECO_DEV_RW_IA, 1148 GAMW_ECO_DEV_CTX_RELOAD_DISABLE); 1149 1150 /* Wa_1405779004:icl (pre-prod) */ 1151 if (IS_ICL_REVID(i915, ICL_REVID_A0, 
ICL_REVID_A0)) 1152 wa_write_or(wal, 1153 SLICE_UNIT_LEVEL_CLKGATE, 1154 MSCUNIT_CLKGATE_DIS); 1155 1156 /* Wa_1406838659:icl (pre-prod) */ 1157 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) 1158 wa_write_or(wal, 1159 INF_UNIT_LEVEL_CLKGATE, 1160 CGPSF_CLKGATE_DIS); 1161 1162 /* Wa_1406463099:icl 1163 * Formerly known as WaGamTlbPendError 1164 */ 1165 wa_write_or(wal, 1166 GAMT_CHKN_BIT_REG, 1167 GAMT_CHKN_DISABLE_L3_COH_PIPE); 1168 1169 /* Wa_1607087056:icl,ehl,jsl */ 1170 if (IS_ICELAKE(i915) || 1171 IS_EHL_REVID(i915, EHL_REVID_A0, EHL_REVID_A0)) { 1172 wa_write_or(wal, 1173 SLICE_UNIT_LEVEL_CLKGATE, 1174 L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); 1175 } 1176 } 1177 1178 static void 1179 tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 1180 { 1181 wa_init_mcr(i915, wal); 1182 1183 /* Wa_1409420604:tgl */ 1184 if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) 1185 wa_write_or(wal, 1186 SUBSLICE_UNIT_LEVEL_CLKGATE2, 1187 CPSSUNIT_CLKGATE_DIS); 1188 1189 /* Wa_1607087056:tgl also know as BUG:1409180338 */ 1190 if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) 1191 wa_write_or(wal, 1192 SLICE_UNIT_LEVEL_CLKGATE, 1193 L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); 1194 } 1195 1196 static void 1197 gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) 1198 { 1199 if (IS_GEN(i915, 12)) 1200 tgl_gt_workarounds_init(i915, wal); 1201 else if (IS_GEN(i915, 11)) 1202 icl_gt_workarounds_init(i915, wal); 1203 else if (IS_CANNONLAKE(i915)) 1204 cnl_gt_workarounds_init(i915, wal); 1205 else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) 1206 cfl_gt_workarounds_init(i915, wal); 1207 else if (IS_GEMINILAKE(i915)) 1208 glk_gt_workarounds_init(i915, wal); 1209 else if (IS_KABYLAKE(i915)) 1210 kbl_gt_workarounds_init(i915, wal); 1211 else if (IS_BROXTON(i915)) 1212 bxt_gt_workarounds_init(i915, wal); 1213 else if (IS_SKYLAKE(i915)) 1214 skl_gt_workarounds_init(i915, wal); 1215 else if (IS_HASWELL(i915)) 1216 hsw_gt_workarounds_init(i915, wal); 1217 else if (IS_VALLEYVIEW(i915)) 1218 vlv_gt_workarounds_init(i915, wal); 1219 else if (IS_IVYBRIDGE(i915)) 1220 ivb_gt_workarounds_init(i915, wal); 1221 else if (IS_GEN(i915, 6)) 1222 snb_gt_workarounds_init(i915, wal); 1223 else if (IS_GEN(i915, 5)) 1224 ilk_gt_workarounds_init(i915, wal); 1225 else if (IS_G4X(i915)) 1226 g4x_gt_workarounds_init(i915, wal); 1227 else if (IS_GEN(i915, 4)) 1228 gen4_gt_workarounds_init(i915, wal); 1229 else if (INTEL_GEN(i915) <= 8) 1230 return; 1231 else 1232 MISSING_CASE(INTEL_GEN(i915)); 1233 } 1234 1235 void intel_gt_init_workarounds(struct drm_i915_private *i915) 1236 { 1237 struct i915_wa_list *wal = &i915->gt_wa_list; 1238 1239 wa_init_start(wal, "GT", "global"); 1240 gt_init_workarounds(i915, wal); 1241 wa_init_finish(wal); 1242 } 1243 1244 static enum forcewake_domains 1245 wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal) 1246 { 1247 enum forcewake_domains fw = 0; 1248 struct i915_wa *wa; 1249 unsigned int i; 1250 1251 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) 1252 fw |= intel_uncore_forcewake_for_reg(uncore, 1253 wa->reg, 1254 FW_REG_READ | 1255 FW_REG_WRITE); 1256 1257 return fw; 1258 } 1259 1260 static bool 1261 wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from) 1262 { 1263 if ((cur ^ wa->set) & wa->read) { 1264 DRM_ERROR("%s workaround lost on %s! 
(%x=%x/%x, expected %x)\n", 1265 name, from, i915_mmio_reg_offset(wa->reg), 1266 cur, cur & wa->read, wa->set); 1267 1268 return false; 1269 } 1270 1271 return true; 1272 } 1273 1274 static void 1275 wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) 1276 { 1277 enum forcewake_domains fw; 1278 unsigned long flags; 1279 struct i915_wa *wa; 1280 unsigned int i; 1281 1282 if (!wal->count) 1283 return; 1284 1285 fw = wal_get_fw_for_rmw(uncore, wal); 1286 1287 spin_lock_irqsave(&uncore->lock, flags); 1288 intel_uncore_forcewake_get__locked(uncore, fw); 1289 1290 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { 1291 if (wa->clr) 1292 intel_uncore_rmw_fw(uncore, wa->reg, wa->clr, wa->set); 1293 else 1294 intel_uncore_write_fw(uncore, wa->reg, wa->set); 1295 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 1296 wa_verify(wa, 1297 intel_uncore_read_fw(uncore, wa->reg), 1298 wal->name, "application"); 1299 } 1300 1301 intel_uncore_forcewake_put__locked(uncore, fw); 1302 spin_unlock_irqrestore(&uncore->lock, flags); 1303 } 1304 1305 void intel_gt_apply_workarounds(struct intel_gt *gt) 1306 { 1307 wa_list_apply(gt->uncore, >->i915->gt_wa_list); 1308 } 1309 1310 static bool wa_list_verify(struct intel_uncore *uncore, 1311 const struct i915_wa_list *wal, 1312 const char *from) 1313 { 1314 struct i915_wa *wa; 1315 unsigned int i; 1316 bool ok = true; 1317 1318 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) 1319 ok &= wa_verify(wa, 1320 intel_uncore_read(uncore, wa->reg), 1321 wal->name, from); 1322 1323 return ok; 1324 } 1325 1326 bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from) 1327 { 1328 return wa_list_verify(gt->uncore, >->i915->gt_wa_list, from); 1329 } 1330 1331 static inline bool is_nonpriv_flags_valid(u32 flags) 1332 { 1333 /* Check only valid flag bits are set */ 1334 if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID) 1335 return false; 1336 1337 /* NB: Only 3 out of 4 enum values are valid for access field */ 1338 if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) == 1339 RING_FORCE_TO_NONPRIV_ACCESS_INVALID) 1340 return false; 1341 1342 return true; 1343 } 1344 1345 static void 1346 whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags) 1347 { 1348 struct i915_wa wa = { 1349 .reg = reg 1350 }; 1351 1352 if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS)) 1353 return; 1354 1355 if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags))) 1356 return; 1357 1358 wa.reg.reg |= flags; 1359 _wa_add(wal, &wa); 1360 } 1361 1362 static void 1363 whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg) 1364 { 1365 whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW); 1366 } 1367 1368 static void gen9_whitelist_build(struct i915_wa_list *w) 1369 { 1370 /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ 1371 whitelist_reg(w, GEN9_CTX_PREEMPT_REG); 1372 1373 /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */ 1374 whitelist_reg(w, GEN8_CS_CHICKEN1); 1375 1376 /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */ 1377 whitelist_reg(w, GEN8_HDC_CHICKEN1); 1378 1379 /* WaSendPushConstantsFromMMIO:skl,bxt */ 1380 whitelist_reg(w, COMMON_SLICE_CHICKEN2); 1381 } 1382 1383 static void skl_whitelist_build(struct intel_engine_cs *engine) 1384 { 1385 struct i915_wa_list *w = &engine->whitelist; 1386 1387 if (engine->class != RENDER_CLASS) 1388 return; 1389 1390 gen9_whitelist_build(w); 1391 1392 /* WaDisableLSQCROPERFforOCL:skl */ 1393 whitelist_reg(w, GEN8_L3SQCREG4); 1394 } 1395 1396 static void 
bxt_whitelist_build(struct intel_engine_cs *engine) 1397 { 1398 if (engine->class != RENDER_CLASS) 1399 return; 1400 1401 gen9_whitelist_build(&engine->whitelist); 1402 } 1403 1404 static void kbl_whitelist_build(struct intel_engine_cs *engine) 1405 { 1406 struct i915_wa_list *w = &engine->whitelist; 1407 1408 if (engine->class != RENDER_CLASS) 1409 return; 1410 1411 gen9_whitelist_build(w); 1412 1413 /* WaDisableLSQCROPERFforOCL:kbl */ 1414 whitelist_reg(w, GEN8_L3SQCREG4); 1415 } 1416 1417 static void glk_whitelist_build(struct intel_engine_cs *engine) 1418 { 1419 struct i915_wa_list *w = &engine->whitelist; 1420 1421 if (engine->class != RENDER_CLASS) 1422 return; 1423 1424 gen9_whitelist_build(w); 1425 1426 /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */ 1427 whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1); 1428 } 1429 1430 static void cfl_whitelist_build(struct intel_engine_cs *engine) 1431 { 1432 struct i915_wa_list *w = &engine->whitelist; 1433 1434 if (engine->class != RENDER_CLASS) 1435 return; 1436 1437 gen9_whitelist_build(w); 1438 1439 /* 1440 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml 1441 * 1442 * This covers 4 register which are next to one another : 1443 * - PS_INVOCATION_COUNT 1444 * - PS_INVOCATION_COUNT_UDW 1445 * - PS_DEPTH_COUNT 1446 * - PS_DEPTH_COUNT_UDW 1447 */ 1448 whitelist_reg_ext(w, PS_INVOCATION_COUNT, 1449 RING_FORCE_TO_NONPRIV_ACCESS_RD | 1450 RING_FORCE_TO_NONPRIV_RANGE_4); 1451 } 1452 1453 static void cml_whitelist_build(struct intel_engine_cs *engine) 1454 { 1455 struct i915_wa_list *w = &engine->whitelist; 1456 1457 if (engine->class != RENDER_CLASS) 1458 whitelist_reg_ext(w, 1459 RING_CTX_TIMESTAMP(engine->mmio_base), 1460 RING_FORCE_TO_NONPRIV_ACCESS_RD); 1461 1462 cfl_whitelist_build(engine); 1463 } 1464 1465 static void cnl_whitelist_build(struct intel_engine_cs *engine) 1466 { 1467 struct i915_wa_list *w = &engine->whitelist; 1468 1469 if (engine->class != RENDER_CLASS) 1470 return; 1471 1472 /* WaEnablePreemptionGranularityControlByUMD:cnl */ 1473 whitelist_reg(w, GEN8_CS_CHICKEN1); 1474 } 1475 1476 static void icl_whitelist_build(struct intel_engine_cs *engine) 1477 { 1478 struct i915_wa_list *w = &engine->whitelist; 1479 1480 switch (engine->class) { 1481 case RENDER_CLASS: 1482 /* WaAllowUMDToModifyHalfSliceChicken7:icl */ 1483 whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7); 1484 1485 /* WaAllowUMDToModifySamplerMode:icl */ 1486 whitelist_reg(w, GEN10_SAMPLER_MODE); 1487 1488 /* WaEnableStateCacheRedirectToCS:icl */ 1489 whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1); 1490 1491 /* 1492 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl 1493 * 1494 * This covers 4 register which are next to one another : 1495 * - PS_INVOCATION_COUNT 1496 * - PS_INVOCATION_COUNT_UDW 1497 * - PS_DEPTH_COUNT 1498 * - PS_DEPTH_COUNT_UDW 1499 */ 1500 whitelist_reg_ext(w, PS_INVOCATION_COUNT, 1501 RING_FORCE_TO_NONPRIV_ACCESS_RD | 1502 RING_FORCE_TO_NONPRIV_RANGE_4); 1503 break; 1504 1505 case VIDEO_DECODE_CLASS: 1506 /* hucStatusRegOffset */ 1507 whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base), 1508 RING_FORCE_TO_NONPRIV_ACCESS_RD); 1509 /* hucUKernelHdrInfoRegOffset */ 1510 whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base), 1511 RING_FORCE_TO_NONPRIV_ACCESS_RD); 1512 /* hucStatus2RegOffset */ 1513 whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base), 1514 RING_FORCE_TO_NONPRIV_ACCESS_RD); 1515 whitelist_reg_ext(w, 1516 RING_CTX_TIMESTAMP(engine->mmio_base), 1517 RING_FORCE_TO_NONPRIV_ACCESS_RD); 1518 break; 1519 
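	/* Everything else gets read access to its CTX_TIMESTAMP register */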
1520 default: 1521 whitelist_reg_ext(w, 1522 RING_CTX_TIMESTAMP(engine->mmio_base), 1523 RING_FORCE_TO_NONPRIV_ACCESS_RD); 1524 break; 1525 } 1526 } 1527 1528 static void tgl_whitelist_build(struct intel_engine_cs *engine) 1529 { 1530 struct i915_wa_list *w = &engine->whitelist; 1531 1532 switch (engine->class) { 1533 case RENDER_CLASS: 1534 /* 1535 * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl 1536 * Wa_1408556865:tgl 1537 * 1538 * This covers 4 registers which are next to one another : 1539 * - PS_INVOCATION_COUNT 1540 * - PS_INVOCATION_COUNT_UDW 1541 * - PS_DEPTH_COUNT 1542 * - PS_DEPTH_COUNT_UDW 1543 */ 1544 whitelist_reg_ext(w, PS_INVOCATION_COUNT, 1545 RING_FORCE_TO_NONPRIV_ACCESS_RD | 1546 RING_FORCE_TO_NONPRIV_RANGE_4); 1547 1548 /* Wa_1808121037:tgl */ 1549 whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1); 1550 1551 /* Wa_1806527549:tgl */ 1552 whitelist_reg(w, HIZ_CHICKEN); 1553 break; 1554 default: 1555 whitelist_reg_ext(w, 1556 RING_CTX_TIMESTAMP(engine->mmio_base), 1557 RING_FORCE_TO_NONPRIV_ACCESS_RD); 1558 break; 1559 } 1560 } 1561 1562 void intel_engine_init_whitelist(struct intel_engine_cs *engine) 1563 { 1564 struct drm_i915_private *i915 = engine->i915; 1565 struct i915_wa_list *w = &engine->whitelist; 1566 1567 wa_init_start(w, "whitelist", engine->name); 1568 1569 if (IS_GEN(i915, 12)) 1570 tgl_whitelist_build(engine); 1571 else if (IS_GEN(i915, 11)) 1572 icl_whitelist_build(engine); 1573 else if (IS_CANNONLAKE(i915)) 1574 cnl_whitelist_build(engine); 1575 else if (IS_COMETLAKE(i915)) 1576 cml_whitelist_build(engine); 1577 else if (IS_COFFEELAKE(i915)) 1578 cfl_whitelist_build(engine); 1579 else if (IS_GEMINILAKE(i915)) 1580 glk_whitelist_build(engine); 1581 else if (IS_KABYLAKE(i915)) 1582 kbl_whitelist_build(engine); 1583 else if (IS_BROXTON(i915)) 1584 bxt_whitelist_build(engine); 1585 else if (IS_SKYLAKE(i915)) 1586 skl_whitelist_build(engine); 1587 else if (INTEL_GEN(i915) <= 8) 1588 return; 1589 else 1590 MISSING_CASE(INTEL_GEN(i915)); 1591 1592 wa_init_finish(w); 1593 } 1594 1595 void intel_engine_apply_whitelist(struct intel_engine_cs *engine) 1596 { 1597 const struct i915_wa_list *wal = &engine->whitelist; 1598 struct intel_uncore *uncore = engine->uncore; 1599 const u32 base = engine->mmio_base; 1600 struct i915_wa *wa; 1601 unsigned int i; 1602 1603 if (!wal->count) 1604 return; 1605 1606 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) 1607 intel_uncore_write(uncore, 1608 RING_FORCE_TO_NONPRIV(base, i), 1609 i915_mmio_reg_offset(wa->reg)); 1610 1611 /* And clear the rest just in case of garbage */ 1612 for (; i < RING_MAX_NONPRIV_SLOTS; i++) 1613 intel_uncore_write(uncore, 1614 RING_FORCE_TO_NONPRIV(base, i), 1615 i915_mmio_reg_offset(RING_NOPID(base))); 1616 } 1617 1618 static void 1619 rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) 1620 { 1621 struct drm_i915_private *i915 = engine->i915; 1622 1623 if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) { 1624 /* 1625 * Wa_1607138336:tgl 1626 * Wa_1607063988:tgl 1627 */ 1628 wa_write_or(wal, 1629 GEN9_CTX_PREEMPT_REG, 1630 GEN12_DISABLE_POSH_BUSY_FF_DOP_CG); 1631 1632 /* 1633 * Wa_1607030317:tgl 1634 * Wa_1607186500:tgl 1635 * Wa_1607297627:tgl there is 3 entries for this WA on BSpec, 2 1636 * of then says it is fixed on B0 the other one says it is 1637 * permanent 1638 */ 1639 wa_masked_en(wal, 1640 GEN6_RC_SLEEP_PSMI_CONTROL, 1641 GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | 1642 GEN8_RC_SEMA_IDLE_MSG_DISABLE); 1643 1644 /* 1645 * Wa_1606679103:tgl 1646 * (see also 
Wa_1606682166:icl) 1647 */ 1648 wa_write_or(wal, 1649 GEN7_SARCHKMD, 1650 GEN7_DISABLE_SAMPLER_PREFETCH); 1651 1652 /* Wa_1408615072:tgl */ 1653 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, 1654 VSUNIT_CLKGATE_DIS_TGL); 1655 } 1656 1657 if (IS_TIGERLAKE(i915)) { 1658 /* Wa_1606931601:tgl */ 1659 wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ); 1660 1661 /* Wa_1409804808:tgl */ 1662 wa_masked_en(wal, GEN7_ROW_CHICKEN2, 1663 GEN12_PUSH_CONST_DEREF_HOLD_DIS); 1664 1665 /* Wa_1606700617:tgl */ 1666 wa_masked_en(wal, 1667 GEN9_CS_DEBUG_MODE1, 1668 FF_DOP_CLOCK_GATE_DISABLE); 1669 1670 /* 1671 * Wa_1409085225:tgl 1672 * Wa_14010229206:tgl 1673 */ 1674 wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH); 1675 1676 /* 1677 * Wa_1407928979:tgl A* 1678 * Wa_18011464164:tgl B0+ 1679 * Wa_22010931296:tgl B0+ 1680 */ 1681 wa_write_or(wal, GEN7_FF_THREAD_MODE, 1682 GEN12_FF_TESSELATION_DOP_GATE_DISABLE); 1683 } 1684 1685 if (IS_GEN(i915, 11)) { 1686 /* This is not an Wa. Enable for better image quality */ 1687 wa_masked_en(wal, 1688 _3D_CHICKEN3, 1689 _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE); 1690 1691 /* WaPipelineFlushCoherentLines:icl */ 1692 wa_write_or(wal, 1693 GEN8_L3SQCREG4, 1694 GEN8_LQSC_FLUSH_COHERENT_LINES); 1695 1696 /* 1697 * Wa_1405543622:icl 1698 * Formerly known as WaGAPZPriorityScheme 1699 */ 1700 wa_write_or(wal, 1701 GEN8_GARBCNTL, 1702 GEN11_ARBITRATION_PRIO_ORDER_MASK); 1703 1704 /* 1705 * Wa_1604223664:icl 1706 * Formerly known as WaL3BankAddressHashing 1707 */ 1708 wa_write_masked_or(wal, 1709 GEN8_GARBCNTL, 1710 GEN11_HASH_CTRL_EXCL_MASK, 1711 GEN11_HASH_CTRL_EXCL_BIT0); 1712 wa_write_masked_or(wal, 1713 GEN11_GLBLINVL, 1714 GEN11_BANK_HASH_ADDR_EXCL_MASK, 1715 GEN11_BANK_HASH_ADDR_EXCL_BIT0); 1716 1717 /* 1718 * Wa_1405733216:icl 1719 * Formerly known as WaDisableCleanEvicts 1720 */ 1721 wa_write_or(wal, 1722 GEN8_L3SQCREG4, 1723 GEN11_LQSC_CLEAN_EVICT_DISABLE); 1724 1725 /* WaForwardProgressSoftReset:icl */ 1726 wa_write_or(wal, 1727 GEN10_SCRATCH_LNCF2, 1728 PMFLUSHDONE_LNICRSDROP | 1729 PMFLUSH_GAPL3UNBLOCK | 1730 PMFLUSHDONE_LNEBLK); 1731 1732 /* Wa_1406609255:icl (pre-prod) */ 1733 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) 1734 wa_write_or(wal, 1735 GEN7_SARCHKMD, 1736 GEN7_DISABLE_DEMAND_PREFETCH); 1737 1738 /* Wa_1606682166:icl */ 1739 wa_write_or(wal, 1740 GEN7_SARCHKMD, 1741 GEN7_DISABLE_SAMPLER_PREFETCH); 1742 1743 /* Wa_1409178092:icl */ 1744 wa_write_masked_or(wal, 1745 GEN11_SCRATCH2, 1746 GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE, 1747 0); 1748 1749 /* WaEnable32PlaneMode:icl */ 1750 wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS, 1751 GEN11_ENABLE_32_PLANE_MODE); 1752 1753 /* 1754 * Wa_1408615072:icl,ehl (vsunit) 1755 * Wa_1407596294:icl,ehl (hsunit) 1756 */ 1757 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, 1758 VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS); 1759 1760 /* Wa_1407352427:icl,ehl */ 1761 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, 1762 PSDUNIT_CLKGATE_DIS); 1763 1764 /* Wa_1406680159:icl,ehl */ 1765 wa_write_or(wal, 1766 SUBSLICE_UNIT_LEVEL_CLKGATE, 1767 GWUNIT_CLKGATE_DIS); 1768 1769 /* 1770 * Wa_1408767742:icl[a2..forever],ehl[all] 1771 * Wa_1605460711:icl[a0..c0] 1772 */ 1773 wa_write_or(wal, 1774 GEN7_FF_THREAD_MODE, 1775 GEN12_FF_TESSELATION_DOP_GATE_DISABLE); 1776 1777 /* Wa_22010271021:ehl */ 1778 if (IS_ELKHARTLAKE(i915)) 1779 wa_masked_en(wal, 1780 GEN9_CS_DEBUG_MODE1, 1781 FF_DOP_CLOCK_GATE_DISABLE); 1782 } 1783 1784 if (IS_GEN_RANGE(i915, 9, 12)) { 1785 /* 
FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */ 1786 wa_masked_en(wal, 1787 GEN7_FF_SLICE_CS_CHICKEN1, 1788 GEN9_FFSC_PERCTX_PREEMPT_CTRL); 1789 } 1790 1791 if (IS_SKYLAKE(i915) || 1792 IS_KABYLAKE(i915) || 1793 IS_COFFEELAKE(i915) || 1794 IS_COMETLAKE(i915)) { 1795 /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */ 1796 wa_write_or(wal, 1797 GEN8_GARBCNTL, 1798 GEN9_GAPS_TSV_CREDIT_DISABLE); 1799 } 1800 1801 if (IS_BROXTON(i915)) { 1802 /* WaDisablePooledEuLoadBalancingFix:bxt */ 1803 wa_masked_en(wal, 1804 FF_SLICE_CS_CHICKEN2, 1805 GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE); 1806 } 1807 1808 if (IS_GEN(i915, 9)) { 1809 /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */ 1810 wa_masked_en(wal, 1811 GEN9_CSFE_CHICKEN1_RCS, 1812 GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE); 1813 1814 /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */ 1815 wa_write_or(wal, 1816 BDW_SCRATCH1, 1817 GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); 1818 1819 /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */ 1820 if (IS_GEN9_LP(i915)) 1821 wa_write_masked_or(wal, 1822 GEN8_L3SQCREG1, 1823 L3_PRIO_CREDITS_MASK, 1824 L3_GENERAL_PRIO_CREDITS(62) | 1825 L3_HIGH_PRIO_CREDITS(2)); 1826 1827 /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */ 1828 wa_write_or(wal, 1829 GEN8_L3SQCREG4, 1830 GEN8_LQSC_FLUSH_COHERENT_LINES); 1831 } 1832 1833 if (IS_GEN(i915, 7)) 1834 /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */ 1835 wa_masked_en(wal, 1836 GFX_MODE_GEN7, 1837 GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE); 1838 1839 if (IS_GEN_RANGE(i915, 6, 7)) 1840 /* 1841 * We need to disable the AsyncFlip performance optimisations in 1842 * order to use MI_WAIT_FOR_EVENT within the CS. It should 1843 * already be programmed to '1' on all products. 1844 * 1845 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv 1846 */ 1847 wa_masked_en(wal, 1848 MI_MODE, 1849 ASYNC_FLIP_PERF_DISABLE); 1850 1851 if (IS_GEN(i915, 6)) { 1852 /* 1853 * Required for the hardware to program scanline values for 1854 * waiting 1855 * WaEnableFlushTlbInvalidationMode:snb 1856 */ 1857 wa_masked_en(wal, 1858 GFX_MODE, 1859 GFX_TLB_INVALIDATE_EXPLICIT); 1860 1861 /* 1862 * From the Sandybridge PRM, volume 1 part 3, page 24: 1863 * "If this bit is set, STCunit will have LRA as replacement 1864 * policy. [...] This bit must be reset. LRA replacement 1865 * policy is not supported." 1866 */ 1867 wa_masked_dis(wal, 1868 CACHE_MODE_0, 1869 CM0_STC_EVICT_DISABLE_LRA_SNB); 1870 } 1871 1872 if (IS_GEN_RANGE(i915, 4, 6)) 1873 /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ 1874 wa_add(wal, MI_MODE, 1875 0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH), 1876 /* XXX bit doesn't stick on Broadwater */ 1877 IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH); 1878 1879 if (IS_GEN(i915, 4)) 1880 /* 1881 * Disable CONSTANT_BUFFER before it is loaded from the context 1882 * image. For as it is loaded, it is executed and the stored 1883 * address may no longer be valid, leading to a GPU hang. 1884 * 1885 * This imposes the requirement that userspace reload their 1886 * CONSTANT_BUFFER on every batch, fortunately a requirement 1887 * they are already accustomed to from before contexts were 1888 * enabled. 
1889 */ 1890 wa_add(wal, ECOSKPD, 1891 0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE), 1892 0 /* XXX bit doesn't stick on Broadwater */); 1893 } 1894 1895 static void 1896 xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) 1897 { 1898 struct drm_i915_private *i915 = engine->i915; 1899 1900 /* WaKBLVECSSemaphoreWaitPoll:kbl */ 1901 if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) { 1902 wa_write(wal, 1903 RING_SEMA_WAIT_POLL(engine->mmio_base), 1904 1); 1905 } 1906 } 1907 1908 static void 1909 engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal) 1910 { 1911 if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 4)) 1912 return; 1913 1914 if (engine->class == RENDER_CLASS) 1915 rcs_engine_wa_init(engine, wal); 1916 else 1917 xcs_engine_wa_init(engine, wal); 1918 } 1919 1920 void intel_engine_init_workarounds(struct intel_engine_cs *engine) 1921 { 1922 struct i915_wa_list *wal = &engine->wa_list; 1923 1924 if (INTEL_GEN(engine->i915) < 4) 1925 return; 1926 1927 wa_init_start(wal, "engine", engine->name); 1928 engine_init_workarounds(engine, wal); 1929 wa_init_finish(wal); 1930 } 1931 1932 void intel_engine_apply_workarounds(struct intel_engine_cs *engine) 1933 { 1934 wa_list_apply(engine->uncore, &engine->wa_list); 1935 } 1936 1937 static struct i915_vma * 1938 create_scratch(struct i915_address_space *vm, int count) 1939 { 1940 struct drm_i915_gem_object *obj; 1941 struct i915_vma *vma; 1942 unsigned int size; 1943 int err; 1944 1945 size = round_up(count * sizeof(u32), PAGE_SIZE); 1946 obj = i915_gem_object_create_internal(vm->i915, size); 1947 if (IS_ERR(obj)) 1948 return ERR_CAST(obj); 1949 1950 i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); 1951 1952 vma = i915_vma_instance(obj, vm, NULL); 1953 if (IS_ERR(vma)) { 1954 err = PTR_ERR(vma); 1955 goto err_obj; 1956 } 1957 1958 err = i915_vma_pin(vma, 0, 0, 1959 i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER); 1960 if (err) 1961 goto err_obj; 1962 1963 return vma; 1964 1965 err_obj: 1966 i915_gem_object_put(obj); 1967 return ERR_PTR(err); 1968 } 1969 1970 static const struct { 1971 u32 start; 1972 u32 end; 1973 } mcr_ranges_gen8[] = { 1974 { .start = 0x5500, .end = 0x55ff }, 1975 { .start = 0x7000, .end = 0x7fff }, 1976 { .start = 0x9400, .end = 0x97ff }, 1977 { .start = 0xb000, .end = 0xb3ff }, 1978 { .start = 0xe000, .end = 0xe7ff }, 1979 {}, 1980 }; 1981 1982 static bool mcr_range(struct drm_i915_private *i915, u32 offset) 1983 { 1984 int i; 1985 1986 if (INTEL_GEN(i915) < 8) 1987 return false; 1988 1989 /* 1990 * Registers in these ranges are affected by the MCR selector 1991 * which only controls CPU initiated MMIO. Routing does not 1992 * work for CS access so we cannot verify them on this path. 
1993 */ 1994 for (i = 0; mcr_ranges_gen8[i].start; i++) 1995 if (offset >= mcr_ranges_gen8[i].start && 1996 offset <= mcr_ranges_gen8[i].end) 1997 return true; 1998 1999 return false; 2000 } 2001 2002 static int 2003 wa_list_srm(struct i915_request *rq, 2004 const struct i915_wa_list *wal, 2005 struct i915_vma *vma) 2006 { 2007 struct drm_i915_private *i915 = rq->engine->i915; 2008 unsigned int i, count = 0; 2009 const struct i915_wa *wa; 2010 u32 srm, *cs; 2011 2012 srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; 2013 if (INTEL_GEN(i915) >= 8) 2014 srm++; 2015 2016 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { 2017 if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg))) 2018 count++; 2019 } 2020 2021 cs = intel_ring_begin(rq, 4 * count); 2022 if (IS_ERR(cs)) 2023 return PTR_ERR(cs); 2024 2025 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { 2026 u32 offset = i915_mmio_reg_offset(wa->reg); 2027 2028 if (mcr_range(i915, offset)) 2029 continue; 2030 2031 *cs++ = srm; 2032 *cs++ = offset; 2033 *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i; 2034 *cs++ = 0; 2035 } 2036 intel_ring_advance(rq, cs); 2037 2038 return 0; 2039 } 2040 2041 static int engine_wa_list_verify(struct intel_context *ce, 2042 const struct i915_wa_list * const wal, 2043 const char *from) 2044 { 2045 const struct i915_wa *wa; 2046 struct i915_request *rq; 2047 struct i915_vma *vma; 2048 unsigned int i; 2049 u32 *results; 2050 int err; 2051 2052 if (!wal->count) 2053 return 0; 2054 2055 vma = create_scratch(&ce->engine->gt->ggtt->vm, wal->count); 2056 if (IS_ERR(vma)) 2057 return PTR_ERR(vma); 2058 2059 intel_engine_pm_get(ce->engine); 2060 rq = intel_context_create_request(ce); 2061 intel_engine_pm_put(ce->engine); 2062 if (IS_ERR(rq)) { 2063 err = PTR_ERR(rq); 2064 goto err_vma; 2065 } 2066 2067 i915_vma_lock(vma); 2068 err = i915_request_await_object(rq, vma->obj, true); 2069 if (err == 0) 2070 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); 2071 i915_vma_unlock(vma); 2072 if (err) { 2073 i915_request_add(rq); 2074 goto err_vma; 2075 } 2076 2077 err = wa_list_srm(rq, wal, vma); 2078 if (err) 2079 goto err_vma; 2080 2081 i915_request_get(rq); 2082 i915_request_add(rq); 2083 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2084 err = -ETIME; 2085 goto err_rq; 2086 } 2087 2088 results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); 2089 if (IS_ERR(results)) { 2090 err = PTR_ERR(results); 2091 goto err_rq; 2092 } 2093 2094 err = 0; 2095 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { 2096 if (mcr_range(rq->engine->i915, i915_mmio_reg_offset(wa->reg))) 2097 continue; 2098 2099 if (!wa_verify(wa, results[i], wal->name, from)) 2100 err = -ENXIO; 2101 } 2102 2103 i915_gem_object_unpin_map(vma->obj); 2104 2105 err_rq: 2106 i915_request_put(rq); 2107 err_vma: 2108 i915_vma_unpin(vma); 2109 i915_vma_put(vma); 2110 return err; 2111 } 2112 2113 int intel_engine_verify_workarounds(struct intel_engine_cs *engine, 2114 const char *from) 2115 { 2116 return engine_wa_list_verify(engine->kernel_context, 2117 &engine->wa_list, 2118 from); 2119 } 2120 2121 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 2122 #include "selftest_workarounds.c" 2123 #endif 2124
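/*
 * Illustrative note (assumed call sites, not defined in this file): the
 * verification entry points above are typically driven from resume and
 * selftest paths, e.g.
 *
 *	intel_gt_verify_workarounds(gt, "resume");
 *	intel_engine_verify_workarounds(engine, "selftest");
 *
 * Both re-read the recorded registers and flag any entry for which
 * (cur ^ wa->set) & wa->read is non-zero via wa_verify().
 */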