/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2018 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_context.h"
#include "intel_workarounds.h"

/**
 * DOC: Hardware workarounds
 *
 * This file is intended as a central place to implement most [1]_ of the
 * required workarounds for hardware to work as originally intended. They fall
 * in five basic categories depending on how/when they are applied:
 *
 * - Workarounds that touch registers that are saved/restored to/from the HW
 *   context image. The list is emitted (via Load Register Immediate commands)
 *   every time a new context is created.
 * - GT workarounds. The list of these WAs is applied whenever these registers
 *   revert to their default values (on GPU reset, suspend/resume [2]_, etc.).
 * - Display workarounds. The list is applied during display clock-gating
 *   initialization.
 * - Workarounds that whitelist a privileged register, so that UMDs can manage
 *   them directly. This is just a special case of an MMIO workaround (as we
 *   write the list of these to-be-whitelisted registers to some special HW
 *   registers).
 * - Workaround batchbuffers, that get executed automatically by the hardware
 *   on every HW context restore.
 *
 * .. [1] Please notice that there are other WAs that, due to their nature,
 *    cannot be applied from a central place. Those are peppered around the rest
 *    of the code, as needed.
 *
 * .. [2] Technically, some registers are powercontext saved & restored, so they
 *    survive a suspend/resume. In practice, writing them again is not too
 *    costly and simplifies things. We can revisit this in the future.
 *
 * Layout
 * ~~~~~~
 *
 * Keep things in this file ordered by WA type, as per the above (context, GT,
 * display, register whitelist, batchbuffer). Then, inside each type, keep the
 * following order:
 *
 * - Infrastructure functions and macros
 * - WAs per platform in standard gen/chrono order
 * - Public functions to init or apply the given workaround type.
 */

static void wa_init_start(struct i915_wa_list *wal, const char *name)
{
	wal->name = name;
}

#define WA_LIST_CHUNK (1 << 4)

static void wa_init_finish(struct i915_wa_list *wal)
{
	/* Trim unused entries. */
	if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
		struct i915_wa *list = kmemdup(wal->list,
					       wal->count * sizeof(*list),
					       GFP_KERNEL);

		if (list) {
			kfree(wal->list);
			wal->list = list;
		}
	}

	if (!wal->count)
		return;

	DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n",
			 wal->wa_count, wal->name);
}
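/*
 * Editorial summary of the list management helpers above and _wa_add() below
 * (no new behaviour is introduced here): a struct i915_wa_list grows in
 * WA_LIST_CHUNK (16) entry steps inside _wa_add(), is kept sorted by mmio
 * offset so that a repeated register is merged into the existing entry (val,
 * mask and read bits are ORed together), and is finally trimmed back to its
 * exact size by wa_init_finish() once the platform init function has run.
 */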
static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
{
	unsigned int addr = i915_mmio_reg_offset(wa->reg);
	unsigned int start = 0, end = wal->count;
	const unsigned int grow = WA_LIST_CHUNK;
	struct i915_wa *wa_;

	GEM_BUG_ON(!is_power_of_2(grow));

	if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
		struct i915_wa *list;

		list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
				     GFP_KERNEL);
		if (!list) {
			DRM_ERROR("No space for workaround init!\n");
			return;
		}

		if (wal->list)
			memcpy(list, wal->list, sizeof(*wa) * wal->count);

		wal->list = list;
	}

	while (start < end) {
		unsigned int mid = start + (end - start) / 2;

		if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
			start = mid + 1;
		} else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
			end = mid;
		} else {
			wa_ = &wal->list[mid];

			if ((wa->mask & ~wa_->mask) == 0) {
				DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
					  i915_mmio_reg_offset(wa_->reg),
					  wa_->mask, wa_->val);

				wa_->val &= ~wa->mask;
			}

			wal->wa_count++;
			wa_->val |= wa->val;
			wa_->mask |= wa->mask;
			wa_->read |= wa->read;
			return;
		}
	}

	wal->wa_count++;
	wa_ = &wal->list[wal->count++];
	*wa_ = *wa;

	while (wa_-- > wal->list) {
		GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
			   i915_mmio_reg_offset(wa_[1].reg));
		if (i915_mmio_reg_offset(wa_[1].reg) >
		    i915_mmio_reg_offset(wa_[0].reg))
			break;

		swap(wa_[1], wa_[0]);
	}
}

static void
wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
		   u32 val)
{
	struct i915_wa wa = {
		.reg = reg,
		.mask = mask,
		.val = val,
		.read = mask,
	};

	_wa_add(wal, &wa);
}

static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
	wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
}

static void
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
	wa_write_masked_or(wal, reg, ~0, val);
}

static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
	wa_write_masked_or(wal, reg, val, val);
}

static void
ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val)
{
	struct i915_wa wa = {
		.reg = reg,
		.mask = mask,
		.val = val,
		/* Bonkers HW, skip verifying */
	};

	_wa_add(wal, &wa);
}

#define WA_SET_BIT_MASKED(addr, mask) \
	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
	wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))
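/*
 * Editorial note, not a new workaround: the WA_*_MASKED() macros and
 * wa_masked_en() above are only suitable for "masked" registers, i.e.
 * registers whose upper 16 bits act as per-bit write enables for the lower
 * 16 bits. For example,
 *
 *	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
 *
 * records a single write of _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE),
 * which carries both the enable bit and the value bit in one dword, so no
 * read-modify-write is needed when the list is replayed from the context
 * image. Plain (non-masked) registers must use the wa_write*() helpers
 * instead.
 */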
static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);

	/* WaDisableAsyncFlipPerfMode:bdw,chv */
	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);

	/* WaDisablePartialInstShootdown:bdw,chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
	 * invalidation occurs during a PSD flush.
	 */
	/* WaForceEnableNonCoherent:bdw,chv */
	/* WaHdcDisableFetchWhenMasked:bdw,chv */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
			  HDC_FORCE_NON_COHERENT);

	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
	 *  polygons in the same 8x4 pixel/sample area to be processed without
	 *  stalling waiting for the earlier ones to write to Hierarchical Z
	 *  buffer."
	 *
	 * This optimization is off by default for BDW and CHV; turn it on.
	 */
	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

	/* Wa4x4STCOptimizationDisable:bdw,chv */
	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN6_WIZ_HASHING_MASK,
			    GEN6_WIZ_HASHING_16x4);
}

static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	gen8_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* WaDisableDopClockGating:bdw
	 *
	 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
	 * to disable EUTC clock gating.
	 */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
			  DOP_CLOCK_GATING_DISABLE);

	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
			  GEN8_SAMPLER_POWER_BYPASS_DIS);

	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  /* WaForceContextSaveRestoreNonCoherent:bdw */
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
			  (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
}

static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen8_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* Improve HiZ throughput on CHV. */
	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
}
static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	if (HAS_LLC(i915)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN9_PBE_COMPRESSED_HASH_SELECTION);
		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
				  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
	}

	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  FLOW_CONTROL_ENABLE |
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Syncing dependencies between camera and graphics:skl,bxt,kbl */
	if (!IS_COFFEELAKE(i915))
		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
				  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);

	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
			  GEN9_ENABLE_YV12_BUGFIX |
			  GEN9_ENABLE_GPGPU_PREEMPTION);

	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(CACHE_MODE_1,
			  GEN8_4x4_STC_OPTIMIZATION_DISABLE |
			  GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);

	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
			  GEN9_CCS_TLB_PREFETCH_ENABLE);

	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
	 * both tied to WaForceContextSaveRestoreNonCoherent
	 * in some hsds for skl. We keep the tie for all gen9. The
	 * documentation is a bit hazy and so we want to get common behaviour,
	 * even though there is no clear evidence we would need both on kbl/bxt.
	 * This area has been a source of system hangs so we play it safe
	 * and mimic the skl regardless of what bspec says.
	 *
	 * Use Force Non-Coherent whenever executing a 3D context. This
	 * is a workaround for a possible hang in the unlikely event
	 * a TLB invalidation occurs during a PSD flush.
	 */

	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_NON_COHERENT);

	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
	if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
				  GEN8_SAMPLER_POWER_BYPASS_DIS);

	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

	/*
	 * Supporting preemption with fine-granularity requires changes in the
	 * batch buffer programming. Since we can't break old userspace, we
	 * need to set our default preemption level to a safe value. Userspace
	 * is still able to use more fine-grained preemption levels, since in
	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
	 * not real HW workarounds, but merely a way to start using preemption
	 * while maintaining the old contract with userspace.
	 */

	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

	/* WaClearHIZ_WM_CHICKEN3:bxt,glk */
	if (IS_GEN9_LP(i915))
		WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
}
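/*
 * Editorial worked example (made-up sseu value) for the hashing tuning in
 * skl_tune_iz_hashing() below: if sseu.subslice_7eu[0] == BIT(2), i.e. only
 * subslice 2 of slice 0 has the full 7 EUs, then ffs() - 1 gives ss == 2 and
 * vals[0] == 3 - 2 == 1, which is the value programmed into
 * GEN9_IZ_HASHING(0, ...).
 */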
static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
				struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;
	u8 vals[3] = { 0, 0, 0 };
	unsigned int i;

	for (i = 0; i < 3; i++) {
		u8 ss;

		/*
		 * Only consider slices where one, and only one, subslice has 7
		 * EUs
		 */
		if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]))
			continue;

		/*
		 * subslice_7eu[i] != 0 (because of the check above) and
		 * ss_max == 4 (maximum number of subslices possible per slice)
		 *
		 * ->    0 <= ss <= 3;
		 */
		ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1;
		vals[i] = 3 - ss;
	}

	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
		return;

	/* Tune IZ hashing. See intel_device_info_runtime_init() */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN9_IZ_HASHING_MASK(2) |
			    GEN9_IZ_HASHING_MASK(1) |
			    GEN9_IZ_HASHING_MASK(0),
			    GEN9_IZ_HASHING(2, vals[2]) |
			    GEN9_IZ_HASHING(1, vals[1]) |
			    GEN9_IZ_HASHING(0, vals[0]));
}

static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);
	skl_tune_iz_hashing(engine, wal);
}

static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:bxt */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  STALL_DOP_GATING_DISABLE);

	/* WaToEnableHwFixForPushConstHWBug:bxt */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	gen9_ctx_workarounds_init(engine, wal);

	/* WaToEnableHwFixForPushConstHWBug:kbl */
	if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableSbeCacheDispatchPortSharing:kbl */
	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);

	/* WaToEnableHwFixForPushConstHWBug:glk */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);

	/* WaToEnableHwFixForPushConstHWBug:cfl */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableSbeCacheDispatchPortSharing:cfl */
	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}
static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	/* WaForceContextSaveRestoreNonCoherent:cnl */
	WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);

	/* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
	if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
		WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);

	/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
	if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);

	/* WaPushConstantDereferenceHoldDisable:cnl */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);

	/* FtrEnableFastAnisoL1BankingFix:cnl */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);

	/* WaDisable3DMidCmdPreemption:cnl */
	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

	/* WaDisableGPGPUMidCmdPreemption:cnl */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

	/* WaDisableEarlyEOT:cnl */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
}
static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	/* WaDisableBankHangMode:icl */
	wa_write(wal,
		 GEN8_L3CNTLREG,
		 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
		 GEN8_ERRDETBCTRL);

	/* Wa_1604370585:icl (pre-prod)
	 * Formerly known as WaPushConstantDereferenceHoldDisable
	 */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
				  PUSH_CONSTANT_DEREF_DISABLE);

	/* WaForceEnableNonCoherent:icl
	 * This is not the same workaround as in early Gen9 platforms, where
	 * lacking this could cause system hangs, but coherency performance
	 * overhead is high and only a few compute workloads really need it
	 * (the register is whitelisted in hardware now, so UMDs can opt in
	 * for coherency if they have a good reason).
	 */
	WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);

	/* Wa_2006611047:icl (pre-prod)
	 * Formerly known as WaDisableImprovedTdlClkGating
	 */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
				  GEN11_TDL_CLOCK_GATING_FIX_DISABLE);

	/* Wa_2006665173:icl (pre-prod) */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
		WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
				  GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);

	/* WaEnableFloatBlendOptimization:icl */
	wa_write_masked_or(wal,
			   GEN10_CACHE_MODE_SS,
			   0, /* write-only, so skip validation */
			   _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));

	/* WaDisableGPGPUMidThreadPreemption:icl */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);

	/* allow headerless messages for preemptible GPGPU context */
	WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE,
			  GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
}

static void
__intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
			   struct i915_wa_list *wal,
			   const char *name)
{
	struct drm_i915_private *i915 = engine->i915;

	if (engine->class != RENDER_CLASS)
		return;

	wa_init_start(wal, name);

	if (IS_GEN(i915, 11))
		icl_ctx_workarounds_init(engine, wal);
	else if (IS_CANNONLAKE(i915))
		cnl_ctx_workarounds_init(engine, wal);
	else if (IS_COFFEELAKE(i915))
		cfl_ctx_workarounds_init(engine, wal);
	else if (IS_GEMINILAKE(i915))
		glk_ctx_workarounds_init(engine, wal);
	else if (IS_KABYLAKE(i915))
		kbl_ctx_workarounds_init(engine, wal);
	else if (IS_BROXTON(i915))
		bxt_ctx_workarounds_init(engine, wal);
	else if (IS_SKYLAKE(i915))
		skl_ctx_workarounds_init(engine, wal);
	else if (IS_CHERRYVIEW(i915))
		chv_ctx_workarounds_init(engine, wal);
	else if (IS_BROADWELL(i915))
		bdw_ctx_workarounds_init(engine, wal);
	else if (INTEL_GEN(i915) < 8)
		return;
	else
		MISSING_CASE(INTEL_GEN(i915));

	wa_init_finish(wal);
}

void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
{
	__intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
}

int intel_engine_emit_ctx_wa(struct i915_request *rq)
{
	struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
	struct i915_wa *wa;
	unsigned int i;
	u32 *cs;
	int ret;

	if (wal->count == 0)
		return 0;

	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

	cs = intel_ring_begin(rq, (wal->count * 2 + 2));
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		*cs++ = i915_mmio_reg_offset(wa->reg);
		*cs++ = wa->val;
	}
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

	return 0;
}
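/*
 * Editorial note on intel_engine_emit_ctx_wa() above: the ring space it
 * reserves, wal->count * 2 + 2 dwords, is exactly one MI_LOAD_REGISTER_IMM
 * header, one (offset, value) dword pair per workaround, and a trailing
 * MI_NOOP, which keeps the emitted block an even number of dwords long.
 */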
static void
gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	/* WaDisableKillLogic:bxt,skl,kbl */
	if (!IS_COFFEELAKE(i915))
		wa_write_or(wal,
			    GAM_ECOCHK,
			    ECOCHK_DIS_TLB);

	if (HAS_LLC(i915)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		wa_write_or(wal,
			    MMCD_MISC_CTRL,
			    MMCD_PCLA | MMCD_HOTSPOT_EN);
	}

	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
	wa_write_or(wal,
		    GAM_ECOCHK,
		    BDW_DISABLE_HDC_INVALIDATION);
}

static void
skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaDisableGafsUnitClkGating:skl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:skl */
	if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
		wa_write_or(wal,
			    GEN9_GAMT_ECO_REG_RW_IA,
			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaInPlaceDecompressionHang:bxt */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaDisableDynamicCreditSharing:kbl */
	if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
		wa_write_or(wal,
			    GAMT_CHKN_BIT_REG,
			    GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

	/* WaDisableGafsUnitClkGating:kbl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:kbl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);
}

static void
cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaDisableGafsUnitClkGating:cfl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:cfl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}
static void
wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
	u32 mcr_slice_subslice_mask;

	/*
	 * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
	 * L3Banks could be fused off in single slice scenario. If that is
	 * the case, we might need to program MCR select to a valid L3Bank
	 * by default, to make sure we correctly read certain registers
	 * later on (in the range 0xB100 - 0xB3FF).
	 * This might be incompatible with
	 * WaProgramMgsrForCorrectSliceSpecificMmioReads.
	 * Fortunately, this should not happen in production hardware, so
	 * we only assert that this is the case (instead of implementing
	 * something more complex that requires checking the range of every
	 * MMIO read).
	 */
	if (INTEL_GEN(i915) >= 10 &&
	    is_power_of_2(sseu->slice_mask)) {
		/*
		 * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches
		 * enabled subslice, no need to redirect MCR packet
		 */
		u32 slice = fls(sseu->slice_mask);
		u32 fuse3 =
			intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3);
		u8 ss_mask = sseu->subslice_mask[slice];

		u8 enabled_mask = (ss_mask | ss_mask >>
				   GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
		u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK;

		/*
		 * Production silicon should have matched L3Bank and
		 * subslice enabled
		 */
		WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
	}

	if (INTEL_GEN(i915) >= 11)
		mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
					  GEN11_MCR_SUBSLICE_MASK;
	else
		mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
					  GEN8_MCR_SUBSLICE_MASK;
	/*
	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
	 * Before any MMIO read into slice/subslice specific registers, MCR
	 * packet control register needs to be programmed to point to any
	 * enabled s/ss pair. Otherwise, incorrect values will be returned.
	 * This means each subsequent MMIO read will be forwarded to a
	 * specific s/ss combination, but this is OK since these registers
	 * are consistent across s/ss in almost all cases. In the rare
	 * occasions, such as INSTDONE, where this value is dependent
	 * on s/ss combo, the read should be done with read_subslice_reg.
	 */
	wa_write_masked_or(wal,
			   GEN8_MCR_SELECTOR,
			   mcr_slice_subslice_mask,
			   intel_calculate_mcr_s_ss_select(i915));
}

static void
cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	wa_init_mcr(i915, wal);

	/* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
	if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
		wa_write_or(wal,
			    GAMT_CHKN_BIT_REG,
			    GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);

	/* WaInPlaceDecompressionHang:cnl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}
static void
icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	wa_init_mcr(i915, wal);

	/* WaInPlaceDecompressionHang:icl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	/* WaModifyGamTlbPartitioning:icl */
	wa_write_masked_or(wal,
			   GEN11_GACB_PERF_CTRL,
			   GEN11_HASH_CTRL_MASK,
			   GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);

	/* Wa_1405766107:icl
	 * Formerly known as WaCL2SFHalfMaxAlloc
	 */
	wa_write_or(wal,
		    GEN11_LSN_UNSLCVC,
		    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
		    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);

	/* Wa_220166154:icl
	 * Formerly known as WaDisCtxReload
	 */
	wa_write_or(wal,
		    GEN8_GAMW_ECO_DEV_RW_IA,
		    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);

	/* Wa_1405779004:icl (pre-prod) */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
		wa_write_or(wal,
			    SLICE_UNIT_LEVEL_CLKGATE,
			    MSCUNIT_CLKGATE_DIS);

	/* Wa_1406680159:icl */
	wa_write_or(wal,
		    SUBSLICE_UNIT_LEVEL_CLKGATE,
		    GWUNIT_CLKGATE_DIS);

	/* Wa_1406838659:icl (pre-prod) */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
		wa_write_or(wal,
			    INF_UNIT_LEVEL_CLKGATE,
			    CGPSF_CLKGATE_DIS);

	/* Wa_1406463099:icl
	 * Formerly known as WaGamTlbPendError
	 */
	wa_write_or(wal,
		    GAMT_CHKN_BIT_REG,
		    GAMT_CHKN_DISABLE_L3_COH_PIPE);
}

static void
gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	if (IS_GEN(i915, 11))
		icl_gt_workarounds_init(i915, wal);
	else if (IS_CANNONLAKE(i915))
		cnl_gt_workarounds_init(i915, wal);
	else if (IS_COFFEELAKE(i915))
		cfl_gt_workarounds_init(i915, wal);
	else if (IS_GEMINILAKE(i915))
		glk_gt_workarounds_init(i915, wal);
	else if (IS_KABYLAKE(i915))
		kbl_gt_workarounds_init(i915, wal);
	else if (IS_BROXTON(i915))
		bxt_gt_workarounds_init(i915, wal);
	else if (IS_SKYLAKE(i915))
		skl_gt_workarounds_init(i915, wal);
	else if (INTEL_GEN(i915) <= 8)
		return;
	else
		MISSING_CASE(INTEL_GEN(i915));
}

void intel_gt_init_workarounds(struct drm_i915_private *i915)
{
	struct i915_wa_list *wal = &i915->gt_wa_list;

	wa_init_start(wal, "GT");
	gt_init_workarounds(i915, wal);
	wa_init_finish(wal);
}

static enum forcewake_domains
wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
	enum forcewake_domains fw = 0;
	struct i915_wa *wa;
	unsigned int i;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		fw |= intel_uncore_forcewake_for_reg(uncore,
						     wa->reg,
						     FW_REG_READ |
						     FW_REG_WRITE);

	return fw;
}

static bool
wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
{
	if ((cur ^ wa->val) & wa->read) {
		DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
			  name, from, i915_mmio_reg_offset(wa->reg),
			  cur, cur & wa->read,
			  wa->val, wa->mask);

		return false;
	}

	return true;
}

static void
wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
	enum forcewake_domains fw;
	unsigned long flags;
	struct i915_wa *wa;
	unsigned int i;

	if (!wal->count)
		return;

	fw = wal_get_fw_for_rmw(uncore, wal);

	spin_lock_irqsave(&uncore->lock, flags);
	intel_uncore_forcewake_get__locked(uncore, fw);

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val);
		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
			wa_verify(wa,
				  intel_uncore_read_fw(uncore, wa->reg),
				  wal->name, "application");
	}

	intel_uncore_forcewake_put__locked(uncore, fw);
	spin_unlock_irqrestore(&uncore->lock, flags);
}

void intel_gt_apply_workarounds(struct drm_i915_private *i915)
{
	wa_list_apply(&i915->uncore, &i915->gt_wa_list);
}

static bool wa_list_verify(struct intel_uncore *uncore,
			   const struct i915_wa_list *wal,
			   const char *from)
{
	struct i915_wa *wa;
	unsigned int i;
	bool ok = true;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		ok &= wa_verify(wa,
				intel_uncore_read(uncore, wa->reg),
				wal->name, from);

	return ok;
}

bool intel_gt_verify_workarounds(struct drm_i915_private *i915,
				 const char *from)
{
	return wa_list_verify(&i915->uncore, &i915->gt_wa_list, from);
}
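/*
 * Editorial note on the whitelist helpers below: each whitelisted register is
 * stored as a bare i915_wa entry whose low offset bits are reused to carry
 * the RING_FORCE_TO_NONPRIV_* access flags (see the "wa.reg.reg |= flags"
 * in whitelist_reg_ext()). For instance, the existing
 *
 *	whitelist_reg_ext(w, PS_INVOCATION_COUNT,
 *			  RING_FORCE_TO_NONPRIV_RD |
 *			  RING_FORCE_TO_NONPRIV_RANGE_4);
 *
 * call grants userspace read-only access to four consecutive registers
 * starting at PS_INVOCATION_COUNT, while consuming a single NONPRIV slot.
 */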
static void
whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
{
	struct i915_wa wa = {
		.reg = reg
	};

	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
		return;

	wa.reg.reg |= flags;
	_wa_add(wal, &wa);
}

static void
whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
{
	whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_RW);
}

static void gen9_whitelist_build(struct i915_wa_list *w)
{
	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
	whitelist_reg(w, GEN9_CTX_PREEMPT_REG);

	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
	whitelist_reg(w, GEN8_CS_CHICKEN1);

	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
	whitelist_reg(w, GEN8_HDC_CHICKEN1);
}

static void skl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:skl */
	whitelist_reg(w, GEN8_L3SQCREG4);
}

static void bxt_whitelist_build(struct intel_engine_cs *engine)
{
	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(&engine->whitelist);
}

static void kbl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:kbl */
	whitelist_reg(w, GEN8_L3SQCREG4);
}

static void glk_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}

static void cfl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/*
	 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
	 *
	 * This covers 4 registers which are next to one another:
	 *   - PS_INVOCATION_COUNT
	 *   - PS_INVOCATION_COUNT_UDW
	 *   - PS_DEPTH_COUNT
	 *   - PS_DEPTH_COUNT_UDW
	 */
	whitelist_reg_ext(w, PS_INVOCATION_COUNT,
			  RING_FORCE_TO_NONPRIV_RD |
			  RING_FORCE_TO_NONPRIV_RANGE_4);
}

static void cnl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	/* WaEnablePreemptionGranularityControlByUMD:cnl */
	whitelist_reg(w, GEN8_CS_CHICKEN1);
}
static void icl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	switch (engine->class) {
	case RENDER_CLASS:
		/* WaAllowUMDToModifyHalfSliceChicken7:icl */
		whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);

		/* WaAllowUMDToModifySamplerMode:icl */
		whitelist_reg(w, GEN10_SAMPLER_MODE);

		/* WaEnableStateCacheRedirectToCS:icl */
		whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);

		/*
		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
		 *
		 * This covers 4 registers which are next to one another:
		 *   - PS_INVOCATION_COUNT
		 *   - PS_INVOCATION_COUNT_UDW
		 *   - PS_DEPTH_COUNT
		 *   - PS_DEPTH_COUNT_UDW
		 */
		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
				  RING_FORCE_TO_NONPRIV_RD |
				  RING_FORCE_TO_NONPRIV_RANGE_4);
		break;

	case VIDEO_DECODE_CLASS:
		/* hucStatusRegOffset */
		whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_RD);
		/* hucUKernelHdrInfoRegOffset */
		whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_RD);
		/* hucStatus2RegOffset */
		whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_RD);
		break;

	default:
		break;
	}
}

void intel_engine_init_whitelist(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_wa_list *w = &engine->whitelist;

	wa_init_start(w, "whitelist");

	if (IS_GEN(i915, 11))
		icl_whitelist_build(engine);
	else if (IS_CANNONLAKE(i915))
		cnl_whitelist_build(engine);
	else if (IS_COFFEELAKE(i915))
		cfl_whitelist_build(engine);
	else if (IS_GEMINILAKE(i915))
		glk_whitelist_build(engine);
	else if (IS_KABYLAKE(i915))
		kbl_whitelist_build(engine);
	else if (IS_BROXTON(i915))
		bxt_whitelist_build(engine);
	else if (IS_SKYLAKE(i915))
		skl_whitelist_build(engine);
	else if (INTEL_GEN(i915) <= 8)
		return;
	else
		MISSING_CASE(INTEL_GEN(i915));

	wa_init_finish(w);
}

void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
{
	const struct i915_wa_list *wal = &engine->whitelist;
	struct intel_uncore *uncore = engine->uncore;
	const u32 base = engine->mmio_base;
	struct i915_wa *wa;
	unsigned int i;

	if (!wal->count)
		return;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		intel_uncore_write(uncore,
				   RING_FORCE_TO_NONPRIV(base, i),
				   i915_mmio_reg_offset(wa->reg));

	/* And clear the rest just in case of garbage */
	for (; i < RING_MAX_NONPRIV_SLOTS; i++)
		intel_uncore_write(uncore,
				   RING_FORCE_TO_NONPRIV(base, i),
				   i915_mmio_reg_offset(RING_NOPID(base)));
}
static void
rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	if (IS_GEN(i915, 11)) {
		/* This is not a WA. Enable for better image quality */
		wa_masked_en(wal,
			     _3D_CHICKEN3,
			     _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);

		/* WaPipelineFlushCoherentLines:icl */
		ignore_wa_write_or(wal,
				   GEN8_L3SQCREG4,
				   GEN8_LQSC_FLUSH_COHERENT_LINES,
				   GEN8_LQSC_FLUSH_COHERENT_LINES);

		/*
		 * Wa_1405543622:icl
		 * Formerly known as WaGAPZPriorityScheme
		 */
		wa_write_or(wal,
			    GEN8_GARBCNTL,
			    GEN11_ARBITRATION_PRIO_ORDER_MASK);

		/*
		 * Wa_1604223664:icl
		 * Formerly known as WaL3BankAddressHashing
		 */
		wa_write_masked_or(wal,
				   GEN8_GARBCNTL,
				   GEN11_HASH_CTRL_EXCL_MASK,
				   GEN11_HASH_CTRL_EXCL_BIT0);
		wa_write_masked_or(wal,
				   GEN11_GLBLINVL,
				   GEN11_BANK_HASH_ADDR_EXCL_MASK,
				   GEN11_BANK_HASH_ADDR_EXCL_BIT0);

		/*
		 * Wa_1405733216:icl
		 * Formerly known as WaDisableCleanEvicts
		 */
		ignore_wa_write_or(wal,
				   GEN8_L3SQCREG4,
				   GEN11_LQSC_CLEAN_EVICT_DISABLE,
				   GEN11_LQSC_CLEAN_EVICT_DISABLE);

		/* WaForwardProgressSoftReset:icl */
		wa_write_or(wal,
			    GEN10_SCRATCH_LNCF2,
			    PMFLUSHDONE_LNICRSDROP |
			    PMFLUSH_GAPL3UNBLOCK |
			    PMFLUSHDONE_LNEBLK);

		/* Wa_1406609255:icl (pre-prod) */
		if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
			wa_write_or(wal,
				    GEN7_SARCHKMD,
				    GEN7_DISABLE_DEMAND_PREFETCH);

		/* Wa_1606682166:icl */
		wa_write_or(wal,
			    GEN7_SARCHKMD,
			    GEN7_DISABLE_SAMPLER_PREFETCH);
	}

	if (IS_GEN_RANGE(i915, 9, 11)) {
		/* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */
		wa_masked_en(wal,
			     GEN7_FF_SLICE_CS_CHICKEN1,
			     GEN9_FFSC_PERCTX_PREEMPT_CTRL);
	}

	if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
		/* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
		wa_write_or(wal,
			    GEN8_GARBCNTL,
			    GEN9_GAPS_TSV_CREDIT_DISABLE);
	}

	if (IS_BROXTON(i915)) {
		/* WaDisablePooledEuLoadBalancingFix:bxt */
		wa_masked_en(wal,
			     FF_SLICE_CS_CHICKEN2,
			     GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
	}

	if (IS_GEN(i915, 9)) {
		/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
		wa_masked_en(wal,
			     GEN9_CSFE_CHICKEN1_RCS,
			     GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);

		/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
		wa_write_or(wal,
			    BDW_SCRATCH1,
			    GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

		/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
		if (IS_GEN9_LP(i915))
			wa_write_masked_or(wal,
					   GEN8_L3SQCREG1,
					   L3_PRIO_CREDITS_MASK,
					   L3_GENERAL_PRIO_CREDITS(62) |
					   L3_HIGH_PRIO_CREDITS(2));

		/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
		wa_write_or(wal,
			    GEN8_L3SQCREG4,
			    GEN8_LQSC_FLUSH_COHERENT_LINES);
	}
}

static void
xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	/* WaKBLVECSSemaphoreWaitPoll:kbl */
	if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
		wa_write(wal,
			 RING_SEMA_WAIT_POLL(engine->mmio_base),
			 1);
	}
}

static void
engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8))
		return;

	if (engine->id == RCS0)
		rcs_engine_wa_init(engine, wal);
	else
		xcs_engine_wa_init(engine, wal);
}
void intel_engine_init_workarounds(struct intel_engine_cs *engine)
{
	struct i915_wa_list *wal = &engine->wa_list;

	if (GEM_WARN_ON(INTEL_GEN(engine->i915) < 8))
		return;

	wa_init_start(wal, engine->name);
	engine_init_workarounds(engine, wal);
	wa_init_finish(wal);
}

void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
{
	wa_list_apply(engine->uncore, &engine->wa_list);
}

static struct i915_vma *
create_scratch(struct i915_address_space *vm, int count)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned int size;
	int err;

	size = round_up(count * sizeof(u32), PAGE_SIZE);
	obj = i915_gem_object_create_internal(vm->i915, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0,
			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
	if (err)
		goto err_obj;

	return vma;

err_obj:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static int
wa_list_srm(struct i915_request *rq,
	    const struct i915_wa_list *wal,
	    struct i915_vma *vma)
{
	const struct i915_wa *wa;
	unsigned int i;
	u32 srm, *cs;

	srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
	if (INTEL_GEN(rq->i915) >= 8)
		srm++;

	cs = intel_ring_begin(rq, 4 * wal->count);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		*cs++ = srm;
		*cs++ = i915_mmio_reg_offset(wa->reg);
		*cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
		*cs++ = 0;
	}
	intel_ring_advance(rq, cs);

	return 0;
}

static int engine_wa_list_verify(struct intel_context *ce,
				 const struct i915_wa_list * const wal,
				 const char *from)
{
	const struct i915_wa *wa;
	struct i915_request *rq;
	struct i915_vma *vma;
	unsigned int i;
	u32 *results;
	int err;

	if (!wal->count)
		return 0;

	vma = create_scratch(&ce->engine->i915->ggtt.vm, wal->count);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_vma;
	}

	err = wa_list_srm(rq, wal, vma);
	if (err)
		goto err_vma;

	i915_request_add(rq);
	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_vma;
	}

	results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
	if (IS_ERR(results)) {
		err = PTR_ERR(results);
		goto err_vma;
	}

	err = 0;
	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		if (!wa_verify(wa, results[i], wal->name, from))
			err = -ENXIO;

	i915_gem_object_unpin_map(vma->obj);

err_vma:
	i915_vma_unpin(vma);
	i915_vma_put(vma);
	return err;
}

int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
				    const char *from)
{
	return engine_wa_list_verify(engine->kernel_context,
				     &engine->wa_list,
				     from);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_workarounds.c"
#endif