1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2014-2018 Intel Corporation 5 */ 6 7 #include "i915_drv.h" 8 #include "intel_context.h" 9 #include "intel_gt.h" 10 #include "intel_workarounds.h" 11 12 /** 13 * DOC: Hardware workarounds 14 * 15 * This file is intended as a central place to implement most [1]_ of the 16 * required workarounds for hardware to work as originally intended. They fall 17 * in five basic categories depending on how/when they are applied: 18 * 19 * - Workarounds that touch registers that are saved/restored to/from the HW 20 * context image. The list is emitted (via Load Register Immediate commands) 21 * everytime a new context is created. 22 * - GT workarounds. The list of these WAs is applied whenever these registers 23 * revert to default values (on GPU reset, suspend/resume [2]_, etc..). 24 * - Display workarounds. The list is applied during display clock-gating 25 * initialization. 26 * - Workarounds that whitelist a privileged register, so that UMDs can manage 27 * them directly. This is just a special case of a MMMIO workaround (as we 28 * write the list of these to/be-whitelisted registers to some special HW 29 * registers). 30 * - Workaround batchbuffers, that get executed automatically by the hardware 31 * on every HW context restore. 32 * 33 * .. [1] Please notice that there are other WAs that, due to their nature, 34 * cannot be applied from a central place. Those are peppered around the rest 35 * of the code, as needed. 36 * 37 * .. [2] Technically, some registers are powercontext saved & restored, so they 38 * survive a suspend/resume. In practice, writing them again is not too 39 * costly and simplifies things. We can revisit this in the future. 40 * 41 * Layout 42 * ~~~~~~ 43 * 44 * Keep things in this file ordered by WA type, as per the above (context, GT, 45 * display, register whitelist, batchbuffer). Then, inside each type, keep the 46 * following order: 47 * 48 * - Infrastructure functions and macros 49 * - WAs per platform in standard gen/chrono order 50 * - Public functions to init or apply the given workaround type. 51 */ 52 53 static void wa_init_start(struct i915_wa_list *wal, const char *name, const char *engine_name) 54 { 55 wal->name = name; 56 wal->engine_name = engine_name; 57 } 58 59 #define WA_LIST_CHUNK (1 << 4) 60 61 static void wa_init_finish(struct i915_wa_list *wal) 62 { 63 /* Trim unused entries. */ 64 if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) { 65 struct i915_wa *list = kmemdup(wal->list, 66 wal->count * sizeof(*list), 67 GFP_KERNEL); 68 69 if (list) { 70 kfree(wal->list); 71 wal->list = list; 72 } 73 } 74 75 if (!wal->count) 76 return; 77 78 DRM_DEBUG_DRIVER("Initialized %u %s workarounds on %s\n", 79 wal->wa_count, wal->name, wal->engine_name); 80 } 81 82 static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) 83 { 84 unsigned int addr = i915_mmio_reg_offset(wa->reg); 85 unsigned int start = 0, end = wal->count; 86 const unsigned int grow = WA_LIST_CHUNK; 87 struct i915_wa *wa_; 88 89 GEM_BUG_ON(!is_power_of_2(grow)); 90 91 if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */ 92 struct i915_wa *list; 93 94 list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa), 95 GFP_KERNEL); 96 if (!list) { 97 DRM_ERROR("No space for workaround init!\n"); 98 return; 99 } 100 101 if (wal->list) 102 memcpy(list, wal->list, sizeof(*wa) * wal->count); 103 104 wal->list = list; 105 } 106 107 while (start < end) { 108 unsigned int mid = start + (end - start) / 2; 109 110 if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) { 111 start = mid + 1; 112 } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) { 113 end = mid; 114 } else { 115 wa_ = &wal->list[mid]; 116 117 if ((wa->mask & ~wa_->mask) == 0) { 118 DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n", 119 i915_mmio_reg_offset(wa_->reg), 120 wa_->mask, wa_->val); 121 122 wa_->val &= ~wa->mask; 123 } 124 125 wal->wa_count++; 126 wa_->val |= wa->val; 127 wa_->mask |= wa->mask; 128 wa_->read |= wa->read; 129 return; 130 } 131 } 132 133 wal->wa_count++; 134 wa_ = &wal->list[wal->count++]; 135 *wa_ = *wa; 136 137 while (wa_-- > wal->list) { 138 GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) == 139 i915_mmio_reg_offset(wa_[1].reg)); 140 if (i915_mmio_reg_offset(wa_[1].reg) > 141 i915_mmio_reg_offset(wa_[0].reg)) 142 break; 143 144 swap(wa_[1], wa_[0]); 145 } 146 } 147 148 static void 149 wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, 150 u32 val) 151 { 152 struct i915_wa wa = { 153 .reg = reg, 154 .mask = mask, 155 .val = val, 156 .read = mask, 157 }; 158 159 _wa_add(wal, &wa); 160 } 161 162 static void 163 wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) 164 { 165 wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val)); 166 } 167 168 static void 169 wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val) 170 { 171 wa_write_masked_or(wal, reg, ~0, val); 172 } 173 174 static void 175 wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) 176 { 177 wa_write_masked_or(wal, reg, val, val); 178 } 179 180 #define WA_SET_BIT_MASKED(addr, mask) \ 181 wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask)) 182 183 #define WA_CLR_BIT_MASKED(addr, mask) \ 184 wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask)) 185 186 #define WA_SET_FIELD_MASKED(addr, mask, value) \ 187 wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value))) 188 189 static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine, 190 struct i915_wa_list *wal) 191 { 192 WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); 193 194 /* WaDisableAsyncFlipPerfMode:bdw,chv */ 195 WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); 196 197 /* WaDisablePartialInstShootdown:bdw,chv */ 198 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, 199 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); 200 201 /* Use Force Non-Coherent whenever executing a 3D context. This is a 202 * workaround for for a possible hang in the unlikely event a TLB 203 * invalidation occurs during a PSD flush. 204 */ 205 /* WaForceEnableNonCoherent:bdw,chv */ 206 /* WaHdcDisableFetchWhenMasked:bdw,chv */ 207 WA_SET_BIT_MASKED(HDC_CHICKEN0, 208 HDC_DONOT_FETCH_MEM_WHEN_MASKED | 209 HDC_FORCE_NON_COHERENT); 210 211 /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: 212 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping 213 * polygons in the same 8x4 pixel/sample area to be processed without 214 * stalling waiting for the earlier ones to write to Hierarchical Z 215 * buffer." 216 * 217 * This optimization is off by default for BDW and CHV; turn it on. 218 */ 219 WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); 220 221 /* Wa4x4STCOptimizationDisable:bdw,chv */ 222 WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); 223 224 /* 225 * BSpec recommends 8x4 when MSAA is used, 226 * however in practice 16x4 seems fastest. 227 * 228 * Note that PS/WM thread counts depend on the WIZ hashing 229 * disable bit, which we don't touch here, but it's good 230 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 231 */ 232 WA_SET_FIELD_MASKED(GEN7_GT_MODE, 233 GEN6_WIZ_HASHING_MASK, 234 GEN6_WIZ_HASHING_16x4); 235 } 236 237 static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine, 238 struct i915_wa_list *wal) 239 { 240 struct drm_i915_private *i915 = engine->i915; 241 242 gen8_ctx_workarounds_init(engine, wal); 243 244 /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ 245 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); 246 247 /* WaDisableDopClockGating:bdw 248 * 249 * Also see the related UCGTCL1 write in broadwell_init_clock_gating() 250 * to disable EUTC clock gating. 251 */ 252 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, 253 DOP_CLOCK_GATING_DISABLE); 254 255 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, 256 GEN8_SAMPLER_POWER_BYPASS_DIS); 257 258 WA_SET_BIT_MASKED(HDC_CHICKEN0, 259 /* WaForceContextSaveRestoreNonCoherent:bdw */ 260 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | 261 /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ 262 (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); 263 } 264 265 static void chv_ctx_workarounds_init(struct intel_engine_cs *engine, 266 struct i915_wa_list *wal) 267 { 268 gen8_ctx_workarounds_init(engine, wal); 269 270 /* WaDisableThreadStallDopClockGating:chv */ 271 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); 272 273 /* Improve HiZ throughput on CHV. */ 274 WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); 275 } 276 277 static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, 278 struct i915_wa_list *wal) 279 { 280 struct drm_i915_private *i915 = engine->i915; 281 282 if (HAS_LLC(i915)) { 283 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl 284 * 285 * Must match Display Engine. See 286 * WaCompressedResourceDisplayNewHashMode. 287 */ 288 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, 289 GEN9_PBE_COMPRESSED_HASH_SELECTION); 290 WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, 291 GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); 292 } 293 294 /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */ 295 /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */ 296 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, 297 FLOW_CONTROL_ENABLE | 298 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); 299 300 /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ 301 if (!IS_COFFEELAKE(i915)) 302 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, 303 GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); 304 305 /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */ 306 /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */ 307 WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, 308 GEN9_ENABLE_YV12_BUGFIX | 309 GEN9_ENABLE_GPGPU_PREEMPTION); 310 311 /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */ 312 /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */ 313 WA_SET_BIT_MASKED(CACHE_MODE_1, 314 GEN8_4x4_STC_OPTIMIZATION_DISABLE | 315 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE); 316 317 /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */ 318 WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, 319 GEN9_CCS_TLB_PREFETCH_ENABLE); 320 321 /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */ 322 WA_SET_BIT_MASKED(HDC_CHICKEN0, 323 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | 324 HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); 325 326 /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are 327 * both tied to WaForceContextSaveRestoreNonCoherent 328 * in some hsds for skl. We keep the tie for all gen9. The 329 * documentation is a bit hazy and so we want to get common behaviour, 330 * even though there is no clear evidence we would need both on kbl/bxt. 331 * This area has been source of system hangs so we play it safe 332 * and mimic the skl regardless of what bspec says. 333 * 334 * Use Force Non-Coherent whenever executing a 3D context. This 335 * is a workaround for a possible hang in the unlikely event 336 * a TLB invalidation occurs during a PSD flush. 337 */ 338 339 /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */ 340 WA_SET_BIT_MASKED(HDC_CHICKEN0, 341 HDC_FORCE_NON_COHERENT); 342 343 /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */ 344 if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) 345 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, 346 GEN8_SAMPLER_POWER_BYPASS_DIS); 347 348 /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */ 349 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); 350 351 /* 352 * Supporting preemption with fine-granularity requires changes in the 353 * batch buffer programming. Since we can't break old userspace, we 354 * need to set our default preemption level to safe value. Userspace is 355 * still able to use more fine-grained preemption levels, since in 356 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the 357 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are 358 * not real HW workarounds, but merely a way to start using preemption 359 * while maintaining old contract with userspace. 360 */ 361 362 /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */ 363 WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); 364 365 /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */ 366 WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, 367 GEN9_PREEMPT_GPGPU_LEVEL_MASK, 368 GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); 369 370 /* WaClearHIZ_WM_CHICKEN3:bxt,glk */ 371 if (IS_GEN9_LP(i915)) 372 WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ); 373 } 374 375 static void skl_tune_iz_hashing(struct intel_engine_cs *engine, 376 struct i915_wa_list *wal) 377 { 378 struct drm_i915_private *i915 = engine->i915; 379 u8 vals[3] = { 0, 0, 0 }; 380 unsigned int i; 381 382 for (i = 0; i < 3; i++) { 383 u8 ss; 384 385 /* 386 * Only consider slices where one, and only one, subslice has 7 387 * EUs 388 */ 389 if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i])) 390 continue; 391 392 /* 393 * subslice_7eu[i] != 0 (because of the check above) and 394 * ss_max == 4 (maximum number of subslices possible per slice) 395 * 396 * -> 0 <= ss <= 3; 397 */ 398 ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1; 399 vals[i] = 3 - ss; 400 } 401 402 if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) 403 return; 404 405 /* Tune IZ hashing. See intel_device_info_runtime_init() */ 406 WA_SET_FIELD_MASKED(GEN7_GT_MODE, 407 GEN9_IZ_HASHING_MASK(2) | 408 GEN9_IZ_HASHING_MASK(1) | 409 GEN9_IZ_HASHING_MASK(0), 410 GEN9_IZ_HASHING(2, vals[2]) | 411 GEN9_IZ_HASHING(1, vals[1]) | 412 GEN9_IZ_HASHING(0, vals[0])); 413 } 414 415 static void skl_ctx_workarounds_init(struct intel_engine_cs *engine, 416 struct i915_wa_list *wal) 417 { 418 gen9_ctx_workarounds_init(engine, wal); 419 skl_tune_iz_hashing(engine, wal); 420 } 421 422 static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine, 423 struct i915_wa_list *wal) 424 { 425 gen9_ctx_workarounds_init(engine, wal); 426 427 /* WaDisableThreadStallDopClockGating:bxt */ 428 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, 429 STALL_DOP_GATING_DISABLE); 430 431 /* WaToEnableHwFixForPushConstHWBug:bxt */ 432 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, 433 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); 434 } 435 436 static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine, 437 struct i915_wa_list *wal) 438 { 439 struct drm_i915_private *i915 = engine->i915; 440 441 gen9_ctx_workarounds_init(engine, wal); 442 443 /* WaToEnableHwFixForPushConstHWBug:kbl */ 444 if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER)) 445 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, 446 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); 447 448 /* WaDisableSbeCacheDispatchPortSharing:kbl */ 449 WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1, 450 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); 451 } 452 453 static void glk_ctx_workarounds_init(struct intel_engine_cs *engine, 454 struct i915_wa_list *wal) 455 { 456 gen9_ctx_workarounds_init(engine, wal); 457 458 /* WaToEnableHwFixForPushConstHWBug:glk */ 459 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, 460 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); 461 } 462 463 static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, 464 struct i915_wa_list *wal) 465 { 466 gen9_ctx_workarounds_init(engine, wal); 467 468 /* WaToEnableHwFixForPushConstHWBug:cfl */ 469 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, 470 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); 471 472 /* WaDisableSbeCacheDispatchPortSharing:cfl */ 473 WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1, 474 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); 475 } 476 477 static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine, 478 struct i915_wa_list *wal) 479 { 480 struct drm_i915_private *i915 = engine->i915; 481 482 /* WaForceContextSaveRestoreNonCoherent:cnl */ 483 WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0, 484 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT); 485 486 /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */ 487 if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0)) 488 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5); 489 490 /* WaDisableReplayBufferBankArbitrationOptimization:cnl */ 491 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, 492 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); 493 494 /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */ 495 if (IS_CNL_REVID(i915, 0, CNL_REVID_B0)) 496 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, 497 GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE); 498 499 /* WaPushConstantDereferenceHoldDisable:cnl */ 500 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE); 501 502 /* FtrEnableFastAnisoL1BankingFix:cnl */ 503 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX); 504 505 /* WaDisable3DMidCmdPreemption:cnl */ 506 WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); 507 508 /* WaDisableGPGPUMidCmdPreemption:cnl */ 509 WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, 510 GEN9_PREEMPT_GPGPU_LEVEL_MASK, 511 GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); 512 513 /* WaDisableEarlyEOT:cnl */ 514 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT); 515 } 516 517 static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, 518 struct i915_wa_list *wal) 519 { 520 struct drm_i915_private *i915 = engine->i915; 521 522 /* WaDisableBankHangMode:icl */ 523 wa_write(wal, 524 GEN8_L3CNTLREG, 525 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) | 526 GEN8_ERRDETBCTRL); 527 528 /* Wa_1604370585:icl (pre-prod) 529 * Formerly known as WaPushConstantDereferenceHoldDisable 530 */ 531 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) 532 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, 533 PUSH_CONSTANT_DEREF_DISABLE); 534 535 /* WaForceEnableNonCoherent:icl 536 * This is not the same workaround as in early Gen9 platforms, where 537 * lacking this could cause system hangs, but coherency performance 538 * overhead is high and only a few compute workloads really need it 539 * (the register is whitelisted in hardware now, so UMDs can opt in 540 * for coherency if they have a good reason). 541 */ 542 WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); 543 544 /* Wa_2006611047:icl (pre-prod) 545 * Formerly known as WaDisableImprovedTdlClkGating 546 */ 547 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) 548 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, 549 GEN11_TDL_CLOCK_GATING_FIX_DISABLE); 550 551 /* Wa_2006665173:icl (pre-prod) */ 552 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) 553 WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, 554 GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC); 555 556 /* WaEnableFloatBlendOptimization:icl */ 557 wa_write_masked_or(wal, 558 GEN10_CACHE_MODE_SS, 559 0, /* write-only, so skip validation */ 560 _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE)); 561 562 /* WaDisableGPGPUMidThreadPreemption:icl */ 563 WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, 564 GEN9_PREEMPT_GPGPU_LEVEL_MASK, 565 GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); 566 567 /* allow headerless messages for preemptible GPGPU context */ 568 WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE, 569 GEN11_SAMPLER_ENABLE_HEADLESS_MSG); 570 } 571 572 static void 573 __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, 574 struct i915_wa_list *wal, 575 const char *name) 576 { 577 struct drm_i915_private *i915 = engine->i915; 578 579 if (engine->class != RENDER_CLASS) 580 return; 581 582 wa_init_start(wal, name, engine->name); 583 584 if (IS_GEN(i915, 11)) 585 icl_ctx_workarounds_init(engine, wal); 586 else if (IS_CANNONLAKE(i915)) 587 cnl_ctx_workarounds_init(engine, wal); 588 else if (IS_COFFEELAKE(i915)) 589 cfl_ctx_workarounds_init(engine, wal); 590 else if (IS_GEMINILAKE(i915)) 591 glk_ctx_workarounds_init(engine, wal); 592 else if (IS_KABYLAKE(i915)) 593 kbl_ctx_workarounds_init(engine, wal); 594 else if (IS_BROXTON(i915)) 595 bxt_ctx_workarounds_init(engine, wal); 596 else if (IS_SKYLAKE(i915)) 597 skl_ctx_workarounds_init(engine, wal); 598 else if (IS_CHERRYVIEW(i915)) 599 chv_ctx_workarounds_init(engine, wal); 600 else if (IS_BROADWELL(i915)) 601 bdw_ctx_workarounds_init(engine, wal); 602 else if (INTEL_GEN(i915) < 8) 603 return; 604 else 605 MISSING_CASE(INTEL_GEN(i915)); 606 607 wa_init_finish(wal); 608 } 609 610 void intel_engine_init_ctx_wa(struct intel_engine_cs *engine) 611 { 612 __intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context"); 613 } 614 615 int intel_engine_emit_ctx_wa(struct i915_request *rq) 616 { 617 struct i915_wa_list *wal = &rq->engine->ctx_wa_list; 618 struct i915_wa *wa; 619 unsigned int i; 620 u32 *cs; 621 int ret; 622 623 if (wal->count == 0) 624 return 0; 625 626 ret = rq->engine->emit_flush(rq, EMIT_BARRIER); 627 if (ret) 628 return ret; 629 630 cs = intel_ring_begin(rq, (wal->count * 2 + 2)); 631 if (IS_ERR(cs)) 632 return PTR_ERR(cs); 633 634 *cs++ = MI_LOAD_REGISTER_IMM(wal->count); 635 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { 636 *cs++ = i915_mmio_reg_offset(wa->reg); 637 *cs++ = wa->val; 638 } 639 *cs++ = MI_NOOP; 640 641 intel_ring_advance(rq, cs); 642 643 ret = rq->engine->emit_flush(rq, EMIT_BARRIER); 644 if (ret) 645 return ret; 646 647 return 0; 648 } 649 650 static void 651 gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 652 { 653 /* WaDisableKillLogic:bxt,skl,kbl */ 654 if (!IS_COFFEELAKE(i915)) 655 wa_write_or(wal, 656 GAM_ECOCHK, 657 ECOCHK_DIS_TLB); 658 659 if (HAS_LLC(i915)) { 660 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl 661 * 662 * Must match Display Engine. See 663 * WaCompressedResourceDisplayNewHashMode. 664 */ 665 wa_write_or(wal, 666 MMCD_MISC_CTRL, 667 MMCD_PCLA | MMCD_HOTSPOT_EN); 668 } 669 670 /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */ 671 wa_write_or(wal, 672 GAM_ECOCHK, 673 BDW_DISABLE_HDC_INVALIDATION); 674 } 675 676 static void 677 skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 678 { 679 gen9_gt_workarounds_init(i915, wal); 680 681 /* WaDisableGafsUnitClkGating:skl */ 682 wa_write_or(wal, 683 GEN7_UCGCTL4, 684 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); 685 686 /* WaInPlaceDecompressionHang:skl */ 687 if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER)) 688 wa_write_or(wal, 689 GEN9_GAMT_ECO_REG_RW_IA, 690 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); 691 } 692 693 static void 694 bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 695 { 696 gen9_gt_workarounds_init(i915, wal); 697 698 /* WaInPlaceDecompressionHang:bxt */ 699 wa_write_or(wal, 700 GEN9_GAMT_ECO_REG_RW_IA, 701 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); 702 } 703 704 static void 705 kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 706 { 707 gen9_gt_workarounds_init(i915, wal); 708 709 /* WaDisableDynamicCreditSharing:kbl */ 710 if (IS_KBL_REVID(i915, 0, KBL_REVID_B0)) 711 wa_write_or(wal, 712 GAMT_CHKN_BIT_REG, 713 GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); 714 715 /* WaDisableGafsUnitClkGating:kbl */ 716 wa_write_or(wal, 717 GEN7_UCGCTL4, 718 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); 719 720 /* WaInPlaceDecompressionHang:kbl */ 721 wa_write_or(wal, 722 GEN9_GAMT_ECO_REG_RW_IA, 723 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); 724 } 725 726 static void 727 glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 728 { 729 gen9_gt_workarounds_init(i915, wal); 730 } 731 732 static void 733 cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 734 { 735 gen9_gt_workarounds_init(i915, wal); 736 737 /* WaDisableGafsUnitClkGating:cfl */ 738 wa_write_or(wal, 739 GEN7_UCGCTL4, 740 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); 741 742 /* WaInPlaceDecompressionHang:cfl */ 743 wa_write_or(wal, 744 GEN9_GAMT_ECO_REG_RW_IA, 745 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); 746 } 747 748 static void 749 wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) 750 { 751 const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; 752 unsigned int slice, subslice; 753 u32 l3_en, mcr, mcr_mask; 754 755 GEM_BUG_ON(INTEL_GEN(i915) < 10); 756 757 /* 758 * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl 759 * L3Banks could be fused off in single slice scenario. If that is 760 * the case, we might need to program MCR select to a valid L3Bank 761 * by default, to make sure we correctly read certain registers 762 * later on (in the range 0xB100 - 0xB3FF). 763 * 764 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl 765 * Before any MMIO read into slice/subslice specific registers, MCR 766 * packet control register needs to be programmed to point to any 767 * enabled s/ss pair. Otherwise, incorrect values will be returned. 768 * This means each subsequent MMIO read will be forwarded to an 769 * specific s/ss combination, but this is OK since these registers 770 * are consistent across s/ss in almost all cases. In the rare 771 * occasions, such as INSTDONE, where this value is dependent 772 * on s/ss combo, the read should be done with read_subslice_reg. 773 * 774 * Since GEN8_MCR_SELECTOR contains dual-purpose bits which select both 775 * to which subslice, or to which L3 bank, the respective mmio reads 776 * will go, we have to find a common index which works for both 777 * accesses. 778 * 779 * Case where we cannot find a common index fortunately should not 780 * happen in production hardware, so we only emit a warning instead of 781 * implementing something more complex that requires checking the range 782 * of every MMIO read. 783 */ 784 785 if (INTEL_GEN(i915) >= 10 && is_power_of_2(sseu->slice_mask)) { 786 u32 l3_fuse = 787 intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) & 788 GEN10_L3BANK_MASK; 789 790 DRM_DEBUG_DRIVER("L3 fuse = %x\n", l3_fuse); 791 l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse); 792 } else { 793 l3_en = ~0; 794 } 795 796 slice = fls(sseu->slice_mask) - 1; 797 GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask)); 798 subslice = fls(l3_en & sseu->subslice_mask[slice]); 799 if (!subslice) { 800 DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n", 801 sseu->subslice_mask[slice], l3_en); 802 subslice = fls(l3_en); 803 WARN_ON(!subslice); 804 } 805 subslice--; 806 807 if (INTEL_GEN(i915) >= 11) { 808 mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); 809 mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; 810 } else { 811 mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); 812 mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; 813 } 814 815 DRM_DEBUG_DRIVER("MCR slice/subslice = %x\n", mcr); 816 817 wa_write_masked_or(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr); 818 } 819 820 static void 821 cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 822 { 823 wa_init_mcr(i915, wal); 824 825 /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */ 826 if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0)) 827 wa_write_or(wal, 828 GAMT_CHKN_BIT_REG, 829 GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT); 830 831 /* WaInPlaceDecompressionHang:cnl */ 832 wa_write_or(wal, 833 GEN9_GAMT_ECO_REG_RW_IA, 834 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); 835 } 836 837 static void 838 icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 839 { 840 wa_init_mcr(i915, wal); 841 842 /* WaInPlaceDecompressionHang:icl */ 843 wa_write_or(wal, 844 GEN9_GAMT_ECO_REG_RW_IA, 845 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); 846 847 /* WaModifyGamTlbPartitioning:icl */ 848 wa_write_masked_or(wal, 849 GEN11_GACB_PERF_CTRL, 850 GEN11_HASH_CTRL_MASK, 851 GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4); 852 853 /* Wa_1405766107:icl 854 * Formerly known as WaCL2SFHalfMaxAlloc 855 */ 856 wa_write_or(wal, 857 GEN11_LSN_UNSLCVC, 858 GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC | 859 GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC); 860 861 /* Wa_220166154:icl 862 * Formerly known as WaDisCtxReload 863 */ 864 wa_write_or(wal, 865 GEN8_GAMW_ECO_DEV_RW_IA, 866 GAMW_ECO_DEV_CTX_RELOAD_DISABLE); 867 868 /* Wa_1405779004:icl (pre-prod) */ 869 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) 870 wa_write_or(wal, 871 SLICE_UNIT_LEVEL_CLKGATE, 872 MSCUNIT_CLKGATE_DIS); 873 874 /* Wa_1406680159:icl */ 875 wa_write_or(wal, 876 SUBSLICE_UNIT_LEVEL_CLKGATE, 877 GWUNIT_CLKGATE_DIS); 878 879 /* Wa_1406838659:icl (pre-prod) */ 880 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) 881 wa_write_or(wal, 882 INF_UNIT_LEVEL_CLKGATE, 883 CGPSF_CLKGATE_DIS); 884 885 /* Wa_1406463099:icl 886 * Formerly known as WaGamTlbPendError 887 */ 888 wa_write_or(wal, 889 GAMT_CHKN_BIT_REG, 890 GAMT_CHKN_DISABLE_L3_COH_PIPE); 891 } 892 893 static void 894 gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) 895 { 896 if (IS_GEN(i915, 11)) 897 icl_gt_workarounds_init(i915, wal); 898 else if (IS_CANNONLAKE(i915)) 899 cnl_gt_workarounds_init(i915, wal); 900 else if (IS_COFFEELAKE(i915)) 901 cfl_gt_workarounds_init(i915, wal); 902 else if (IS_GEMINILAKE(i915)) 903 glk_gt_workarounds_init(i915, wal); 904 else if (IS_KABYLAKE(i915)) 905 kbl_gt_workarounds_init(i915, wal); 906 else if (IS_BROXTON(i915)) 907 bxt_gt_workarounds_init(i915, wal); 908 else if (IS_SKYLAKE(i915)) 909 skl_gt_workarounds_init(i915, wal); 910 else if (INTEL_GEN(i915) <= 8) 911 return; 912 else 913 MISSING_CASE(INTEL_GEN(i915)); 914 } 915 916 void intel_gt_init_workarounds(struct drm_i915_private *i915) 917 { 918 struct i915_wa_list *wal = &i915->gt_wa_list; 919 920 wa_init_start(wal, "GT", "global"); 921 gt_init_workarounds(i915, wal); 922 wa_init_finish(wal); 923 } 924 925 static enum forcewake_domains 926 wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal) 927 { 928 enum forcewake_domains fw = 0; 929 struct i915_wa *wa; 930 unsigned int i; 931 932 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) 933 fw |= intel_uncore_forcewake_for_reg(uncore, 934 wa->reg, 935 FW_REG_READ | 936 FW_REG_WRITE); 937 938 return fw; 939 } 940 941 static bool 942 wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from) 943 { 944 if ((cur ^ wa->val) & wa->read) { 945 DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n", 946 name, from, i915_mmio_reg_offset(wa->reg), 947 cur, cur & wa->read, 948 wa->val, wa->mask); 949 950 return false; 951 } 952 953 return true; 954 } 955 956 static void 957 wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) 958 { 959 enum forcewake_domains fw; 960 unsigned long flags; 961 struct i915_wa *wa; 962 unsigned int i; 963 964 if (!wal->count) 965 return; 966 967 fw = wal_get_fw_for_rmw(uncore, wal); 968 969 spin_lock_irqsave(&uncore->lock, flags); 970 intel_uncore_forcewake_get__locked(uncore, fw); 971 972 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { 973 intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val); 974 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 975 wa_verify(wa, 976 intel_uncore_read_fw(uncore, wa->reg), 977 wal->name, "application"); 978 } 979 980 intel_uncore_forcewake_put__locked(uncore, fw); 981 spin_unlock_irqrestore(&uncore->lock, flags); 982 } 983 984 void intel_gt_apply_workarounds(struct intel_gt *gt) 985 { 986 wa_list_apply(gt->uncore, >->i915->gt_wa_list); 987 } 988 989 static bool wa_list_verify(struct intel_uncore *uncore, 990 const struct i915_wa_list *wal, 991 const char *from) 992 { 993 struct i915_wa *wa; 994 unsigned int i; 995 bool ok = true; 996 997 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) 998 ok &= wa_verify(wa, 999 intel_uncore_read(uncore, wa->reg), 1000 wal->name, from); 1001 1002 return ok; 1003 } 1004 1005 bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from) 1006 { 1007 return wa_list_verify(gt->uncore, >->i915->gt_wa_list, from); 1008 } 1009 1010 static inline bool is_nonpriv_flags_valid(u32 flags) 1011 { 1012 /* Check only valid flag bits are set */ 1013 if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID) 1014 return false; 1015 1016 /* NB: Only 3 out of 4 enum values are valid for access field */ 1017 if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) == 1018 RING_FORCE_TO_NONPRIV_ACCESS_INVALID) 1019 return false; 1020 1021 return true; 1022 } 1023 1024 static void 1025 whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags) 1026 { 1027 struct i915_wa wa = { 1028 .reg = reg 1029 }; 1030 1031 if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS)) 1032 return; 1033 1034 if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags))) 1035 return; 1036 1037 wa.reg.reg |= flags; 1038 _wa_add(wal, &wa); 1039 } 1040 1041 static void 1042 whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg) 1043 { 1044 whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW); 1045 } 1046 1047 static void gen9_whitelist_build(struct i915_wa_list *w) 1048 { 1049 /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ 1050 whitelist_reg(w, GEN9_CTX_PREEMPT_REG); 1051 1052 /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */ 1053 whitelist_reg(w, GEN8_CS_CHICKEN1); 1054 1055 /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */ 1056 whitelist_reg(w, GEN8_HDC_CHICKEN1); 1057 } 1058 1059 static void skl_whitelist_build(struct intel_engine_cs *engine) 1060 { 1061 struct i915_wa_list *w = &engine->whitelist; 1062 1063 if (engine->class != RENDER_CLASS) 1064 return; 1065 1066 gen9_whitelist_build(w); 1067 1068 /* WaDisableLSQCROPERFforOCL:skl */ 1069 whitelist_reg(w, GEN8_L3SQCREG4); 1070 } 1071 1072 static void bxt_whitelist_build(struct intel_engine_cs *engine) 1073 { 1074 if (engine->class != RENDER_CLASS) 1075 return; 1076 1077 gen9_whitelist_build(&engine->whitelist); 1078 } 1079 1080 static void kbl_whitelist_build(struct intel_engine_cs *engine) 1081 { 1082 struct i915_wa_list *w = &engine->whitelist; 1083 1084 if (engine->class != RENDER_CLASS) 1085 return; 1086 1087 gen9_whitelist_build(w); 1088 1089 /* WaDisableLSQCROPERFforOCL:kbl */ 1090 whitelist_reg(w, GEN8_L3SQCREG4); 1091 } 1092 1093 static void glk_whitelist_build(struct intel_engine_cs *engine) 1094 { 1095 struct i915_wa_list *w = &engine->whitelist; 1096 1097 if (engine->class != RENDER_CLASS) 1098 return; 1099 1100 gen9_whitelist_build(w); 1101 1102 /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */ 1103 whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1); 1104 } 1105 1106 static void cfl_whitelist_build(struct intel_engine_cs *engine) 1107 { 1108 struct i915_wa_list *w = &engine->whitelist; 1109 1110 if (engine->class != RENDER_CLASS) 1111 return; 1112 1113 gen9_whitelist_build(w); 1114 1115 /* 1116 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml 1117 * 1118 * This covers 4 register which are next to one another : 1119 * - PS_INVOCATION_COUNT 1120 * - PS_INVOCATION_COUNT_UDW 1121 * - PS_DEPTH_COUNT 1122 * - PS_DEPTH_COUNT_UDW 1123 */ 1124 whitelist_reg_ext(w, PS_INVOCATION_COUNT, 1125 RING_FORCE_TO_NONPRIV_ACCESS_RD | 1126 RING_FORCE_TO_NONPRIV_RANGE_4); 1127 } 1128 1129 static void cnl_whitelist_build(struct intel_engine_cs *engine) 1130 { 1131 struct i915_wa_list *w = &engine->whitelist; 1132 1133 if (engine->class != RENDER_CLASS) 1134 return; 1135 1136 /* WaEnablePreemptionGranularityControlByUMD:cnl */ 1137 whitelist_reg(w, GEN8_CS_CHICKEN1); 1138 } 1139 1140 static void icl_whitelist_build(struct intel_engine_cs *engine) 1141 { 1142 struct i915_wa_list *w = &engine->whitelist; 1143 1144 switch (engine->class) { 1145 case RENDER_CLASS: 1146 /* WaAllowUMDToModifyHalfSliceChicken7:icl */ 1147 whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7); 1148 1149 /* WaAllowUMDToModifySamplerMode:icl */ 1150 whitelist_reg(w, GEN10_SAMPLER_MODE); 1151 1152 /* WaEnableStateCacheRedirectToCS:icl */ 1153 whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1); 1154 1155 /* 1156 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl 1157 * 1158 * This covers 4 register which are next to one another : 1159 * - PS_INVOCATION_COUNT 1160 * - PS_INVOCATION_COUNT_UDW 1161 * - PS_DEPTH_COUNT 1162 * - PS_DEPTH_COUNT_UDW 1163 */ 1164 whitelist_reg_ext(w, PS_INVOCATION_COUNT, 1165 RING_FORCE_TO_NONPRIV_ACCESS_RD | 1166 RING_FORCE_TO_NONPRIV_RANGE_4); 1167 break; 1168 1169 case VIDEO_DECODE_CLASS: 1170 /* hucStatusRegOffset */ 1171 whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base), 1172 RING_FORCE_TO_NONPRIV_ACCESS_RD); 1173 /* hucUKernelHdrInfoRegOffset */ 1174 whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base), 1175 RING_FORCE_TO_NONPRIV_ACCESS_RD); 1176 /* hucStatus2RegOffset */ 1177 whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base), 1178 RING_FORCE_TO_NONPRIV_ACCESS_RD); 1179 break; 1180 1181 default: 1182 break; 1183 } 1184 } 1185 1186 void intel_engine_init_whitelist(struct intel_engine_cs *engine) 1187 { 1188 struct drm_i915_private *i915 = engine->i915; 1189 struct i915_wa_list *w = &engine->whitelist; 1190 1191 wa_init_start(w, "whitelist", engine->name); 1192 1193 if (IS_GEN(i915, 11)) 1194 icl_whitelist_build(engine); 1195 else if (IS_CANNONLAKE(i915)) 1196 cnl_whitelist_build(engine); 1197 else if (IS_COFFEELAKE(i915)) 1198 cfl_whitelist_build(engine); 1199 else if (IS_GEMINILAKE(i915)) 1200 glk_whitelist_build(engine); 1201 else if (IS_KABYLAKE(i915)) 1202 kbl_whitelist_build(engine); 1203 else if (IS_BROXTON(i915)) 1204 bxt_whitelist_build(engine); 1205 else if (IS_SKYLAKE(i915)) 1206 skl_whitelist_build(engine); 1207 else if (INTEL_GEN(i915) <= 8) 1208 return; 1209 else 1210 MISSING_CASE(INTEL_GEN(i915)); 1211 1212 wa_init_finish(w); 1213 } 1214 1215 void intel_engine_apply_whitelist(struct intel_engine_cs *engine) 1216 { 1217 const struct i915_wa_list *wal = &engine->whitelist; 1218 struct intel_uncore *uncore = engine->uncore; 1219 const u32 base = engine->mmio_base; 1220 struct i915_wa *wa; 1221 unsigned int i; 1222 1223 if (!wal->count) 1224 return; 1225 1226 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) 1227 intel_uncore_write(uncore, 1228 RING_FORCE_TO_NONPRIV(base, i), 1229 i915_mmio_reg_offset(wa->reg)); 1230 1231 /* And clear the rest just in case of garbage */ 1232 for (; i < RING_MAX_NONPRIV_SLOTS; i++) 1233 intel_uncore_write(uncore, 1234 RING_FORCE_TO_NONPRIV(base, i), 1235 i915_mmio_reg_offset(RING_NOPID(base))); 1236 } 1237 1238 static void 1239 rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) 1240 { 1241 struct drm_i915_private *i915 = engine->i915; 1242 1243 if (IS_GEN(i915, 11)) { 1244 /* This is not an Wa. Enable for better image quality */ 1245 wa_masked_en(wal, 1246 _3D_CHICKEN3, 1247 _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE); 1248 1249 /* WaPipelineFlushCoherentLines:icl */ 1250 wa_write_or(wal, 1251 GEN8_L3SQCREG4, 1252 GEN8_LQSC_FLUSH_COHERENT_LINES); 1253 1254 /* 1255 * Wa_1405543622:icl 1256 * Formerly known as WaGAPZPriorityScheme 1257 */ 1258 wa_write_or(wal, 1259 GEN8_GARBCNTL, 1260 GEN11_ARBITRATION_PRIO_ORDER_MASK); 1261 1262 /* 1263 * Wa_1604223664:icl 1264 * Formerly known as WaL3BankAddressHashing 1265 */ 1266 wa_write_masked_or(wal, 1267 GEN8_GARBCNTL, 1268 GEN11_HASH_CTRL_EXCL_MASK, 1269 GEN11_HASH_CTRL_EXCL_BIT0); 1270 wa_write_masked_or(wal, 1271 GEN11_GLBLINVL, 1272 GEN11_BANK_HASH_ADDR_EXCL_MASK, 1273 GEN11_BANK_HASH_ADDR_EXCL_BIT0); 1274 1275 /* 1276 * Wa_1405733216:icl 1277 * Formerly known as WaDisableCleanEvicts 1278 */ 1279 wa_write_or(wal, 1280 GEN8_L3SQCREG4, 1281 GEN11_LQSC_CLEAN_EVICT_DISABLE); 1282 1283 /* WaForwardProgressSoftReset:icl */ 1284 wa_write_or(wal, 1285 GEN10_SCRATCH_LNCF2, 1286 PMFLUSHDONE_LNICRSDROP | 1287 PMFLUSH_GAPL3UNBLOCK | 1288 PMFLUSHDONE_LNEBLK); 1289 1290 /* Wa_1406609255:icl (pre-prod) */ 1291 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) 1292 wa_write_or(wal, 1293 GEN7_SARCHKMD, 1294 GEN7_DISABLE_DEMAND_PREFETCH); 1295 1296 /* Wa_1606682166:icl */ 1297 wa_write_or(wal, 1298 GEN7_SARCHKMD, 1299 GEN7_DISABLE_SAMPLER_PREFETCH); 1300 1301 /* Wa_1409178092:icl */ 1302 wa_write_masked_or(wal, 1303 GEN11_SCRATCH2, 1304 GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE, 1305 0); 1306 } 1307 1308 if (IS_GEN_RANGE(i915, 9, 11)) { 1309 /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */ 1310 wa_masked_en(wal, 1311 GEN7_FF_SLICE_CS_CHICKEN1, 1312 GEN9_FFSC_PERCTX_PREEMPT_CTRL); 1313 } 1314 1315 if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) { 1316 /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */ 1317 wa_write_or(wal, 1318 GEN8_GARBCNTL, 1319 GEN9_GAPS_TSV_CREDIT_DISABLE); 1320 } 1321 1322 if (IS_BROXTON(i915)) { 1323 /* WaDisablePooledEuLoadBalancingFix:bxt */ 1324 wa_masked_en(wal, 1325 FF_SLICE_CS_CHICKEN2, 1326 GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE); 1327 } 1328 1329 if (IS_GEN(i915, 9)) { 1330 /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */ 1331 wa_masked_en(wal, 1332 GEN9_CSFE_CHICKEN1_RCS, 1333 GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE); 1334 1335 /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */ 1336 wa_write_or(wal, 1337 BDW_SCRATCH1, 1338 GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); 1339 1340 /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */ 1341 if (IS_GEN9_LP(i915)) 1342 wa_write_masked_or(wal, 1343 GEN8_L3SQCREG1, 1344 L3_PRIO_CREDITS_MASK, 1345 L3_GENERAL_PRIO_CREDITS(62) | 1346 L3_HIGH_PRIO_CREDITS(2)); 1347 1348 /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */ 1349 wa_write_or(wal, 1350 GEN8_L3SQCREG4, 1351 GEN8_LQSC_FLUSH_COHERENT_LINES); 1352 } 1353 } 1354 1355 static void 1356 xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) 1357 { 1358 struct drm_i915_private *i915 = engine->i915; 1359 1360 /* WaKBLVECSSemaphoreWaitPoll:kbl */ 1361 if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) { 1362 wa_write(wal, 1363 RING_SEMA_WAIT_POLL(engine->mmio_base), 1364 1); 1365 } 1366 } 1367 1368 static void 1369 engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal) 1370 { 1371 if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8)) 1372 return; 1373 1374 if (engine->class == RENDER_CLASS) 1375 rcs_engine_wa_init(engine, wal); 1376 else 1377 xcs_engine_wa_init(engine, wal); 1378 } 1379 1380 void intel_engine_init_workarounds(struct intel_engine_cs *engine) 1381 { 1382 struct i915_wa_list *wal = &engine->wa_list; 1383 1384 if (INTEL_GEN(engine->i915) < 8) 1385 return; 1386 1387 wa_init_start(wal, "engine", engine->name); 1388 engine_init_workarounds(engine, wal); 1389 wa_init_finish(wal); 1390 } 1391 1392 void intel_engine_apply_workarounds(struct intel_engine_cs *engine) 1393 { 1394 wa_list_apply(engine->uncore, &engine->wa_list); 1395 } 1396 1397 static struct i915_vma * 1398 create_scratch(struct i915_address_space *vm, int count) 1399 { 1400 struct drm_i915_gem_object *obj; 1401 struct i915_vma *vma; 1402 unsigned int size; 1403 int err; 1404 1405 size = round_up(count * sizeof(u32), PAGE_SIZE); 1406 obj = i915_gem_object_create_internal(vm->i915, size); 1407 if (IS_ERR(obj)) 1408 return ERR_CAST(obj); 1409 1410 i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); 1411 1412 vma = i915_vma_instance(obj, vm, NULL); 1413 if (IS_ERR(vma)) { 1414 err = PTR_ERR(vma); 1415 goto err_obj; 1416 } 1417 1418 err = i915_vma_pin(vma, 0, 0, 1419 i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER); 1420 if (err) 1421 goto err_obj; 1422 1423 return vma; 1424 1425 err_obj: 1426 i915_gem_object_put(obj); 1427 return ERR_PTR(err); 1428 } 1429 1430 static bool mcr_range(struct drm_i915_private *i915, u32 offset) 1431 { 1432 /* 1433 * Registers in this range are affected by the MCR selector 1434 * which only controls CPU initiated MMIO. Routing does not 1435 * work for CS access so we cannot verify them on this path. 1436 */ 1437 if (INTEL_GEN(i915) >= 8 && (offset >= 0xb100 && offset <= 0xb3ff)) 1438 return true; 1439 1440 return false; 1441 } 1442 1443 static int 1444 wa_list_srm(struct i915_request *rq, 1445 const struct i915_wa_list *wal, 1446 struct i915_vma *vma) 1447 { 1448 struct drm_i915_private *i915 = rq->i915; 1449 unsigned int i, count = 0; 1450 const struct i915_wa *wa; 1451 u32 srm, *cs; 1452 1453 srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; 1454 if (INTEL_GEN(i915) >= 8) 1455 srm++; 1456 1457 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { 1458 if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg))) 1459 count++; 1460 } 1461 1462 cs = intel_ring_begin(rq, 4 * count); 1463 if (IS_ERR(cs)) 1464 return PTR_ERR(cs); 1465 1466 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { 1467 u32 offset = i915_mmio_reg_offset(wa->reg); 1468 1469 if (mcr_range(i915, offset)) 1470 continue; 1471 1472 *cs++ = srm; 1473 *cs++ = offset; 1474 *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i; 1475 *cs++ = 0; 1476 } 1477 intel_ring_advance(rq, cs); 1478 1479 return 0; 1480 } 1481 1482 static int engine_wa_list_verify(struct intel_context *ce, 1483 const struct i915_wa_list * const wal, 1484 const char *from) 1485 { 1486 const struct i915_wa *wa; 1487 struct i915_request *rq; 1488 struct i915_vma *vma; 1489 unsigned int i; 1490 u32 *results; 1491 int err; 1492 1493 if (!wal->count) 1494 return 0; 1495 1496 vma = create_scratch(&ce->engine->gt->ggtt->vm, wal->count); 1497 if (IS_ERR(vma)) 1498 return PTR_ERR(vma); 1499 1500 rq = intel_context_create_request(ce); 1501 if (IS_ERR(rq)) { 1502 err = PTR_ERR(rq); 1503 goto err_vma; 1504 } 1505 1506 err = wa_list_srm(rq, wal, vma); 1507 if (err) 1508 goto err_vma; 1509 1510 i915_request_add(rq); 1511 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 1512 err = -ETIME; 1513 goto err_vma; 1514 } 1515 1516 results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); 1517 if (IS_ERR(results)) { 1518 err = PTR_ERR(results); 1519 goto err_vma; 1520 } 1521 1522 err = 0; 1523 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { 1524 if (mcr_range(rq->i915, i915_mmio_reg_offset(wa->reg))) 1525 continue; 1526 1527 if (!wa_verify(wa, results[i], wal->name, from)) 1528 err = -ENXIO; 1529 } 1530 1531 i915_gem_object_unpin_map(vma->obj); 1532 1533 err_vma: 1534 i915_vma_unpin(vma); 1535 i915_vma_put(vma); 1536 return err; 1537 } 1538 1539 int intel_engine_verify_workarounds(struct intel_engine_cs *engine, 1540 const char *from) 1541 { 1542 return engine_wa_list_verify(engine->kernel_context, 1543 &engine->wa_list, 1544 from); 1545 } 1546 1547 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 1548 #include "selftest_workarounds.c" 1549 #endif 1550