/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2018 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_workarounds.h"

/**
 * DOC: Hardware workarounds
 *
 * This file is intended as a central place to implement most [1]_ of the
 * required workarounds for hardware to work as originally intended. They fall
 * in five basic categories depending on how/when they are applied:
 *
 * - Workarounds that touch registers that are saved/restored to/from the HW
 *   context image. The list is emitted (via Load Register Immediate commands)
 *   every time a new context is created.
 * - GT workarounds. The list of these WAs is applied whenever these registers
 *   revert to default values (on GPU reset, suspend/resume [2]_, etc.).
 * - Display workarounds. The list is applied during display clock-gating
 *   initialization.
 * - Workarounds that whitelist a privileged register, so that UMDs can manage
 *   them directly. This is just a special case of an MMIO workaround (as we
 *   write the list of these to-be-whitelisted registers to some special HW
 *   registers).
 * - Workaround batchbuffers, which get executed automatically by the hardware
 *   on every HW context restore.
 *
 * .. [1] Please note that there are other WAs that, due to their nature,
 *    cannot be applied from a central place. Those are peppered around the
 *    rest of the code, as needed.
 *
 * .. [2] Technically, some registers are power-context saved & restored, so
 *    they survive a suspend/resume. In practice, writing them again is not
 *    too costly and simplifies things. We can revisit this in the future.
 *
 * Layout
 * ~~~~~~
 *
 * Keep things in this file ordered by WA type, as per the above (context, GT,
 * display, register whitelist, batchbuffer). Then, inside each type, keep the
 * following order:
 *
 * - Infrastructure functions and macros
 * - WAs per platform in standard gen/chrono order
 * - Public functions to init or apply the given workaround type.
 */

/*
 * KBL revision ID ordering is bizarre; higher revision IDs map to lower
 * steppings in some cases. So rather than test against the revision ID
 * directly, let's map that into our own range of increasing IDs that we
 * can test against in a regular manner.
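 *
 * For example, in the table just below PCI revision 5 maps back down to GT
 * stepping KBL_REVID_C0 / display stepping KBL_REVID_B1, even though revision
 * 4 had already reached KBL_REVID_F0 / KBL_REVID_C0.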
 */

const struct i915_rev_steppings kbl_revids[] = {
	[0] = { .gt_stepping = KBL_REVID_A0, .disp_stepping = KBL_REVID_A0 },
	[1] = { .gt_stepping = KBL_REVID_B0, .disp_stepping = KBL_REVID_B0 },
	[2] = { .gt_stepping = KBL_REVID_C0, .disp_stepping = KBL_REVID_B0 },
	[3] = { .gt_stepping = KBL_REVID_D0, .disp_stepping = KBL_REVID_B0 },
	[4] = { .gt_stepping = KBL_REVID_F0, .disp_stepping = KBL_REVID_C0 },
	[5] = { .gt_stepping = KBL_REVID_C0, .disp_stepping = KBL_REVID_B1 },
	[6] = { .gt_stepping = KBL_REVID_D1, .disp_stepping = KBL_REVID_B1 },
	[7] = { .gt_stepping = KBL_REVID_G0, .disp_stepping = KBL_REVID_C0 },
};

const struct i915_rev_steppings tgl_uy_revid_step_tbl[] = {
	[0] = { .gt_stepping = STEP_A0, .disp_stepping = STEP_A0 },
	[1] = { .gt_stepping = STEP_B0, .disp_stepping = STEP_C0 },
	[2] = { .gt_stepping = STEP_B1, .disp_stepping = STEP_C0 },
	[3] = { .gt_stepping = STEP_C0, .disp_stepping = STEP_D0 },
};

/* The same GT stepping in tgl_uy_revids and tgl_revids doesn't mean the same HW */
const struct i915_rev_steppings tgl_revid_step_tbl[] = {
	[0] = { .gt_stepping = STEP_A0, .disp_stepping = STEP_B0 },
	[1] = { .gt_stepping = STEP_B0, .disp_stepping = STEP_D0 },
};

const struct i915_rev_steppings adls_revid_step_tbl[] = {
	[0x0] = { .gt_stepping = STEP_A0, .disp_stepping = STEP_A0 },
	[0x1] = { .gt_stepping = STEP_A0, .disp_stepping = STEP_A2 },
	[0x4] = { .gt_stepping = STEP_B0, .disp_stepping = STEP_B0 },
	[0x8] = { .gt_stepping = STEP_C0, .disp_stepping = STEP_B0 },
	[0xC] = { .gt_stepping = STEP_D0, .disp_stepping = STEP_C0 },
};

static void wa_init_start(struct i915_wa_list *wal, const char *name, const char *engine_name)
{
	wal->name = name;
	wal->engine_name = engine_name;
}

#define WA_LIST_CHUNK (1 << 4)

static void wa_init_finish(struct i915_wa_list *wal)
{
	/* Trim unused entries. */
	if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
		struct i915_wa *list = kmemdup(wal->list,
					       wal->count * sizeof(*list),
					       GFP_KERNEL);

		if (list) {
			kfree(wal->list);
			wal->list = list;
		}
	}

	if (!wal->count)
		return;

	DRM_DEBUG_DRIVER("Initialized %u %s workarounds on %s\n",
			 wal->wa_count, wal->name, wal->engine_name);
}

static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
{
	unsigned int addr = i915_mmio_reg_offset(wa->reg);
	unsigned int start = 0, end = wal->count;
	const unsigned int grow = WA_LIST_CHUNK;
	struct i915_wa *wa_;

	GEM_BUG_ON(!is_power_of_2(grow));

	if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
		struct i915_wa *list;

		list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
				     GFP_KERNEL);
		if (!list) {
			DRM_ERROR("No space for workaround init!\n");
			return;
		}

		if (wal->list) {
			memcpy(list, wal->list, sizeof(*wa) * wal->count);
			kfree(wal->list);
		}

		wal->list = list;
	}

	while (start < end) {
		unsigned int mid = start + (end - start) / 2;

		if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
			start = mid + 1;
		} else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
			end = mid;
		} else {
			wa_ = &wal->list[mid];

			if ((wa->clr | wa_->clr) && !(wa->clr & ~wa_->clr)) {
				DRM_ERROR("Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n",
					  i915_mmio_reg_offset(wa_->reg),
					  wa_->clr, wa_->set);

				wa_->set &= ~wa->clr;
			}

			wal->wa_count++;
			wa_->set |= wa->set;
			wa_->clr |= wa->clr;
			wa_->read |= wa->read;
			return;
		}
	}

	wal->wa_count++;
	wa_ = &wal->list[wal->count++];
	*wa_ = *wa;

	while (wa_-- > wal->list) {
		GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
			   i915_mmio_reg_offset(wa_[1].reg));
		if (i915_mmio_reg_offset(wa_[1].reg) >
		    i915_mmio_reg_offset(wa_[0].reg))
			break;

		swap(wa_[1], wa_[0]);
	}
}

static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
		   u32 clear, u32 set, u32 read_mask)
{
	struct i915_wa wa = {
		.reg = reg,
		.clr = clear,
		.set = set,
		.read = read_mask,
	};

	_wa_add(wal, &wa);
}

static void
wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
{
	wa_add(wal, reg, clear, set, clear);
}

static void
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{
	wa_write_clr_set(wal, reg, ~0, set);
}

static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{
	wa_write_clr_set(wal, reg, set, set);
}

static void
wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
{
	wa_write_clr_set(wal, reg, clr, 0);
}

/*
 * WA operations on "masked register". A masked register has the upper 16 bits
 * documented as "masked" in b-spec. Its purpose is to allow writing to just a
 * portion of the register without a rmw: you simply write in the upper 16 bits
 * the mask of bits you are going to modify.
 *
 * The wa_masked_* family of functions already does the necessary operations to
 * calculate the mask based on the parameters passed, so the user only has to
 * provide the lower 16 bits of that register.
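 *
 * As an illustrative sketch of what the underlying _MASKED_BIT_ENABLE() /
 * _MASKED_BIT_DISABLE() helpers produce: wa_masked_en(wal, reg, BIT(3))
 * queues a write of 0x00080008 (mask bit in the upper halfword, value bit in
 * the lower halfword), while wa_masked_dis(wal, reg, BIT(3)) queues
 * 0x00080000 (mask bit set, value bit cleared).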
238 */ 239 240 static void 241 wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) 242 { 243 wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val); 244 } 245 246 static void 247 wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val) 248 { 249 wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val); 250 } 251 252 static void 253 wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg, 254 u32 mask, u32 val) 255 { 256 wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask); 257 } 258 259 static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine, 260 struct i915_wa_list *wal) 261 { 262 wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING); 263 } 264 265 static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine, 266 struct i915_wa_list *wal) 267 { 268 wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING); 269 } 270 271 static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine, 272 struct i915_wa_list *wal) 273 { 274 wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING); 275 276 /* WaDisableAsyncFlipPerfMode:bdw,chv */ 277 wa_masked_en(wal, MI_MODE, ASYNC_FLIP_PERF_DISABLE); 278 279 /* WaDisablePartialInstShootdown:bdw,chv */ 280 wa_masked_en(wal, GEN8_ROW_CHICKEN, 281 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); 282 283 /* Use Force Non-Coherent whenever executing a 3D context. This is a 284 * workaround for for a possible hang in the unlikely event a TLB 285 * invalidation occurs during a PSD flush. 286 */ 287 /* WaForceEnableNonCoherent:bdw,chv */ 288 /* WaHdcDisableFetchWhenMasked:bdw,chv */ 289 wa_masked_en(wal, HDC_CHICKEN0, 290 HDC_DONOT_FETCH_MEM_WHEN_MASKED | 291 HDC_FORCE_NON_COHERENT); 292 293 /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: 294 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping 295 * polygons in the same 8x4 pixel/sample area to be processed without 296 * stalling waiting for the earlier ones to write to Hierarchical Z 297 * buffer." 298 * 299 * This optimization is off by default for BDW and CHV; turn it on. 300 */ 301 wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); 302 303 /* Wa4x4STCOptimizationDisable:bdw,chv */ 304 wa_masked_en(wal, CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); 305 306 /* 307 * BSpec recommends 8x4 when MSAA is used, 308 * however in practice 16x4 seems fastest. 309 * 310 * Note that PS/WM thread counts depend on the WIZ hashing 311 * disable bit, which we don't touch here, but it's good 312 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 313 */ 314 wa_masked_field_set(wal, GEN7_GT_MODE, 315 GEN6_WIZ_HASHING_MASK, 316 GEN6_WIZ_HASHING_16x4); 317 } 318 319 static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine, 320 struct i915_wa_list *wal) 321 { 322 struct drm_i915_private *i915 = engine->i915; 323 324 gen8_ctx_workarounds_init(engine, wal); 325 326 /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ 327 wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); 328 329 /* WaDisableDopClockGating:bdw 330 * 331 * Also see the related UCGTCL1 write in bdw_init_clock_gating() 332 * to disable EUTC clock gating. 333 */ 334 wa_masked_en(wal, GEN7_ROW_CHICKEN2, 335 DOP_CLOCK_GATING_DISABLE); 336 337 wa_masked_en(wal, HALF_SLICE_CHICKEN3, 338 GEN8_SAMPLER_POWER_BYPASS_DIS); 339 340 wa_masked_en(wal, HDC_CHICKEN0, 341 /* WaForceContextSaveRestoreNonCoherent:bdw */ 342 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | 343 /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ 344 (IS_BDW_GT3(i915) ? 
HDC_FENCE_DEST_SLM_DISABLE : 0)); 345 } 346 347 static void chv_ctx_workarounds_init(struct intel_engine_cs *engine, 348 struct i915_wa_list *wal) 349 { 350 gen8_ctx_workarounds_init(engine, wal); 351 352 /* WaDisableThreadStallDopClockGating:chv */ 353 wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); 354 355 /* Improve HiZ throughput on CHV. */ 356 wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); 357 } 358 359 static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, 360 struct i915_wa_list *wal) 361 { 362 struct drm_i915_private *i915 = engine->i915; 363 364 if (HAS_LLC(i915)) { 365 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl 366 * 367 * Must match Display Engine. See 368 * WaCompressedResourceDisplayNewHashMode. 369 */ 370 wa_masked_en(wal, COMMON_SLICE_CHICKEN2, 371 GEN9_PBE_COMPRESSED_HASH_SELECTION); 372 wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, 373 GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); 374 } 375 376 /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */ 377 /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */ 378 wa_masked_en(wal, GEN8_ROW_CHICKEN, 379 FLOW_CONTROL_ENABLE | 380 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); 381 382 /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */ 383 /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */ 384 wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, 385 GEN9_ENABLE_YV12_BUGFIX | 386 GEN9_ENABLE_GPGPU_PREEMPTION); 387 388 /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */ 389 /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */ 390 wa_masked_en(wal, CACHE_MODE_1, 391 GEN8_4x4_STC_OPTIMIZATION_DISABLE | 392 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE); 393 394 /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */ 395 wa_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5, 396 GEN9_CCS_TLB_PREFETCH_ENABLE); 397 398 /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */ 399 wa_masked_en(wal, HDC_CHICKEN0, 400 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | 401 HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); 402 403 /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are 404 * both tied to WaForceContextSaveRestoreNonCoherent 405 * in some hsds for skl. We keep the tie for all gen9. The 406 * documentation is a bit hazy and so we want to get common behaviour, 407 * even though there is no clear evidence we would need both on kbl/bxt. 408 * This area has been source of system hangs so we play it safe 409 * and mimic the skl regardless of what bspec says. 410 * 411 * Use Force Non-Coherent whenever executing a 3D context. This 412 * is a workaround for a possible hang in the unlikely event 413 * a TLB invalidation occurs during a PSD flush. 414 */ 415 416 /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */ 417 wa_masked_en(wal, HDC_CHICKEN0, 418 HDC_FORCE_NON_COHERENT); 419 420 /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */ 421 if (IS_SKYLAKE(i915) || 422 IS_KABYLAKE(i915) || 423 IS_COFFEELAKE(i915) || 424 IS_COMETLAKE(i915)) 425 wa_masked_en(wal, HALF_SLICE_CHICKEN3, 426 GEN8_SAMPLER_POWER_BYPASS_DIS); 427 428 /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */ 429 wa_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); 430 431 /* 432 * Supporting preemption with fine-granularity requires changes in the 433 * batch buffer programming. Since we can't break old userspace, we 434 * need to set our default preemption level to safe value. 
Userspace is 435 * still able to use more fine-grained preemption levels, since in 436 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the 437 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are 438 * not real HW workarounds, but merely a way to start using preemption 439 * while maintaining old contract with userspace. 440 */ 441 442 /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */ 443 wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); 444 445 /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */ 446 wa_masked_field_set(wal, GEN8_CS_CHICKEN1, 447 GEN9_PREEMPT_GPGPU_LEVEL_MASK, 448 GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); 449 450 /* WaClearHIZ_WM_CHICKEN3:bxt,glk */ 451 if (IS_GEN9_LP(i915)) 452 wa_masked_en(wal, GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ); 453 } 454 455 static void skl_tune_iz_hashing(struct intel_engine_cs *engine, 456 struct i915_wa_list *wal) 457 { 458 struct intel_gt *gt = engine->gt; 459 u8 vals[3] = { 0, 0, 0 }; 460 unsigned int i; 461 462 for (i = 0; i < 3; i++) { 463 u8 ss; 464 465 /* 466 * Only consider slices where one, and only one, subslice has 7 467 * EUs 468 */ 469 if (!is_power_of_2(gt->info.sseu.subslice_7eu[i])) 470 continue; 471 472 /* 473 * subslice_7eu[i] != 0 (because of the check above) and 474 * ss_max == 4 (maximum number of subslices possible per slice) 475 * 476 * -> 0 <= ss <= 3; 477 */ 478 ss = ffs(gt->info.sseu.subslice_7eu[i]) - 1; 479 vals[i] = 3 - ss; 480 } 481 482 if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) 483 return; 484 485 /* Tune IZ hashing. See intel_device_info_runtime_init() */ 486 wa_masked_field_set(wal, GEN7_GT_MODE, 487 GEN9_IZ_HASHING_MASK(2) | 488 GEN9_IZ_HASHING_MASK(1) | 489 GEN9_IZ_HASHING_MASK(0), 490 GEN9_IZ_HASHING(2, vals[2]) | 491 GEN9_IZ_HASHING(1, vals[1]) | 492 GEN9_IZ_HASHING(0, vals[0])); 493 } 494 495 static void skl_ctx_workarounds_init(struct intel_engine_cs *engine, 496 struct i915_wa_list *wal) 497 { 498 gen9_ctx_workarounds_init(engine, wal); 499 skl_tune_iz_hashing(engine, wal); 500 } 501 502 static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine, 503 struct i915_wa_list *wal) 504 { 505 gen9_ctx_workarounds_init(engine, wal); 506 507 /* WaDisableThreadStallDopClockGating:bxt */ 508 wa_masked_en(wal, GEN8_ROW_CHICKEN, 509 STALL_DOP_GATING_DISABLE); 510 511 /* WaToEnableHwFixForPushConstHWBug:bxt */ 512 wa_masked_en(wal, COMMON_SLICE_CHICKEN2, 513 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); 514 } 515 516 static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine, 517 struct i915_wa_list *wal) 518 { 519 struct drm_i915_private *i915 = engine->i915; 520 521 gen9_ctx_workarounds_init(engine, wal); 522 523 /* WaToEnableHwFixForPushConstHWBug:kbl */ 524 if (IS_KBL_GT_REVID(i915, KBL_REVID_C0, REVID_FOREVER)) 525 wa_masked_en(wal, COMMON_SLICE_CHICKEN2, 526 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); 527 528 /* WaDisableSbeCacheDispatchPortSharing:kbl */ 529 wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1, 530 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); 531 } 532 533 static void glk_ctx_workarounds_init(struct intel_engine_cs *engine, 534 struct i915_wa_list *wal) 535 { 536 gen9_ctx_workarounds_init(engine, wal); 537 538 /* WaToEnableHwFixForPushConstHWBug:glk */ 539 wa_masked_en(wal, COMMON_SLICE_CHICKEN2, 540 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); 541 } 542 543 static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, 544 struct i915_wa_list *wal) 545 { 546 gen9_ctx_workarounds_init(engine, wal); 547 548 /* 
WaToEnableHwFixForPushConstHWBug:cfl */ 549 wa_masked_en(wal, COMMON_SLICE_CHICKEN2, 550 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); 551 552 /* WaDisableSbeCacheDispatchPortSharing:cfl */ 553 wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1, 554 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); 555 } 556 557 static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine, 558 struct i915_wa_list *wal) 559 { 560 /* WaForceContextSaveRestoreNonCoherent:cnl */ 561 wa_masked_en(wal, CNL_HDC_CHICKEN0, 562 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT); 563 564 /* WaDisableReplayBufferBankArbitrationOptimization:cnl */ 565 wa_masked_en(wal, COMMON_SLICE_CHICKEN2, 566 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); 567 568 /* WaPushConstantDereferenceHoldDisable:cnl */ 569 wa_masked_en(wal, GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE); 570 571 /* FtrEnableFastAnisoL1BankingFix:cnl */ 572 wa_masked_en(wal, HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX); 573 574 /* WaDisable3DMidCmdPreemption:cnl */ 575 wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); 576 577 /* WaDisableGPGPUMidCmdPreemption:cnl */ 578 wa_masked_field_set(wal, GEN8_CS_CHICKEN1, 579 GEN9_PREEMPT_GPGPU_LEVEL_MASK, 580 GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); 581 582 /* WaDisableEarlyEOT:cnl */ 583 wa_masked_en(wal, GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT); 584 } 585 586 static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, 587 struct i915_wa_list *wal) 588 { 589 struct drm_i915_private *i915 = engine->i915; 590 591 /* WaDisableBankHangMode:icl */ 592 wa_write(wal, 593 GEN8_L3CNTLREG, 594 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) | 595 GEN8_ERRDETBCTRL); 596 597 /* Wa_1604370585:icl (pre-prod) 598 * Formerly known as WaPushConstantDereferenceHoldDisable 599 */ 600 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) 601 wa_masked_en(wal, GEN7_ROW_CHICKEN2, 602 PUSH_CONSTANT_DEREF_DISABLE); 603 604 /* WaForceEnableNonCoherent:icl 605 * This is not the same workaround as in early Gen9 platforms, where 606 * lacking this could cause system hangs, but coherency performance 607 * overhead is high and only a few compute workloads really need it 608 * (the register is whitelisted in hardware now, so UMDs can opt in 609 * for coherency if they have a good reason). 
610 */ 611 wa_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); 612 613 /* Wa_2006611047:icl (pre-prod) 614 * Formerly known as WaDisableImprovedTdlClkGating 615 */ 616 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) 617 wa_masked_en(wal, GEN7_ROW_CHICKEN2, 618 GEN11_TDL_CLOCK_GATING_FIX_DISABLE); 619 620 /* Wa_2006665173:icl (pre-prod) */ 621 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) 622 wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3, 623 GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC); 624 625 /* WaEnableFloatBlendOptimization:icl */ 626 wa_write_clr_set(wal, 627 GEN10_CACHE_MODE_SS, 628 0, /* write-only, so skip validation */ 629 _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE)); 630 631 /* WaDisableGPGPUMidThreadPreemption:icl */ 632 wa_masked_field_set(wal, GEN8_CS_CHICKEN1, 633 GEN9_PREEMPT_GPGPU_LEVEL_MASK, 634 GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); 635 636 /* allow headerless messages for preemptible GPGPU context */ 637 wa_masked_en(wal, GEN10_SAMPLER_MODE, 638 GEN11_SAMPLER_ENABLE_HEADLESS_MSG); 639 640 /* Wa_1604278689:icl,ehl */ 641 wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID); 642 wa_write_clr_set(wal, IVB_FBC_RT_BASE_UPPER, 643 0, /* write-only register; skip validation */ 644 0xFFFFFFFF); 645 646 /* Wa_1406306137:icl,ehl */ 647 wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU); 648 } 649 650 static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, 651 struct i915_wa_list *wal) 652 { 653 /* 654 * Wa_1409142259:tgl 655 * Wa_1409347922:tgl 656 * Wa_1409252684:tgl 657 * Wa_1409217633:tgl 658 * Wa_1409207793:tgl 659 * Wa_1409178076:tgl 660 * Wa_1408979724:tgl 661 * Wa_14010443199:rkl 662 * Wa_14010698770:rkl 663 */ 664 wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3, 665 GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); 666 667 /* WaDisableGPGPUMidThreadPreemption:gen12 */ 668 wa_masked_field_set(wal, GEN8_CS_CHICKEN1, 669 GEN9_PREEMPT_GPGPU_LEVEL_MASK, 670 GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); 671 } 672 673 static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, 674 struct i915_wa_list *wal) 675 { 676 gen12_ctx_workarounds_init(engine, wal); 677 678 /* 679 * Wa_1604555607:tgl,rkl 680 * 681 * Note that the implementation of this workaround is further modified 682 * according to the FF_MODE2 guidance given by Wa_1608008084:gen12. 683 * FF_MODE2 register will return the wrong value when read. The default 684 * value for this register is zero for all fields and there are no bit 685 * masks. So instead of doing a RMW we should just write the GS Timer 686 * and TDS timer values for Wa_1604555607 and Wa_16011163337. 687 */ 688 wa_add(wal, 689 FF_MODE2, 690 FF_MODE2_GS_TIMER_MASK | FF_MODE2_TDS_TIMER_MASK, 691 FF_MODE2_GS_TIMER_224 | FF_MODE2_TDS_TIMER_128, 692 0); 693 } 694 695 static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine, 696 struct i915_wa_list *wal) 697 { 698 gen12_ctx_workarounds_init(engine, wal); 699 700 /* Wa_1409044764 */ 701 wa_masked_dis(wal, GEN11_COMMON_SLICE_CHICKEN3, 702 DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN); 703 704 /* Wa_22010493298 */ 705 wa_masked_en(wal, HIZ_CHICKEN, 706 DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE); 707 708 /* 709 * Wa_16011163337 710 * 711 * Like in tgl_ctx_workarounds_init(), read verification is ignored due 712 * to Wa_1608008084. 
713 */ 714 wa_add(wal, 715 FF_MODE2, 716 FF_MODE2_GS_TIMER_MASK, FF_MODE2_GS_TIMER_224, 0); 717 } 718 719 static void 720 __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, 721 struct i915_wa_list *wal, 722 const char *name) 723 { 724 struct drm_i915_private *i915 = engine->i915; 725 726 if (engine->class != RENDER_CLASS) 727 return; 728 729 wa_init_start(wal, name, engine->name); 730 731 if (IS_DG1(i915)) 732 dg1_ctx_workarounds_init(engine, wal); 733 else if (IS_ALDERLAKE_S(i915) || IS_ROCKETLAKE(i915) || 734 IS_TIGERLAKE(i915)) 735 tgl_ctx_workarounds_init(engine, wal); 736 else if (IS_GEN(i915, 12)) 737 gen12_ctx_workarounds_init(engine, wal); 738 else if (IS_GEN(i915, 11)) 739 icl_ctx_workarounds_init(engine, wal); 740 else if (IS_CANNONLAKE(i915)) 741 cnl_ctx_workarounds_init(engine, wal); 742 else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) 743 cfl_ctx_workarounds_init(engine, wal); 744 else if (IS_GEMINILAKE(i915)) 745 glk_ctx_workarounds_init(engine, wal); 746 else if (IS_KABYLAKE(i915)) 747 kbl_ctx_workarounds_init(engine, wal); 748 else if (IS_BROXTON(i915)) 749 bxt_ctx_workarounds_init(engine, wal); 750 else if (IS_SKYLAKE(i915)) 751 skl_ctx_workarounds_init(engine, wal); 752 else if (IS_CHERRYVIEW(i915)) 753 chv_ctx_workarounds_init(engine, wal); 754 else if (IS_BROADWELL(i915)) 755 bdw_ctx_workarounds_init(engine, wal); 756 else if (IS_GEN(i915, 7)) 757 gen7_ctx_workarounds_init(engine, wal); 758 else if (IS_GEN(i915, 6)) 759 gen6_ctx_workarounds_init(engine, wal); 760 else if (INTEL_GEN(i915) < 8) 761 return; 762 else 763 MISSING_CASE(INTEL_GEN(i915)); 764 765 wa_init_finish(wal); 766 } 767 768 void intel_engine_init_ctx_wa(struct intel_engine_cs *engine) 769 { 770 __intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context"); 771 } 772 773 int intel_engine_emit_ctx_wa(struct i915_request *rq) 774 { 775 struct i915_wa_list *wal = &rq->engine->ctx_wa_list; 776 struct i915_wa *wa; 777 unsigned int i; 778 u32 *cs; 779 int ret; 780 781 if (wal->count == 0) 782 return 0; 783 784 ret = rq->engine->emit_flush(rq, EMIT_BARRIER); 785 if (ret) 786 return ret; 787 788 cs = intel_ring_begin(rq, (wal->count * 2 + 2)); 789 if (IS_ERR(cs)) 790 return PTR_ERR(cs); 791 792 *cs++ = MI_LOAD_REGISTER_IMM(wal->count); 793 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { 794 *cs++ = i915_mmio_reg_offset(wa->reg); 795 *cs++ = wa->set; 796 } 797 *cs++ = MI_NOOP; 798 799 intel_ring_advance(rq, cs); 800 801 ret = rq->engine->emit_flush(rq, EMIT_BARRIER); 802 if (ret) 803 return ret; 804 805 return 0; 806 } 807 808 static void 809 gen4_gt_workarounds_init(struct drm_i915_private *i915, 810 struct i915_wa_list *wal) 811 { 812 /* WaDisable_RenderCache_OperationalFlush:gen4,ilk */ 813 wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); 814 } 815 816 static void 817 g4x_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 818 { 819 gen4_gt_workarounds_init(i915, wal); 820 821 /* WaDisableRenderCachePipelinedFlush:g4x,ilk */ 822 wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE); 823 } 824 825 static void 826 ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 827 { 828 g4x_gt_workarounds_init(i915, wal); 829 830 wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED); 831 } 832 833 static void 834 snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 835 { 836 } 837 838 static void 839 ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 840 { 841 
/* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */ 842 wa_masked_dis(wal, 843 GEN7_COMMON_SLICE_CHICKEN1, 844 GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); 845 846 /* WaApplyL3ControlAndL3ChickenMode:ivb */ 847 wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL); 848 wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE); 849 850 /* WaForceL3Serialization:ivb */ 851 wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE); 852 } 853 854 static void 855 vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 856 { 857 /* WaForceL3Serialization:vlv */ 858 wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE); 859 860 /* 861 * WaIncreaseL3CreditsForVLVB0:vlv 862 * This is the hardware default actually. 863 */ 864 wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE); 865 } 866 867 static void 868 hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 869 { 870 /* L3 caching of data atomics doesn't work -- disable it. */ 871 wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); 872 873 wa_add(wal, 874 HSW_ROW_CHICKEN3, 0, 875 _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE), 876 0 /* XXX does this reg exist? */); 877 878 /* WaVSRefCountFullforceMissDisable:hsw */ 879 wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME); 880 } 881 882 static void 883 gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 884 { 885 /* WaDisableKillLogic:bxt,skl,kbl */ 886 if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915)) 887 wa_write_or(wal, 888 GAM_ECOCHK, 889 ECOCHK_DIS_TLB); 890 891 if (HAS_LLC(i915)) { 892 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl 893 * 894 * Must match Display Engine. See 895 * WaCompressedResourceDisplayNewHashMode. 
896 */ 897 wa_write_or(wal, 898 MMCD_MISC_CTRL, 899 MMCD_PCLA | MMCD_HOTSPOT_EN); 900 } 901 902 /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */ 903 wa_write_or(wal, 904 GAM_ECOCHK, 905 BDW_DISABLE_HDC_INVALIDATION); 906 } 907 908 static void 909 skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 910 { 911 gen9_gt_workarounds_init(i915, wal); 912 913 /* WaDisableGafsUnitClkGating:skl */ 914 wa_write_or(wal, 915 GEN7_UCGCTL4, 916 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); 917 918 /* WaInPlaceDecompressionHang:skl */ 919 if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER)) 920 wa_write_or(wal, 921 GEN9_GAMT_ECO_REG_RW_IA, 922 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); 923 } 924 925 static void 926 bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 927 { 928 gen9_gt_workarounds_init(i915, wal); 929 930 /* WaInPlaceDecompressionHang:bxt */ 931 wa_write_or(wal, 932 GEN9_GAMT_ECO_REG_RW_IA, 933 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); 934 } 935 936 static void 937 kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 938 { 939 gen9_gt_workarounds_init(i915, wal); 940 941 /* WaDisableDynamicCreditSharing:kbl */ 942 if (IS_KBL_GT_REVID(i915, 0, KBL_REVID_B0)) 943 wa_write_or(wal, 944 GAMT_CHKN_BIT_REG, 945 GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); 946 947 /* WaDisableGafsUnitClkGating:kbl */ 948 wa_write_or(wal, 949 GEN7_UCGCTL4, 950 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); 951 952 /* WaInPlaceDecompressionHang:kbl */ 953 wa_write_or(wal, 954 GEN9_GAMT_ECO_REG_RW_IA, 955 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); 956 } 957 958 static void 959 glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 960 { 961 gen9_gt_workarounds_init(i915, wal); 962 } 963 964 static void 965 cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 966 { 967 gen9_gt_workarounds_init(i915, wal); 968 969 /* WaDisableGafsUnitClkGating:cfl */ 970 wa_write_or(wal, 971 GEN7_UCGCTL4, 972 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); 973 974 /* WaInPlaceDecompressionHang:cfl */ 975 wa_write_or(wal, 976 GEN9_GAMT_ECO_REG_RW_IA, 977 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); 978 } 979 980 static void 981 wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) 982 { 983 const struct sseu_dev_info *sseu = &i915->gt.info.sseu; 984 unsigned int slice, subslice; 985 u32 l3_en, mcr, mcr_mask; 986 987 GEM_BUG_ON(INTEL_GEN(i915) < 10); 988 989 /* 990 * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl 991 * L3Banks could be fused off in single slice scenario. If that is 992 * the case, we might need to program MCR select to a valid L3Bank 993 * by default, to make sure we correctly read certain registers 994 * later on (in the range 0xB100 - 0xB3FF). 995 * 996 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl 997 * Before any MMIO read into slice/subslice specific registers, MCR 998 * packet control register needs to be programmed to point to any 999 * enabled s/ss pair. Otherwise, incorrect values will be returned. 1000 * This means each subsequent MMIO read will be forwarded to an 1001 * specific s/ss combination, but this is OK since these registers 1002 * are consistent across s/ss in almost all cases. In the rare 1003 * occasions, such as INSTDONE, where this value is dependent 1004 * on s/ss combo, the read should be done with read_subslice_reg. 
1005 * 1006 * Since GEN8_MCR_SELECTOR contains dual-purpose bits which select both 1007 * to which subslice, or to which L3 bank, the respective mmio reads 1008 * will go, we have to find a common index which works for both 1009 * accesses. 1010 * 1011 * Case where we cannot find a common index fortunately should not 1012 * happen in production hardware, so we only emit a warning instead of 1013 * implementing something more complex that requires checking the range 1014 * of every MMIO read. 1015 */ 1016 1017 if (INTEL_GEN(i915) >= 10 && is_power_of_2(sseu->slice_mask)) { 1018 u32 l3_fuse = 1019 intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) & 1020 GEN10_L3BANK_MASK; 1021 1022 drm_dbg(&i915->drm, "L3 fuse = %x\n", l3_fuse); 1023 l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse); 1024 } else { 1025 l3_en = ~0; 1026 } 1027 1028 slice = fls(sseu->slice_mask) - 1; 1029 subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice)); 1030 if (!subslice) { 1031 drm_warn(&i915->drm, 1032 "No common index found between subslice mask %x and L3 bank mask %x!\n", 1033 intel_sseu_get_subslices(sseu, slice), l3_en); 1034 subslice = fls(l3_en); 1035 drm_WARN_ON(&i915->drm, !subslice); 1036 } 1037 subslice--; 1038 1039 if (INTEL_GEN(i915) >= 11) { 1040 mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); 1041 mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; 1042 } else { 1043 mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); 1044 mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; 1045 } 1046 1047 drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr); 1048 1049 wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr); 1050 } 1051 1052 static void 1053 cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 1054 { 1055 wa_init_mcr(i915, wal); 1056 1057 /* WaInPlaceDecompressionHang:cnl */ 1058 wa_write_or(wal, 1059 GEN9_GAMT_ECO_REG_RW_IA, 1060 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); 1061 } 1062 1063 static void 1064 icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 1065 { 1066 wa_init_mcr(i915, wal); 1067 1068 /* WaInPlaceDecompressionHang:icl */ 1069 wa_write_or(wal, 1070 GEN9_GAMT_ECO_REG_RW_IA, 1071 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); 1072 1073 /* WaModifyGamTlbPartitioning:icl */ 1074 wa_write_clr_set(wal, 1075 GEN11_GACB_PERF_CTRL, 1076 GEN11_HASH_CTRL_MASK, 1077 GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4); 1078 1079 /* Wa_1405766107:icl 1080 * Formerly known as WaCL2SFHalfMaxAlloc 1081 */ 1082 wa_write_or(wal, 1083 GEN11_LSN_UNSLCVC, 1084 GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC | 1085 GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC); 1086 1087 /* Wa_220166154:icl 1088 * Formerly known as WaDisCtxReload 1089 */ 1090 wa_write_or(wal, 1091 GEN8_GAMW_ECO_DEV_RW_IA, 1092 GAMW_ECO_DEV_CTX_RELOAD_DISABLE); 1093 1094 /* Wa_1405779004:icl (pre-prod) */ 1095 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) 1096 wa_write_or(wal, 1097 SLICE_UNIT_LEVEL_CLKGATE, 1098 MSCUNIT_CLKGATE_DIS); 1099 1100 /* Wa_1406838659:icl (pre-prod) */ 1101 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) 1102 wa_write_or(wal, 1103 INF_UNIT_LEVEL_CLKGATE, 1104 CGPSF_CLKGATE_DIS); 1105 1106 /* Wa_1406463099:icl 1107 * Formerly known as WaGamTlbPendError 1108 */ 1109 wa_write_or(wal, 1110 GAMT_CHKN_BIT_REG, 1111 GAMT_CHKN_DISABLE_L3_COH_PIPE); 1112 1113 /* Wa_1607087056:icl,ehl,jsl */ 1114 if (IS_ICELAKE(i915) || 1115 IS_JSL_EHL_REVID(i915, EHL_REVID_A0, EHL_REVID_A0)) { 1116 wa_write_or(wal, 1117 SLICE_UNIT_LEVEL_CLKGATE, 1118 
L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); 1119 } 1120 } 1121 1122 static void 1123 gen12_gt_workarounds_init(struct drm_i915_private *i915, 1124 struct i915_wa_list *wal) 1125 { 1126 wa_init_mcr(i915, wal); 1127 } 1128 1129 static void 1130 tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 1131 { 1132 gen12_gt_workarounds_init(i915, wal); 1133 1134 /* Wa_1409420604:tgl */ 1135 if (IS_TGL_UY_GT_STEPPING(i915, STEP_A0, STEP_A0)) 1136 wa_write_or(wal, 1137 SUBSLICE_UNIT_LEVEL_CLKGATE2, 1138 CPSSUNIT_CLKGATE_DIS); 1139 1140 /* Wa_1607087056:tgl also know as BUG:1409180338 */ 1141 if (IS_TGL_UY_GT_STEPPING(i915, STEP_A0, STEP_A0)) 1142 wa_write_or(wal, 1143 SLICE_UNIT_LEVEL_CLKGATE, 1144 L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); 1145 1146 /* Wa_1408615072:tgl[a0] */ 1147 if (IS_TGL_UY_GT_STEPPING(i915, STEP_A0, STEP_A0)) 1148 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, 1149 VSUNIT_CLKGATE_DIS_TGL); 1150 } 1151 1152 static void 1153 dg1_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 1154 { 1155 gen12_gt_workarounds_init(i915, wal); 1156 1157 /* Wa_1607087056:dg1 */ 1158 if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0)) 1159 wa_write_or(wal, 1160 SLICE_UNIT_LEVEL_CLKGATE, 1161 L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); 1162 1163 /* Wa_1409420604:dg1 */ 1164 if (IS_DG1(i915)) 1165 wa_write_or(wal, 1166 SUBSLICE_UNIT_LEVEL_CLKGATE2, 1167 CPSSUNIT_CLKGATE_DIS); 1168 1169 /* Wa_1408615072:dg1 */ 1170 /* Empirical testing shows this register is unaffected by engine reset. */ 1171 if (IS_DG1(i915)) 1172 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, 1173 VSUNIT_CLKGATE_DIS_TGL); 1174 } 1175 1176 static void 1177 gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) 1178 { 1179 if (IS_DG1(i915)) 1180 dg1_gt_workarounds_init(i915, wal); 1181 else if (IS_TIGERLAKE(i915)) 1182 tgl_gt_workarounds_init(i915, wal); 1183 else if (IS_GEN(i915, 12)) 1184 gen12_gt_workarounds_init(i915, wal); 1185 else if (IS_GEN(i915, 11)) 1186 icl_gt_workarounds_init(i915, wal); 1187 else if (IS_CANNONLAKE(i915)) 1188 cnl_gt_workarounds_init(i915, wal); 1189 else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) 1190 cfl_gt_workarounds_init(i915, wal); 1191 else if (IS_GEMINILAKE(i915)) 1192 glk_gt_workarounds_init(i915, wal); 1193 else if (IS_KABYLAKE(i915)) 1194 kbl_gt_workarounds_init(i915, wal); 1195 else if (IS_BROXTON(i915)) 1196 bxt_gt_workarounds_init(i915, wal); 1197 else if (IS_SKYLAKE(i915)) 1198 skl_gt_workarounds_init(i915, wal); 1199 else if (IS_HASWELL(i915)) 1200 hsw_gt_workarounds_init(i915, wal); 1201 else if (IS_VALLEYVIEW(i915)) 1202 vlv_gt_workarounds_init(i915, wal); 1203 else if (IS_IVYBRIDGE(i915)) 1204 ivb_gt_workarounds_init(i915, wal); 1205 else if (IS_GEN(i915, 6)) 1206 snb_gt_workarounds_init(i915, wal); 1207 else if (IS_GEN(i915, 5)) 1208 ilk_gt_workarounds_init(i915, wal); 1209 else if (IS_G4X(i915)) 1210 g4x_gt_workarounds_init(i915, wal); 1211 else if (IS_GEN(i915, 4)) 1212 gen4_gt_workarounds_init(i915, wal); 1213 else if (INTEL_GEN(i915) <= 8) 1214 return; 1215 else 1216 MISSING_CASE(INTEL_GEN(i915)); 1217 } 1218 1219 void intel_gt_init_workarounds(struct drm_i915_private *i915) 1220 { 1221 struct i915_wa_list *wal = &i915->gt_wa_list; 1222 1223 wa_init_start(wal, "GT", "global"); 1224 gt_init_workarounds(i915, wal); 1225 wa_init_finish(wal); 1226 } 1227 1228 static enum forcewake_domains 1229 wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal) 1230 { 1231 enum forcewake_domains fw = 0; 1232 
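	/*
	 * Accumulate the forcewake domains needed to read and write every
	 * register in the list, so the caller can take them once around the
	 * whole rmw loop rather than per register.
	 */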
struct i915_wa *wa; 1233 unsigned int i; 1234 1235 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) 1236 fw |= intel_uncore_forcewake_for_reg(uncore, 1237 wa->reg, 1238 FW_REG_READ | 1239 FW_REG_WRITE); 1240 1241 return fw; 1242 } 1243 1244 static bool 1245 wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from) 1246 { 1247 if ((cur ^ wa->set) & wa->read) { 1248 DRM_ERROR("%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n", 1249 name, from, i915_mmio_reg_offset(wa->reg), 1250 cur, cur & wa->read, wa->set & wa->read); 1251 1252 return false; 1253 } 1254 1255 return true; 1256 } 1257 1258 static void 1259 wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) 1260 { 1261 enum forcewake_domains fw; 1262 unsigned long flags; 1263 struct i915_wa *wa; 1264 unsigned int i; 1265 1266 if (!wal->count) 1267 return; 1268 1269 fw = wal_get_fw_for_rmw(uncore, wal); 1270 1271 spin_lock_irqsave(&uncore->lock, flags); 1272 intel_uncore_forcewake_get__locked(uncore, fw); 1273 1274 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { 1275 if (wa->clr) 1276 intel_uncore_rmw_fw(uncore, wa->reg, wa->clr, wa->set); 1277 else 1278 intel_uncore_write_fw(uncore, wa->reg, wa->set); 1279 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 1280 wa_verify(wa, 1281 intel_uncore_read_fw(uncore, wa->reg), 1282 wal->name, "application"); 1283 } 1284 1285 intel_uncore_forcewake_put__locked(uncore, fw); 1286 spin_unlock_irqrestore(&uncore->lock, flags); 1287 } 1288 1289 void intel_gt_apply_workarounds(struct intel_gt *gt) 1290 { 1291 wa_list_apply(gt->uncore, >->i915->gt_wa_list); 1292 } 1293 1294 static bool wa_list_verify(struct intel_uncore *uncore, 1295 const struct i915_wa_list *wal, 1296 const char *from) 1297 { 1298 struct i915_wa *wa; 1299 unsigned int i; 1300 bool ok = true; 1301 1302 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) 1303 ok &= wa_verify(wa, 1304 intel_uncore_read(uncore, wa->reg), 1305 wal->name, from); 1306 1307 return ok; 1308 } 1309 1310 bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from) 1311 { 1312 return wa_list_verify(gt->uncore, >->i915->gt_wa_list, from); 1313 } 1314 1315 __maybe_unused 1316 static bool is_nonpriv_flags_valid(u32 flags) 1317 { 1318 /* Check only valid flag bits are set */ 1319 if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID) 1320 return false; 1321 1322 /* NB: Only 3 out of 4 enum values are valid for access field */ 1323 if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) == 1324 RING_FORCE_TO_NONPRIV_ACCESS_INVALID) 1325 return false; 1326 1327 return true; 1328 } 1329 1330 static void 1331 whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags) 1332 { 1333 struct i915_wa wa = { 1334 .reg = reg 1335 }; 1336 1337 if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS)) 1338 return; 1339 1340 if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags))) 1341 return; 1342 1343 wa.reg.reg |= flags; 1344 _wa_add(wal, &wa); 1345 } 1346 1347 static void 1348 whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg) 1349 { 1350 whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW); 1351 } 1352 1353 static void gen9_whitelist_build(struct i915_wa_list *w) 1354 { 1355 /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ 1356 whitelist_reg(w, GEN9_CTX_PREEMPT_REG); 1357 1358 /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */ 1359 whitelist_reg(w, GEN8_CS_CHICKEN1); 1360 1361 /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */ 
1362 whitelist_reg(w, GEN8_HDC_CHICKEN1); 1363 1364 /* WaSendPushConstantsFromMMIO:skl,bxt */ 1365 whitelist_reg(w, COMMON_SLICE_CHICKEN2); 1366 } 1367 1368 static void skl_whitelist_build(struct intel_engine_cs *engine) 1369 { 1370 struct i915_wa_list *w = &engine->whitelist; 1371 1372 if (engine->class != RENDER_CLASS) 1373 return; 1374 1375 gen9_whitelist_build(w); 1376 1377 /* WaDisableLSQCROPERFforOCL:skl */ 1378 whitelist_reg(w, GEN8_L3SQCREG4); 1379 } 1380 1381 static void bxt_whitelist_build(struct intel_engine_cs *engine) 1382 { 1383 if (engine->class != RENDER_CLASS) 1384 return; 1385 1386 gen9_whitelist_build(&engine->whitelist); 1387 } 1388 1389 static void kbl_whitelist_build(struct intel_engine_cs *engine) 1390 { 1391 struct i915_wa_list *w = &engine->whitelist; 1392 1393 if (engine->class != RENDER_CLASS) 1394 return; 1395 1396 gen9_whitelist_build(w); 1397 1398 /* WaDisableLSQCROPERFforOCL:kbl */ 1399 whitelist_reg(w, GEN8_L3SQCREG4); 1400 } 1401 1402 static void glk_whitelist_build(struct intel_engine_cs *engine) 1403 { 1404 struct i915_wa_list *w = &engine->whitelist; 1405 1406 if (engine->class != RENDER_CLASS) 1407 return; 1408 1409 gen9_whitelist_build(w); 1410 1411 /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */ 1412 whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1); 1413 } 1414 1415 static void cfl_whitelist_build(struct intel_engine_cs *engine) 1416 { 1417 struct i915_wa_list *w = &engine->whitelist; 1418 1419 if (engine->class != RENDER_CLASS) 1420 return; 1421 1422 gen9_whitelist_build(w); 1423 1424 /* 1425 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml 1426 * 1427 * This covers 4 register which are next to one another : 1428 * - PS_INVOCATION_COUNT 1429 * - PS_INVOCATION_COUNT_UDW 1430 * - PS_DEPTH_COUNT 1431 * - PS_DEPTH_COUNT_UDW 1432 */ 1433 whitelist_reg_ext(w, PS_INVOCATION_COUNT, 1434 RING_FORCE_TO_NONPRIV_ACCESS_RD | 1435 RING_FORCE_TO_NONPRIV_RANGE_4); 1436 } 1437 1438 static void cml_whitelist_build(struct intel_engine_cs *engine) 1439 { 1440 struct i915_wa_list *w = &engine->whitelist; 1441 1442 if (engine->class != RENDER_CLASS) 1443 whitelist_reg_ext(w, 1444 RING_CTX_TIMESTAMP(engine->mmio_base), 1445 RING_FORCE_TO_NONPRIV_ACCESS_RD); 1446 1447 cfl_whitelist_build(engine); 1448 } 1449 1450 static void cnl_whitelist_build(struct intel_engine_cs *engine) 1451 { 1452 struct i915_wa_list *w = &engine->whitelist; 1453 1454 if (engine->class != RENDER_CLASS) 1455 return; 1456 1457 /* WaEnablePreemptionGranularityControlByUMD:cnl */ 1458 whitelist_reg(w, GEN8_CS_CHICKEN1); 1459 } 1460 1461 static void icl_whitelist_build(struct intel_engine_cs *engine) 1462 { 1463 struct i915_wa_list *w = &engine->whitelist; 1464 1465 switch (engine->class) { 1466 case RENDER_CLASS: 1467 /* WaAllowUMDToModifyHalfSliceChicken7:icl */ 1468 whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7); 1469 1470 /* WaAllowUMDToModifySamplerMode:icl */ 1471 whitelist_reg(w, GEN10_SAMPLER_MODE); 1472 1473 /* WaEnableStateCacheRedirectToCS:icl */ 1474 whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1); 1475 1476 /* 1477 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl 1478 * 1479 * This covers 4 register which are next to one another : 1480 * - PS_INVOCATION_COUNT 1481 * - PS_INVOCATION_COUNT_UDW 1482 * - PS_DEPTH_COUNT 1483 * - PS_DEPTH_COUNT_UDW 1484 */ 1485 whitelist_reg_ext(w, PS_INVOCATION_COUNT, 1486 RING_FORCE_TO_NONPRIV_ACCESS_RD | 1487 RING_FORCE_TO_NONPRIV_RANGE_4); 1488 break; 1489 1490 case VIDEO_DECODE_CLASS: 1491 /* hucStatusRegOffset */ 1492 
whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base), 1493 RING_FORCE_TO_NONPRIV_ACCESS_RD); 1494 /* hucUKernelHdrInfoRegOffset */ 1495 whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base), 1496 RING_FORCE_TO_NONPRIV_ACCESS_RD); 1497 /* hucStatus2RegOffset */ 1498 whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base), 1499 RING_FORCE_TO_NONPRIV_ACCESS_RD); 1500 whitelist_reg_ext(w, 1501 RING_CTX_TIMESTAMP(engine->mmio_base), 1502 RING_FORCE_TO_NONPRIV_ACCESS_RD); 1503 break; 1504 1505 default: 1506 whitelist_reg_ext(w, 1507 RING_CTX_TIMESTAMP(engine->mmio_base), 1508 RING_FORCE_TO_NONPRIV_ACCESS_RD); 1509 break; 1510 } 1511 } 1512 1513 static void tgl_whitelist_build(struct intel_engine_cs *engine) 1514 { 1515 struct i915_wa_list *w = &engine->whitelist; 1516 1517 switch (engine->class) { 1518 case RENDER_CLASS: 1519 /* 1520 * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl 1521 * Wa_1408556865:tgl 1522 * 1523 * This covers 4 registers which are next to one another : 1524 * - PS_INVOCATION_COUNT 1525 * - PS_INVOCATION_COUNT_UDW 1526 * - PS_DEPTH_COUNT 1527 * - PS_DEPTH_COUNT_UDW 1528 */ 1529 whitelist_reg_ext(w, PS_INVOCATION_COUNT, 1530 RING_FORCE_TO_NONPRIV_ACCESS_RD | 1531 RING_FORCE_TO_NONPRIV_RANGE_4); 1532 1533 /* Wa_1808121037:tgl */ 1534 whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1); 1535 1536 /* Wa_1806527549:tgl */ 1537 whitelist_reg(w, HIZ_CHICKEN); 1538 break; 1539 default: 1540 whitelist_reg_ext(w, 1541 RING_CTX_TIMESTAMP(engine->mmio_base), 1542 RING_FORCE_TO_NONPRIV_ACCESS_RD); 1543 break; 1544 } 1545 } 1546 1547 static void dg1_whitelist_build(struct intel_engine_cs *engine) 1548 { 1549 struct i915_wa_list *w = &engine->whitelist; 1550 1551 tgl_whitelist_build(engine); 1552 1553 /* GEN:BUG:1409280441:dg1 */ 1554 if (IS_DG1_REVID(engine->i915, DG1_REVID_A0, DG1_REVID_A0) && 1555 (engine->class == RENDER_CLASS || 1556 engine->class == COPY_ENGINE_CLASS)) 1557 whitelist_reg_ext(w, RING_ID(engine->mmio_base), 1558 RING_FORCE_TO_NONPRIV_ACCESS_RD); 1559 } 1560 1561 void intel_engine_init_whitelist(struct intel_engine_cs *engine) 1562 { 1563 struct drm_i915_private *i915 = engine->i915; 1564 struct i915_wa_list *w = &engine->whitelist; 1565 1566 wa_init_start(w, "whitelist", engine->name); 1567 1568 if (IS_DG1(i915)) 1569 dg1_whitelist_build(engine); 1570 else if (IS_GEN(i915, 12)) 1571 tgl_whitelist_build(engine); 1572 else if (IS_GEN(i915, 11)) 1573 icl_whitelist_build(engine); 1574 else if (IS_CANNONLAKE(i915)) 1575 cnl_whitelist_build(engine); 1576 else if (IS_COMETLAKE(i915)) 1577 cml_whitelist_build(engine); 1578 else if (IS_COFFEELAKE(i915)) 1579 cfl_whitelist_build(engine); 1580 else if (IS_GEMINILAKE(i915)) 1581 glk_whitelist_build(engine); 1582 else if (IS_KABYLAKE(i915)) 1583 kbl_whitelist_build(engine); 1584 else if (IS_BROXTON(i915)) 1585 bxt_whitelist_build(engine); 1586 else if (IS_SKYLAKE(i915)) 1587 skl_whitelist_build(engine); 1588 else if (INTEL_GEN(i915) <= 8) 1589 return; 1590 else 1591 MISSING_CASE(INTEL_GEN(i915)); 1592 1593 wa_init_finish(w); 1594 } 1595 1596 void intel_engine_apply_whitelist(struct intel_engine_cs *engine) 1597 { 1598 const struct i915_wa_list *wal = &engine->whitelist; 1599 struct intel_uncore *uncore = engine->uncore; 1600 const u32 base = engine->mmio_base; 1601 struct i915_wa *wa; 1602 unsigned int i; 1603 1604 if (!wal->count) 1605 return; 1606 1607 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) 1608 intel_uncore_write(uncore, 1609 RING_FORCE_TO_NONPRIV(base, i), 1610 i915_mmio_reg_offset(wa->reg)); 1611 1612 /* And 
clear the rest just in case of garbage */ 1613 for (; i < RING_MAX_NONPRIV_SLOTS; i++) 1614 intel_uncore_write(uncore, 1615 RING_FORCE_TO_NONPRIV(base, i), 1616 i915_mmio_reg_offset(RING_NOPID(base))); 1617 } 1618 1619 static void 1620 rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) 1621 { 1622 struct drm_i915_private *i915 = engine->i915; 1623 1624 if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) || 1625 IS_TGL_UY_GT_STEPPING(i915, STEP_A0, STEP_A0)) { 1626 /* 1627 * Wa_1607138336:tgl[a0],dg1[a0] 1628 * Wa_1607063988:tgl[a0],dg1[a0] 1629 */ 1630 wa_write_or(wal, 1631 GEN9_CTX_PREEMPT_REG, 1632 GEN12_DISABLE_POSH_BUSY_FF_DOP_CG); 1633 } 1634 1635 if (IS_TGL_UY_GT_STEPPING(i915, STEP_A0, STEP_A0)) { 1636 /* 1637 * Wa_1606679103:tgl 1638 * (see also Wa_1606682166:icl) 1639 */ 1640 wa_write_or(wal, 1641 GEN7_SARCHKMD, 1642 GEN7_DISABLE_SAMPLER_PREFETCH); 1643 } 1644 1645 if (IS_ALDERLAKE_S(i915) || IS_DG1(i915) || 1646 IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { 1647 /* Wa_1606931601:tgl,rkl,dg1,adl-s */ 1648 wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ); 1649 1650 /* 1651 * Wa_1407928979:tgl A* 1652 * Wa_18011464164:tgl[B0+],dg1[B0+] 1653 * Wa_22010931296:tgl[B0+],dg1[B0+] 1654 * Wa_14010919138:rkl,dg1,adl-s 1655 */ 1656 wa_write_or(wal, GEN7_FF_THREAD_MODE, 1657 GEN12_FF_TESSELATION_DOP_GATE_DISABLE); 1658 1659 /* 1660 * Wa_1606700617:tgl,dg1 1661 * Wa_22010271021:tgl,rkl,dg1, adl-s 1662 */ 1663 wa_masked_en(wal, 1664 GEN9_CS_DEBUG_MODE1, 1665 FF_DOP_CLOCK_GATE_DISABLE); 1666 } 1667 1668 if (IS_ALDERLAKE_S(i915) || IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) || 1669 IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { 1670 /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s */ 1671 wa_masked_en(wal, GEN7_ROW_CHICKEN2, 1672 GEN12_PUSH_CONST_DEREF_HOLD_DIS); 1673 1674 /* 1675 * Wa_1409085225:tgl 1676 * Wa_14010229206:tgl,rkl,dg1[a0],adl-s 1677 */ 1678 wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH); 1679 } 1680 1681 1682 if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) || 1683 IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { 1684 /* 1685 * Wa_1607030317:tgl 1686 * Wa_1607186500:tgl 1687 * Wa_1607297627:tgl,rkl,dg1[a0] 1688 * 1689 * On TGL and RKL there are multiple entries for this WA in the 1690 * BSpec; some indicate this is an A0-only WA, others indicate 1691 * it applies to all steppings so we trust the "all steppings." 1692 * For DG1 this only applies to A0. 1693 */ 1694 wa_masked_en(wal, 1695 GEN6_RC_SLEEP_PSMI_CONTROL, 1696 GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | 1697 GEN8_RC_SEMA_IDLE_MSG_DISABLE); 1698 } 1699 1700 if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { 1701 /* Wa_1406941453:tgl,rkl,dg1 */ 1702 wa_masked_en(wal, 1703 GEN10_SAMPLER_MODE, 1704 ENABLE_SMALLPL); 1705 } 1706 1707 if (IS_GEN(i915, 11)) { 1708 /* This is not an Wa. 
Enable for better image quality */ 1709 wa_masked_en(wal, 1710 _3D_CHICKEN3, 1711 _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE); 1712 1713 /* WaPipelineFlushCoherentLines:icl */ 1714 wa_write_or(wal, 1715 GEN8_L3SQCREG4, 1716 GEN8_LQSC_FLUSH_COHERENT_LINES); 1717 1718 /* 1719 * Wa_1405543622:icl 1720 * Formerly known as WaGAPZPriorityScheme 1721 */ 1722 wa_write_or(wal, 1723 GEN8_GARBCNTL, 1724 GEN11_ARBITRATION_PRIO_ORDER_MASK); 1725 1726 /* 1727 * Wa_1604223664:icl 1728 * Formerly known as WaL3BankAddressHashing 1729 */ 1730 wa_write_clr_set(wal, 1731 GEN8_GARBCNTL, 1732 GEN11_HASH_CTRL_EXCL_MASK, 1733 GEN11_HASH_CTRL_EXCL_BIT0); 1734 wa_write_clr_set(wal, 1735 GEN11_GLBLINVL, 1736 GEN11_BANK_HASH_ADDR_EXCL_MASK, 1737 GEN11_BANK_HASH_ADDR_EXCL_BIT0); 1738 1739 /* 1740 * Wa_1405733216:icl 1741 * Formerly known as WaDisableCleanEvicts 1742 */ 1743 wa_write_or(wal, 1744 GEN8_L3SQCREG4, 1745 GEN11_LQSC_CLEAN_EVICT_DISABLE); 1746 1747 /* WaForwardProgressSoftReset:icl */ 1748 wa_write_or(wal, 1749 GEN10_SCRATCH_LNCF2, 1750 PMFLUSHDONE_LNICRSDROP | 1751 PMFLUSH_GAPL3UNBLOCK | 1752 PMFLUSHDONE_LNEBLK); 1753 1754 /* Wa_1406609255:icl (pre-prod) */ 1755 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) 1756 wa_write_or(wal, 1757 GEN7_SARCHKMD, 1758 GEN7_DISABLE_DEMAND_PREFETCH); 1759 1760 /* Wa_1606682166:icl */ 1761 wa_write_or(wal, 1762 GEN7_SARCHKMD, 1763 GEN7_DISABLE_SAMPLER_PREFETCH); 1764 1765 /* Wa_1409178092:icl */ 1766 wa_write_clr_set(wal, 1767 GEN11_SCRATCH2, 1768 GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE, 1769 0); 1770 1771 /* WaEnable32PlaneMode:icl */ 1772 wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS, 1773 GEN11_ENABLE_32_PLANE_MODE); 1774 1775 /* 1776 * Wa_1408615072:icl,ehl (vsunit) 1777 * Wa_1407596294:icl,ehl (hsunit) 1778 */ 1779 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, 1780 VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS); 1781 1782 /* Wa_1407352427:icl,ehl */ 1783 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, 1784 PSDUNIT_CLKGATE_DIS); 1785 1786 /* Wa_1406680159:icl,ehl */ 1787 wa_write_or(wal, 1788 SUBSLICE_UNIT_LEVEL_CLKGATE, 1789 GWUNIT_CLKGATE_DIS); 1790 1791 /* 1792 * Wa_1408767742:icl[a2..forever],ehl[all] 1793 * Wa_1605460711:icl[a0..c0] 1794 */ 1795 wa_write_or(wal, 1796 GEN7_FF_THREAD_MODE, 1797 GEN12_FF_TESSELATION_DOP_GATE_DISABLE); 1798 1799 /* Wa_22010271021:ehl */ 1800 if (IS_JSL_EHL(i915)) 1801 wa_masked_en(wal, 1802 GEN9_CS_DEBUG_MODE1, 1803 FF_DOP_CLOCK_GATE_DISABLE); 1804 } 1805 1806 if (IS_GEN_RANGE(i915, 9, 12)) { 1807 /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */ 1808 wa_masked_en(wal, 1809 GEN7_FF_SLICE_CS_CHICKEN1, 1810 GEN9_FFSC_PERCTX_PREEMPT_CTRL); 1811 } 1812 1813 if (IS_SKYLAKE(i915) || 1814 IS_KABYLAKE(i915) || 1815 IS_COFFEELAKE(i915) || 1816 IS_COMETLAKE(i915)) { 1817 /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */ 1818 wa_write_or(wal, 1819 GEN8_GARBCNTL, 1820 GEN9_GAPS_TSV_CREDIT_DISABLE); 1821 } 1822 1823 if (IS_BROXTON(i915)) { 1824 /* WaDisablePooledEuLoadBalancingFix:bxt */ 1825 wa_masked_en(wal, 1826 FF_SLICE_CS_CHICKEN2, 1827 GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE); 1828 } 1829 1830 if (IS_GEN(i915, 9)) { 1831 /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */ 1832 wa_masked_en(wal, 1833 GEN9_CSFE_CHICKEN1_RCS, 1834 GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE); 1835 1836 /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */ 1837 wa_write_or(wal, 1838 BDW_SCRATCH1, 1839 GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); 1840 1841 /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */ 1842 if (IS_GEN9_LP(i915)) 1843 
wa_write_clr_set(wal, 1844 GEN8_L3SQCREG1, 1845 L3_PRIO_CREDITS_MASK, 1846 L3_GENERAL_PRIO_CREDITS(62) | 1847 L3_HIGH_PRIO_CREDITS(2)); 1848 1849 /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */ 1850 wa_write_or(wal, 1851 GEN8_L3SQCREG4, 1852 GEN8_LQSC_FLUSH_COHERENT_LINES); 1853 1854 /* Disable atomics in L3 to prevent unrecoverable hangs */ 1855 wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1, 1856 GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0); 1857 wa_write_clr_set(wal, GEN8_L3SQCREG4, 1858 GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0); 1859 wa_write_clr_set(wal, GEN9_SCRATCH1, 1860 EVICTION_PERF_FIX_ENABLE, 0); 1861 } 1862 1863 if (IS_HASWELL(i915)) { 1864 /* WaSampleCChickenBitEnable:hsw */ 1865 wa_masked_en(wal, 1866 HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE); 1867 1868 wa_masked_dis(wal, 1869 CACHE_MODE_0_GEN7, 1870 /* enable HiZ Raw Stall Optimization */ 1871 HIZ_RAW_STALL_OPT_DISABLE); 1872 1873 /* WaDisable4x2SubspanOptimization:hsw */ 1874 wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); 1875 } 1876 1877 if (IS_VALLEYVIEW(i915)) { 1878 /* WaDisableEarlyCull:vlv */ 1879 wa_masked_en(wal, 1880 _3D_CHICKEN3, 1881 _3D_CHICKEN_SF_DISABLE_OBJEND_CULL); 1882 1883 /* 1884 * WaVSThreadDispatchOverride:ivb,vlv 1885 * 1886 * This actually overrides the dispatch 1887 * mode for all thread types. 1888 */ 1889 wa_write_clr_set(wal, 1890 GEN7_FF_THREAD_MODE, 1891 GEN7_FF_SCHED_MASK, 1892 GEN7_FF_TS_SCHED_HW | 1893 GEN7_FF_VS_SCHED_HW | 1894 GEN7_FF_DS_SCHED_HW); 1895 1896 /* WaPsdDispatchEnable:vlv */ 1897 /* WaDisablePSDDualDispatchEnable:vlv */ 1898 wa_masked_en(wal, 1899 GEN7_HALF_SLICE_CHICKEN1, 1900 GEN7_MAX_PS_THREAD_DEP | 1901 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); 1902 } 1903 1904 if (IS_IVYBRIDGE(i915)) { 1905 /* WaDisableEarlyCull:ivb */ 1906 wa_masked_en(wal, 1907 _3D_CHICKEN3, 1908 _3D_CHICKEN_SF_DISABLE_OBJEND_CULL); 1909 1910 if (0) { /* causes HiZ corruption on ivb:gt1 */ 1911 /* enable HiZ Raw Stall Optimization */ 1912 wa_masked_dis(wal, 1913 CACHE_MODE_0_GEN7, 1914 HIZ_RAW_STALL_OPT_DISABLE); 1915 } 1916 1917 /* 1918 * WaVSThreadDispatchOverride:ivb,vlv 1919 * 1920 * This actually overrides the dispatch 1921 * mode for all thread types. 1922 */ 1923 wa_write_clr_set(wal, 1924 GEN7_FF_THREAD_MODE, 1925 GEN7_FF_SCHED_MASK, 1926 GEN7_FF_TS_SCHED_HW | 1927 GEN7_FF_VS_SCHED_HW | 1928 GEN7_FF_DS_SCHED_HW); 1929 1930 /* WaDisablePSDDualDispatchEnable:ivb */ 1931 if (IS_IVB_GT1(i915)) 1932 wa_masked_en(wal, 1933 GEN7_HALF_SLICE_CHICKEN1, 1934 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); 1935 } 1936 1937 if (IS_GEN(i915, 7)) { 1938 /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */ 1939 wa_masked_en(wal, 1940 GFX_MODE_GEN7, 1941 GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE); 1942 1943 /* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */ 1944 wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); 1945 1946 /* 1947 * BSpec says this must be set, even though 1948 * WaDisable4x2SubspanOptimization:ivb,hsw 1949 * WaDisable4x2SubspanOptimization isn't listed for VLV. 1950 */ 1951 wa_masked_en(wal, 1952 CACHE_MODE_1, 1953 PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); 1954 1955 /* 1956 * BSpec recommends 8x4 when MSAA is used, 1957 * however in practice 16x4 seems fastest. 1958 * 1959 * Note that PS/WM thread counts depend on the WIZ hashing 1960 * disable bit, which we don't touch here, but it's good 1961 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
		 */
		wa_add(wal, GEN7_GT_MODE, 0,
		       _MASKED_FIELD(GEN6_WIZ_HASHING_MASK,
				     GEN6_WIZ_HASHING_16x4),
		       GEN6_WIZ_HASHING_16x4);
	}

	if (IS_GEN_RANGE(i915, 6, 7))
		/*
		 * We need to disable the AsyncFlip performance optimisations in
		 * order to use MI_WAIT_FOR_EVENT within the CS. It should
		 * already be programmed to '1' on all products.
		 *
		 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
		 */
		wa_masked_en(wal,
			     MI_MODE,
			     ASYNC_FLIP_PERF_DISABLE);

	if (IS_GEN(i915, 6)) {
		/*
		 * Required for the hardware to program scanline values for
		 * waiting.
		 * WaEnableFlushTlbInvalidationMode:snb
		 */
		wa_masked_en(wal,
			     GFX_MODE,
			     GFX_TLB_INVALIDATE_EXPLICIT);

		/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
		wa_masked_en(wal,
			     _3D_CHICKEN,
			     _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB);

		wa_masked_en(wal,
			     _3D_CHICKEN3,
			     /* WaStripsFansDisableFastClipPerformanceFix:snb */
			     _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL |
			     /*
			      * Bspec says:
			      * "This bit must be set if 3DSTATE_CLIP clip mode is set
			      * to normal and 3DSTATE_SF number of SF output attributes
			      * is more than 16."
			      */
			     _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH);

		/*
		 * BSpec recommends 8x4 when MSAA is used,
		 * however in practice 16x4 seems fastest.
		 *
		 * Note that PS/WM thread counts depend on the WIZ hashing
		 * disable bit, which we don't touch here, but it's good
		 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
		 */
		wa_add(wal,
		       GEN6_GT_MODE, 0,
		       _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
		       GEN6_WIZ_HASHING_16x4);

		/* WaDisable_RenderCache_OperationalFlush:snb */
		wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);

		/*
		 * From the Sandybridge PRM, volume 1 part 3, page 24:
		 * "If this bit is set, STCunit will have LRA as replacement
		 *  policy. [...] This bit must be reset. LRA replacement
		 *  policy is not supported."
		 */
		wa_masked_dis(wal,
			      CACHE_MODE_0,
			      CM0_STC_EVICT_DISABLE_LRA_SNB);
	}

	if (IS_GEN_RANGE(i915, 4, 6))
		/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
		wa_add(wal, MI_MODE,
		       0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
		       /* XXX bit doesn't stick on Broadwater */
		       IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH);

	if (IS_GEN(i915, 4))
		/*
		 * Disable CONSTANT_BUFFER before it is loaded from the context
		 * image. Once it is loaded, it is executed and the stored
		 * address may no longer be valid, leading to a GPU hang.
		 *
		 * This imposes the requirement that userspace reload their
		 * CONSTANT_BUFFER on every batch; fortunately, that is a
		 * requirement they are already accustomed to from before
		 * contexts were enabled.
		 */
		wa_add(wal, ECOSKPD,
		       0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE),
		       0 /* XXX bit doesn't stick on Broadwater */);
}
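
/*
 * Engine workarounds for the non-render (xCS) rings. There is far less to
 * apply here than for the RCS list above; at the moment only the KBL VECS
 * semaphore wait poll below lives here.
 */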
static void
xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	/* WaKBLVECSSemaphoreWaitPoll:kbl */
	if (IS_KBL_GT_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
		wa_write(wal,
			 RING_SEMA_WAIT_POLL(engine->mmio_base),
			 1);
	}
}

static void
engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 4))
		return;

	if (engine->class == RENDER_CLASS)
		rcs_engine_wa_init(engine, wal);
	else
		xcs_engine_wa_init(engine, wal);
}

void intel_engine_init_workarounds(struct intel_engine_cs *engine)
{
	struct i915_wa_list *wal = &engine->wa_list;

	if (INTEL_GEN(engine->i915) < 4)
		return;

	wa_init_start(wal, "engine", engine->name);
	engine_init_workarounds(engine, wal);
	wa_init_finish(wal);
}

void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
{
	wa_list_apply(engine->uncore, &engine->wa_list);
}

struct mcr_range {
	u32 start;
	u32 end;
};

static const struct mcr_range mcr_ranges_gen8[] = {
	{ .start = 0x5500, .end = 0x55ff },
	{ .start = 0x7000, .end = 0x7fff },
	{ .start = 0x9400, .end = 0x97ff },
	{ .start = 0xb000, .end = 0xb3ff },
	{ .start = 0xe000, .end = 0xe7ff },
	{},
};

static const struct mcr_range mcr_ranges_gen12[] = {
	{ .start = 0x8150, .end = 0x815f },
	{ .start = 0x9520, .end = 0x955f },
	{ .start = 0xb100, .end = 0xb3ff },
	{ .start = 0xde80, .end = 0xe8ff },
	{ .start = 0x24a00, .end = 0x24a7f },
	{},
};

static bool mcr_range(struct drm_i915_private *i915, u32 offset)
{
	const struct mcr_range *mcr_ranges;
	int i;

	if (INTEL_GEN(i915) >= 12)
		mcr_ranges = mcr_ranges_gen12;
	else if (INTEL_GEN(i915) >= 8)
		mcr_ranges = mcr_ranges_gen8;
	else
		return false;

	/*
	 * Registers in these ranges are affected by the MCR selector
	 * which only controls CPU initiated MMIO. Routing does not
	 * work for CS access so we cannot verify them on this path.
	 */
	for (i = 0; mcr_ranges[i].start; i++)
		if (offset >= mcr_ranges[i].start &&
		    offset <= mcr_ranges[i].end)
			return true;

	return false;
}
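
/*
 * Emit one MI_STORE_REGISTER_MEM per workaround so the CS dumps the current
 * value of each register into @vma, at the same index the workaround holds
 * in @wal. Registers behind the MCR steering selector are skipped, as their
 * CS readback cannot be trusted (see mcr_range() above).
 */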
2139 */ 2140 for (i = 0; mcr_ranges[i].start; i++) 2141 if (offset >= mcr_ranges[i].start && 2142 offset <= mcr_ranges[i].end) 2143 return true; 2144 2145 return false; 2146 } 2147 2148 static int 2149 wa_list_srm(struct i915_request *rq, 2150 const struct i915_wa_list *wal, 2151 struct i915_vma *vma) 2152 { 2153 struct drm_i915_private *i915 = rq->engine->i915; 2154 unsigned int i, count = 0; 2155 const struct i915_wa *wa; 2156 u32 srm, *cs; 2157 2158 srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; 2159 if (INTEL_GEN(i915) >= 8) 2160 srm++; 2161 2162 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { 2163 if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg))) 2164 count++; 2165 } 2166 2167 cs = intel_ring_begin(rq, 4 * count); 2168 if (IS_ERR(cs)) 2169 return PTR_ERR(cs); 2170 2171 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { 2172 u32 offset = i915_mmio_reg_offset(wa->reg); 2173 2174 if (mcr_range(i915, offset)) 2175 continue; 2176 2177 *cs++ = srm; 2178 *cs++ = offset; 2179 *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i; 2180 *cs++ = 0; 2181 } 2182 intel_ring_advance(rq, cs); 2183 2184 return 0; 2185 } 2186 2187 static int engine_wa_list_verify(struct intel_context *ce, 2188 const struct i915_wa_list * const wal, 2189 const char *from) 2190 { 2191 const struct i915_wa *wa; 2192 struct i915_request *rq; 2193 struct i915_vma *vma; 2194 struct i915_gem_ww_ctx ww; 2195 unsigned int i; 2196 u32 *results; 2197 int err; 2198 2199 if (!wal->count) 2200 return 0; 2201 2202 vma = __vm_create_scratch_for_read(&ce->engine->gt->ggtt->vm, 2203 wal->count * sizeof(u32)); 2204 if (IS_ERR(vma)) 2205 return PTR_ERR(vma); 2206 2207 intel_engine_pm_get(ce->engine); 2208 i915_gem_ww_ctx_init(&ww, false); 2209 retry: 2210 err = i915_gem_object_lock(vma->obj, &ww); 2211 if (err == 0) 2212 err = intel_context_pin_ww(ce, &ww); 2213 if (err) 2214 goto err_pm; 2215 2216 rq = i915_request_create(ce); 2217 if (IS_ERR(rq)) { 2218 err = PTR_ERR(rq); 2219 goto err_unpin; 2220 } 2221 2222 err = i915_request_await_object(rq, vma->obj, true); 2223 if (err == 0) 2224 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); 2225 if (err == 0) 2226 err = wa_list_srm(rq, wal, vma); 2227 2228 i915_request_get(rq); 2229 if (err) 2230 i915_request_set_error_once(rq, err); 2231 i915_request_add(rq); 2232 2233 if (err) 2234 goto err_rq; 2235 2236 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2237 err = -ETIME; 2238 goto err_rq; 2239 } 2240 2241 results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); 2242 if (IS_ERR(results)) { 2243 err = PTR_ERR(results); 2244 goto err_rq; 2245 } 2246 2247 err = 0; 2248 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { 2249 if (mcr_range(rq->engine->i915, i915_mmio_reg_offset(wa->reg))) 2250 continue; 2251 2252 if (!wa_verify(wa, results[i], wal->name, from)) 2253 err = -ENXIO; 2254 } 2255 2256 i915_gem_object_unpin_map(vma->obj); 2257 2258 err_rq: 2259 i915_request_put(rq); 2260 err_unpin: 2261 intel_context_unpin(ce); 2262 err_pm: 2263 if (err == -EDEADLK) { 2264 err = i915_gem_ww_ctx_backoff(&ww); 2265 if (!err) 2266 goto retry; 2267 } 2268 i915_gem_ww_ctx_fini(&ww); 2269 intel_engine_pm_put(ce->engine); 2270 i915_vma_unpin(vma); 2271 i915_vma_put(vma); 2272 return err; 2273 } 2274 2275 int intel_engine_verify_workarounds(struct intel_engine_cs *engine, 2276 const char *from) 2277 { 2278 return engine_wa_list_verify(engine->kernel_context, 2279 &engine->wa_list, 2280 from); 2281 } 2282 2283 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 2284 #include 
"selftest_workarounds.c" 2285 #endif 2286