1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2019 Intel Corporation 4 */ 5 6 #include "debugfs_gt.h" 7 #include "i915_drv.h" 8 #include "intel_context.h" 9 #include "intel_gt.h" 10 #include "intel_gt_pm.h" 11 #include "intel_gt_requests.h" 12 #include "intel_mocs.h" 13 #include "intel_rc6.h" 14 #include "intel_renderstate.h" 15 #include "intel_rps.h" 16 #include "intel_uncore.h" 17 #include "intel_pm.h" 18 19 void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) 20 { 21 gt->i915 = i915; 22 gt->uncore = &i915->uncore; 23 24 spin_lock_init(>->irq_lock); 25 26 INIT_LIST_HEAD(>->closed_vma); 27 spin_lock_init(>->closed_lock); 28 29 intel_gt_init_reset(gt); 30 intel_gt_init_requests(gt); 31 intel_gt_init_timelines(gt); 32 intel_gt_pm_init_early(gt); 33 34 intel_rps_init_early(>->rps); 35 intel_uc_init_early(>->uc); 36 } 37 38 void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt) 39 { 40 gt->ggtt = ggtt; 41 } 42 43 static void init_unused_ring(struct intel_gt *gt, u32 base) 44 { 45 struct intel_uncore *uncore = gt->uncore; 46 47 intel_uncore_write(uncore, RING_CTL(base), 0); 48 intel_uncore_write(uncore, RING_HEAD(base), 0); 49 intel_uncore_write(uncore, RING_TAIL(base), 0); 50 intel_uncore_write(uncore, RING_START(base), 0); 51 } 52 53 static void init_unused_rings(struct intel_gt *gt) 54 { 55 struct drm_i915_private *i915 = gt->i915; 56 57 if (IS_I830(i915)) { 58 init_unused_ring(gt, PRB1_BASE); 59 init_unused_ring(gt, SRB0_BASE); 60 init_unused_ring(gt, SRB1_BASE); 61 init_unused_ring(gt, SRB2_BASE); 62 init_unused_ring(gt, SRB3_BASE); 63 } else if (IS_GEN(i915, 2)) { 64 init_unused_ring(gt, SRB0_BASE); 65 init_unused_ring(gt, SRB1_BASE); 66 } else if (IS_GEN(i915, 3)) { 67 init_unused_ring(gt, PRB1_BASE); 68 init_unused_ring(gt, PRB2_BASE); 69 } 70 } 71 72 int intel_gt_init_hw(struct intel_gt *gt) 73 { 74 struct drm_i915_private *i915 = gt->i915; 75 struct intel_uncore *uncore = gt->uncore; 76 int ret; 77 78 gt->last_init_time = ktime_get(); 79 80 /* Double layer security blanket, see i915_gem_init() */ 81 intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); 82 83 if (HAS_EDRAM(i915) && INTEL_GEN(i915) < 9) 84 intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf)); 85 86 if (IS_HASWELL(i915)) 87 intel_uncore_write(uncore, 88 MI_PREDICATE_RESULT_2, 89 IS_HSW_GT3(i915) ? 90 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 91 92 /* Apply the GT workarounds... */ 93 intel_gt_apply_workarounds(gt); 94 /* ...and determine whether they are sticking. */ 95 intel_gt_verify_workarounds(gt, "init"); 96 97 intel_gt_init_swizzling(gt); 98 99 /* 100 * At least 830 can leave some of the unused rings 101 * "active" (ie. head != tail) after resume which 102 * will prevent c3 entry. Makes sure all unused rings 103 * are totally idle. 104 */ 105 init_unused_rings(gt); 106 107 ret = i915_ppgtt_init_hw(gt); 108 if (ret) { 109 DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); 110 goto out; 111 } 112 113 /* We can't enable contexts until all firmware is loaded */ 114 ret = intel_uc_init_hw(>->uc); 115 if (ret) { 116 i915_probe_error(i915, "Enabling uc failed (%d)\n", ret); 117 goto out; 118 } 119 120 intel_mocs_init(gt); 121 122 out: 123 intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); 124 return ret; 125 } 126 127 static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set) 128 { 129 intel_uncore_rmw(uncore, reg, 0, set); 130 } 131 132 static void rmw_clear(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) 133 { 134 intel_uncore_rmw(uncore, reg, clr, 0); 135 } 136 137 static void clear_register(struct intel_uncore *uncore, i915_reg_t reg) 138 { 139 intel_uncore_rmw(uncore, reg, 0, 0); 140 } 141 142 static void gen8_clear_engine_error_register(struct intel_engine_cs *engine) 143 { 144 GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0); 145 GEN6_RING_FAULT_REG_POSTING_READ(engine); 146 } 147 148 void 149 intel_gt_clear_error_registers(struct intel_gt *gt, 150 intel_engine_mask_t engine_mask) 151 { 152 struct drm_i915_private *i915 = gt->i915; 153 struct intel_uncore *uncore = gt->uncore; 154 u32 eir; 155 156 if (!IS_GEN(i915, 2)) 157 clear_register(uncore, PGTBL_ER); 158 159 if (INTEL_GEN(i915) < 4) 160 clear_register(uncore, IPEIR(RENDER_RING_BASE)); 161 else 162 clear_register(uncore, IPEIR_I965); 163 164 clear_register(uncore, EIR); 165 eir = intel_uncore_read(uncore, EIR); 166 if (eir) { 167 /* 168 * some errors might have become stuck, 169 * mask them. 170 */ 171 DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir); 172 rmw_set(uncore, EMR, eir); 173 intel_uncore_write(uncore, GEN2_IIR, 174 I915_MASTER_ERROR_INTERRUPT); 175 } 176 177 if (INTEL_GEN(i915) >= 12) { 178 rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID); 179 intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG); 180 } else if (INTEL_GEN(i915) >= 8) { 181 rmw_clear(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID); 182 intel_uncore_posting_read(uncore, GEN8_RING_FAULT_REG); 183 } else if (INTEL_GEN(i915) >= 6) { 184 struct intel_engine_cs *engine; 185 enum intel_engine_id id; 186 187 for_each_engine_masked(engine, gt, engine_mask, id) 188 gen8_clear_engine_error_register(engine); 189 } 190 } 191 192 static void gen6_check_faults(struct intel_gt *gt) 193 { 194 struct intel_engine_cs *engine; 195 enum intel_engine_id id; 196 u32 fault; 197 198 for_each_engine(engine, gt, id) { 199 fault = GEN6_RING_FAULT_REG_READ(engine); 200 if (fault & RING_FAULT_VALID) { 201 DRM_DEBUG_DRIVER("Unexpected fault\n" 202 "\tAddr: 0x%08lx\n" 203 "\tAddress space: %s\n" 204 "\tSource ID: %d\n" 205 "\tType: %d\n", 206 fault & PAGE_MASK, 207 fault & RING_FAULT_GTTSEL_MASK ? 208 "GGTT" : "PPGTT", 209 RING_FAULT_SRCID(fault), 210 RING_FAULT_FAULT_TYPE(fault)); 211 } 212 } 213 } 214 215 static void gen8_check_faults(struct intel_gt *gt) 216 { 217 struct intel_uncore *uncore = gt->uncore; 218 i915_reg_t fault_reg, fault_data0_reg, fault_data1_reg; 219 u32 fault; 220 221 if (INTEL_GEN(gt->i915) >= 12) { 222 fault_reg = GEN12_RING_FAULT_REG; 223 fault_data0_reg = GEN12_FAULT_TLB_DATA0; 224 fault_data1_reg = GEN12_FAULT_TLB_DATA1; 225 } else { 226 fault_reg = GEN8_RING_FAULT_REG; 227 fault_data0_reg = GEN8_FAULT_TLB_DATA0; 228 fault_data1_reg = GEN8_FAULT_TLB_DATA1; 229 } 230 231 fault = intel_uncore_read(uncore, fault_reg); 232 if (fault & RING_FAULT_VALID) { 233 u32 fault_data0, fault_data1; 234 u64 fault_addr; 235 236 fault_data0 = intel_uncore_read(uncore, fault_data0_reg); 237 fault_data1 = intel_uncore_read(uncore, fault_data1_reg); 238 239 fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) | 240 ((u64)fault_data0 << 12); 241 242 DRM_DEBUG_DRIVER("Unexpected fault\n" 243 "\tAddr: 0x%08x_%08x\n" 244 "\tAddress space: %s\n" 245 "\tEngine ID: %d\n" 246 "\tSource ID: %d\n" 247 "\tType: %d\n", 248 upper_32_bits(fault_addr), 249 lower_32_bits(fault_addr), 250 fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT", 251 GEN8_RING_FAULT_ENGINE_ID(fault), 252 RING_FAULT_SRCID(fault), 253 RING_FAULT_FAULT_TYPE(fault)); 254 } 255 } 256 257 void intel_gt_check_and_clear_faults(struct intel_gt *gt) 258 { 259 struct drm_i915_private *i915 = gt->i915; 260 261 /* From GEN8 onwards we only have one 'All Engine Fault Register' */ 262 if (INTEL_GEN(i915) >= 8) 263 gen8_check_faults(gt); 264 else if (INTEL_GEN(i915) >= 6) 265 gen6_check_faults(gt); 266 else 267 return; 268 269 intel_gt_clear_error_registers(gt, ALL_ENGINES); 270 } 271 272 void intel_gt_flush_ggtt_writes(struct intel_gt *gt) 273 { 274 struct intel_uncore *uncore = gt->uncore; 275 intel_wakeref_t wakeref; 276 277 /* 278 * No actual flushing is required for the GTT write domain for reads 279 * from the GTT domain. Writes to it "immediately" go to main memory 280 * as far as we know, so there's no chipset flush. It also doesn't 281 * land in the GPU render cache. 282 * 283 * However, we do have to enforce the order so that all writes through 284 * the GTT land before any writes to the device, such as updates to 285 * the GATT itself. 286 * 287 * We also have to wait a bit for the writes to land from the GTT. 288 * An uncached read (i.e. mmio) seems to be ideal for the round-trip 289 * timing. This issue has only been observed when switching quickly 290 * between GTT writes and CPU reads from inside the kernel on recent hw, 291 * and it appears to only affect discrete GTT blocks (i.e. on LLC 292 * system agents we cannot reproduce this behaviour, until Cannonlake 293 * that was!). 294 */ 295 296 wmb(); 297 298 if (INTEL_INFO(gt->i915)->has_coherent_ggtt) 299 return; 300 301 intel_gt_chipset_flush(gt); 302 303 with_intel_runtime_pm_if_in_use(uncore->rpm, wakeref) { 304 unsigned long flags; 305 306 spin_lock_irqsave(&uncore->lock, flags); 307 intel_uncore_posting_read_fw(uncore, 308 RING_HEAD(RENDER_RING_BASE)); 309 spin_unlock_irqrestore(&uncore->lock, flags); 310 } 311 } 312 313 void intel_gt_chipset_flush(struct intel_gt *gt) 314 { 315 wmb(); 316 if (INTEL_GEN(gt->i915) < 6) 317 intel_gtt_chipset_flush(); 318 } 319 320 void intel_gt_driver_register(struct intel_gt *gt) 321 { 322 intel_rps_driver_register(>->rps); 323 324 debugfs_gt_register(gt); 325 } 326 327 static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) 328 { 329 struct drm_i915_private *i915 = gt->i915; 330 struct drm_i915_gem_object *obj; 331 struct i915_vma *vma; 332 int ret; 333 334 obj = i915_gem_object_create_stolen(i915, size); 335 if (IS_ERR(obj)) 336 obj = i915_gem_object_create_internal(i915, size); 337 if (IS_ERR(obj)) { 338 DRM_ERROR("Failed to allocate scratch page\n"); 339 return PTR_ERR(obj); 340 } 341 342 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 343 if (IS_ERR(vma)) { 344 ret = PTR_ERR(vma); 345 goto err_unref; 346 } 347 348 ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); 349 if (ret) 350 goto err_unref; 351 352 gt->scratch = i915_vma_make_unshrinkable(vma); 353 354 return 0; 355 356 err_unref: 357 i915_gem_object_put(obj); 358 return ret; 359 } 360 361 static void intel_gt_fini_scratch(struct intel_gt *gt) 362 { 363 i915_vma_unpin_and_release(>->scratch, 0); 364 } 365 366 static struct i915_address_space *kernel_vm(struct intel_gt *gt) 367 { 368 if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING) 369 return &i915_ppgtt_create(gt)->vm; 370 else 371 return i915_vm_get(>->ggtt->vm); 372 } 373 374 static int __intel_context_flush_retire(struct intel_context *ce) 375 { 376 struct intel_timeline *tl; 377 378 tl = intel_context_timeline_lock(ce); 379 if (IS_ERR(tl)) 380 return PTR_ERR(tl); 381 382 intel_context_timeline_unlock(tl); 383 return 0; 384 } 385 386 static int __engines_record_defaults(struct intel_gt *gt) 387 { 388 struct i915_request *requests[I915_NUM_ENGINES] = {}; 389 struct intel_engine_cs *engine; 390 enum intel_engine_id id; 391 int err = 0; 392 393 /* 394 * As we reset the gpu during very early sanitisation, the current 395 * register state on the GPU should reflect its defaults values. 396 * We load a context onto the hw (with restore-inhibit), then switch 397 * over to a second context to save that default register state. We 398 * can then prime every new context with that state so they all start 399 * from the same default HW values. 400 */ 401 402 for_each_engine(engine, gt, id) { 403 struct intel_renderstate so; 404 struct intel_context *ce; 405 struct i915_request *rq; 406 407 /* We must be able to switch to something! */ 408 GEM_BUG_ON(!engine->kernel_context); 409 410 err = intel_renderstate_init(&so, engine); 411 if (err) 412 goto out; 413 414 ce = intel_context_create(engine); 415 if (IS_ERR(ce)) { 416 err = PTR_ERR(ce); 417 goto out; 418 } 419 420 rq = intel_context_create_request(ce); 421 if (IS_ERR(rq)) { 422 err = PTR_ERR(rq); 423 intel_context_put(ce); 424 goto out; 425 } 426 427 err = intel_engine_emit_ctx_wa(rq); 428 if (err) 429 goto err_rq; 430 431 err = intel_renderstate_emit(&so, rq); 432 if (err) 433 goto err_rq; 434 435 err_rq: 436 requests[id] = i915_request_get(rq); 437 i915_request_add(rq); 438 intel_renderstate_fini(&so); 439 if (err) 440 goto out; 441 } 442 443 /* Flush the default context image to memory, and enable powersaving. */ 444 if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { 445 err = -EIO; 446 goto out; 447 } 448 449 for (id = 0; id < ARRAY_SIZE(requests); id++) { 450 struct i915_request *rq; 451 struct i915_vma *state; 452 void *vaddr; 453 454 rq = requests[id]; 455 if (!rq) 456 continue; 457 458 GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags)); 459 state = rq->context->state; 460 if (!state) 461 continue; 462 463 /* Serialise with retirement on another CPU */ 464 GEM_BUG_ON(!i915_request_completed(rq)); 465 err = __intel_context_flush_retire(rq->context); 466 if (err) 467 goto out; 468 469 /* We want to be able to unbind the state from the GGTT */ 470 GEM_BUG_ON(intel_context_is_pinned(rq->context)); 471 472 /* 473 * As we will hold a reference to the logical state, it will 474 * not be torn down with the context, and importantly the 475 * object will hold onto its vma (making it possible for a 476 * stray GTT write to corrupt our defaults). Unmap the vma 477 * from the GTT to prevent such accidents and reclaim the 478 * space. 479 */ 480 err = i915_vma_unbind(state); 481 if (err) 482 goto out; 483 484 i915_gem_object_lock(state->obj); 485 err = i915_gem_object_set_to_cpu_domain(state->obj, false); 486 i915_gem_object_unlock(state->obj); 487 if (err) 488 goto out; 489 490 i915_gem_object_set_cache_coherency(state->obj, I915_CACHE_LLC); 491 492 /* Check we can acquire the image of the context state */ 493 vaddr = i915_gem_object_pin_map(state->obj, I915_MAP_FORCE_WB); 494 if (IS_ERR(vaddr)) { 495 err = PTR_ERR(vaddr); 496 goto out; 497 } 498 499 rq->engine->default_state = i915_gem_object_get(state->obj); 500 i915_gem_object_unpin_map(state->obj); 501 } 502 503 out: 504 /* 505 * If we have to abandon now, we expect the engines to be idle 506 * and ready to be torn-down. The quickest way we can accomplish 507 * this is by declaring ourselves wedged. 508 */ 509 if (err) 510 intel_gt_set_wedged(gt); 511 512 for (id = 0; id < ARRAY_SIZE(requests); id++) { 513 struct intel_context *ce; 514 struct i915_request *rq; 515 516 rq = requests[id]; 517 if (!rq) 518 continue; 519 520 ce = rq->context; 521 i915_request_put(rq); 522 intel_context_put(ce); 523 } 524 return err; 525 } 526 527 static int __engines_verify_workarounds(struct intel_gt *gt) 528 { 529 struct intel_engine_cs *engine; 530 enum intel_engine_id id; 531 int err = 0; 532 533 if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 534 return 0; 535 536 for_each_engine(engine, gt, id) { 537 if (intel_engine_verify_workarounds(engine, "load")) 538 err = -EIO; 539 } 540 541 return err; 542 } 543 544 static void __intel_gt_disable(struct intel_gt *gt) 545 { 546 intel_gt_set_wedged_on_init(gt); 547 548 intel_gt_suspend_prepare(gt); 549 intel_gt_suspend_late(gt); 550 551 GEM_BUG_ON(intel_gt_pm_is_awake(gt)); 552 } 553 554 int intel_gt_init(struct intel_gt *gt) 555 { 556 int err; 557 558 err = i915_inject_probe_error(gt->i915, -ENODEV); 559 if (err) 560 return err; 561 562 /* 563 * This is just a security blanket to placate dragons. 564 * On some systems, we very sporadically observe that the first TLBs 565 * used by the CS may be stale, despite us poking the TLB reset. If 566 * we hold the forcewake during initialisation these problems 567 * just magically go away. 568 */ 569 intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); 570 571 err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? SZ_256K : SZ_4K); 572 if (err) 573 goto out_fw; 574 575 intel_gt_pm_init(gt); 576 577 gt->vm = kernel_vm(gt); 578 if (!gt->vm) { 579 err = -ENOMEM; 580 goto err_pm; 581 } 582 583 err = intel_engines_init(gt); 584 if (err) 585 goto err_engines; 586 587 intel_uc_init(>->uc); 588 589 err = intel_gt_resume(gt); 590 if (err) 591 goto err_uc_init; 592 593 err = __engines_record_defaults(gt); 594 if (err) 595 goto err_gt; 596 597 err = __engines_verify_workarounds(gt); 598 if (err) 599 goto err_gt; 600 601 err = i915_inject_probe_error(gt->i915, -EIO); 602 if (err) 603 goto err_gt; 604 605 goto out_fw; 606 err_gt: 607 __intel_gt_disable(gt); 608 intel_uc_fini_hw(>->uc); 609 err_uc_init: 610 intel_uc_fini(>->uc); 611 err_engines: 612 intel_engines_release(gt); 613 i915_vm_put(fetch_and_zero(>->vm)); 614 err_pm: 615 intel_gt_pm_fini(gt); 616 intel_gt_fini_scratch(gt); 617 out_fw: 618 if (err) 619 intel_gt_set_wedged_on_init(gt); 620 intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); 621 return err; 622 } 623 624 void intel_gt_driver_remove(struct intel_gt *gt) 625 { 626 __intel_gt_disable(gt); 627 628 intel_uc_fini_hw(>->uc); 629 intel_uc_fini(>->uc); 630 631 intel_engines_release(gt); 632 } 633 634 void intel_gt_driver_unregister(struct intel_gt *gt) 635 { 636 intel_rps_driver_unregister(>->rps); 637 } 638 639 void intel_gt_driver_release(struct intel_gt *gt) 640 { 641 struct i915_address_space *vm; 642 643 vm = fetch_and_zero(>->vm); 644 if (vm) /* FIXME being called twice on error paths :( */ 645 i915_vm_put(vm); 646 647 intel_gt_pm_fini(gt); 648 intel_gt_fini_scratch(gt); 649 } 650 651 void intel_gt_driver_late_release(struct intel_gt *gt) 652 { 653 intel_uc_driver_late_release(>->uc); 654 intel_gt_fini_requests(gt); 655 intel_gt_fini_reset(gt); 656 intel_gt_fini_timelines(gt); 657 intel_engines_free(gt); 658 } 659