1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2019 Intel Corporation 4 */ 5 6 #include "debugfs_gt.h" 7 #include "i915_drv.h" 8 #include "intel_context.h" 9 #include "intel_gt.h" 10 #include "intel_gt_pm.h" 11 #include "intel_gt_requests.h" 12 #include "intel_mocs.h" 13 #include "intel_rc6.h" 14 #include "intel_renderstate.h" 15 #include "intel_rps.h" 16 #include "intel_uncore.h" 17 #include "intel_pm.h" 18 19 void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) 20 { 21 gt->i915 = i915; 22 gt->uncore = &i915->uncore; 23 24 spin_lock_init(>->irq_lock); 25 26 INIT_LIST_HEAD(>->closed_vma); 27 spin_lock_init(>->closed_lock); 28 29 intel_gt_init_reset(gt); 30 intel_gt_init_requests(gt); 31 intel_gt_init_timelines(gt); 32 intel_gt_pm_init_early(gt); 33 34 intel_rps_init_early(>->rps); 35 intel_uc_init_early(>->uc); 36 } 37 38 void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt) 39 { 40 gt->ggtt = ggtt; 41 } 42 43 static void init_unused_ring(struct intel_gt *gt, u32 base) 44 { 45 struct intel_uncore *uncore = gt->uncore; 46 47 intel_uncore_write(uncore, RING_CTL(base), 0); 48 intel_uncore_write(uncore, RING_HEAD(base), 0); 49 intel_uncore_write(uncore, RING_TAIL(base), 0); 50 intel_uncore_write(uncore, RING_START(base), 0); 51 } 52 53 static void init_unused_rings(struct intel_gt *gt) 54 { 55 struct drm_i915_private *i915 = gt->i915; 56 57 if (IS_I830(i915)) { 58 init_unused_ring(gt, PRB1_BASE); 59 init_unused_ring(gt, SRB0_BASE); 60 init_unused_ring(gt, SRB1_BASE); 61 init_unused_ring(gt, SRB2_BASE); 62 init_unused_ring(gt, SRB3_BASE); 63 } else if (IS_GEN(i915, 2)) { 64 init_unused_ring(gt, SRB0_BASE); 65 init_unused_ring(gt, SRB1_BASE); 66 } else if (IS_GEN(i915, 3)) { 67 init_unused_ring(gt, PRB1_BASE); 68 init_unused_ring(gt, PRB2_BASE); 69 } 70 } 71 72 int intel_gt_init_hw(struct intel_gt *gt) 73 { 74 struct drm_i915_private *i915 = gt->i915; 75 struct intel_uncore *uncore = gt->uncore; 76 int ret; 77 78 gt->last_init_time = ktime_get(); 79 80 /* Double layer security blanket, see i915_gem_init() */ 81 intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); 82 83 if (HAS_EDRAM(i915) && INTEL_GEN(i915) < 9) 84 intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf)); 85 86 if (IS_HASWELL(i915)) 87 intel_uncore_write(uncore, 88 MI_PREDICATE_RESULT_2, 89 IS_HSW_GT3(i915) ? 90 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 91 92 /* Apply the GT workarounds... */ 93 intel_gt_apply_workarounds(gt); 94 /* ...and determine whether they are sticking. */ 95 intel_gt_verify_workarounds(gt, "init"); 96 97 intel_gt_init_swizzling(gt); 98 99 /* 100 * At least 830 can leave some of the unused rings 101 * "active" (ie. head != tail) after resume which 102 * will prevent c3 entry. Makes sure all unused rings 103 * are totally idle. 104 */ 105 init_unused_rings(gt); 106 107 ret = i915_ppgtt_init_hw(gt); 108 if (ret) { 109 DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); 110 goto out; 111 } 112 113 /* We can't enable contexts until all firmware is loaded */ 114 ret = intel_uc_init_hw(>->uc); 115 if (ret) { 116 i915_probe_error(i915, "Enabling uc failed (%d)\n", ret); 117 goto out; 118 } 119 120 intel_mocs_init(gt); 121 122 out: 123 intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); 124 return ret; 125 } 126 127 static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set) 128 { 129 intel_uncore_rmw(uncore, reg, 0, set); 130 } 131 132 static void rmw_clear(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) 133 { 134 intel_uncore_rmw(uncore, reg, clr, 0); 135 } 136 137 static void clear_register(struct intel_uncore *uncore, i915_reg_t reg) 138 { 139 intel_uncore_rmw(uncore, reg, 0, 0); 140 } 141 142 static void gen8_clear_engine_error_register(struct intel_engine_cs *engine) 143 { 144 GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0); 145 GEN6_RING_FAULT_REG_POSTING_READ(engine); 146 } 147 148 void 149 intel_gt_clear_error_registers(struct intel_gt *gt, 150 intel_engine_mask_t engine_mask) 151 { 152 struct drm_i915_private *i915 = gt->i915; 153 struct intel_uncore *uncore = gt->uncore; 154 u32 eir; 155 156 if (!IS_GEN(i915, 2)) 157 clear_register(uncore, PGTBL_ER); 158 159 if (INTEL_GEN(i915) < 4) 160 clear_register(uncore, IPEIR(RENDER_RING_BASE)); 161 else 162 clear_register(uncore, IPEIR_I965); 163 164 clear_register(uncore, EIR); 165 eir = intel_uncore_read(uncore, EIR); 166 if (eir) { 167 /* 168 * some errors might have become stuck, 169 * mask them. 170 */ 171 DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir); 172 rmw_set(uncore, EMR, eir); 173 intel_uncore_write(uncore, GEN2_IIR, 174 I915_MASTER_ERROR_INTERRUPT); 175 } 176 177 if (INTEL_GEN(i915) >= 12) { 178 rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID); 179 intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG); 180 } else if (INTEL_GEN(i915) >= 8) { 181 rmw_clear(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID); 182 intel_uncore_posting_read(uncore, GEN8_RING_FAULT_REG); 183 } else if (INTEL_GEN(i915) >= 6) { 184 struct intel_engine_cs *engine; 185 enum intel_engine_id id; 186 187 for_each_engine_masked(engine, gt, engine_mask, id) 188 gen8_clear_engine_error_register(engine); 189 } 190 } 191 192 static void gen6_check_faults(struct intel_gt *gt) 193 { 194 struct intel_engine_cs *engine; 195 enum intel_engine_id id; 196 u32 fault; 197 198 for_each_engine(engine, gt, id) { 199 fault = GEN6_RING_FAULT_REG_READ(engine); 200 if (fault & RING_FAULT_VALID) { 201 drm_dbg(&engine->i915->drm, "Unexpected fault\n" 202 "\tAddr: 0x%08lx\n" 203 "\tAddress space: %s\n" 204 "\tSource ID: %d\n" 205 "\tType: %d\n", 206 fault & PAGE_MASK, 207 fault & RING_FAULT_GTTSEL_MASK ? 208 "GGTT" : "PPGTT", 209 RING_FAULT_SRCID(fault), 210 RING_FAULT_FAULT_TYPE(fault)); 211 } 212 } 213 } 214 215 static void gen8_check_faults(struct intel_gt *gt) 216 { 217 struct intel_uncore *uncore = gt->uncore; 218 i915_reg_t fault_reg, fault_data0_reg, fault_data1_reg; 219 u32 fault; 220 221 if (INTEL_GEN(gt->i915) >= 12) { 222 fault_reg = GEN12_RING_FAULT_REG; 223 fault_data0_reg = GEN12_FAULT_TLB_DATA0; 224 fault_data1_reg = GEN12_FAULT_TLB_DATA1; 225 } else { 226 fault_reg = GEN8_RING_FAULT_REG; 227 fault_data0_reg = GEN8_FAULT_TLB_DATA0; 228 fault_data1_reg = GEN8_FAULT_TLB_DATA1; 229 } 230 231 fault = intel_uncore_read(uncore, fault_reg); 232 if (fault & RING_FAULT_VALID) { 233 u32 fault_data0, fault_data1; 234 u64 fault_addr; 235 236 fault_data0 = intel_uncore_read(uncore, fault_data0_reg); 237 fault_data1 = intel_uncore_read(uncore, fault_data1_reg); 238 239 fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) | 240 ((u64)fault_data0 << 12); 241 242 drm_dbg(&uncore->i915->drm, "Unexpected fault\n" 243 "\tAddr: 0x%08x_%08x\n" 244 "\tAddress space: %s\n" 245 "\tEngine ID: %d\n" 246 "\tSource ID: %d\n" 247 "\tType: %d\n", 248 upper_32_bits(fault_addr), lower_32_bits(fault_addr), 249 fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT", 250 GEN8_RING_FAULT_ENGINE_ID(fault), 251 RING_FAULT_SRCID(fault), 252 RING_FAULT_FAULT_TYPE(fault)); 253 } 254 } 255 256 void intel_gt_check_and_clear_faults(struct intel_gt *gt) 257 { 258 struct drm_i915_private *i915 = gt->i915; 259 260 /* From GEN8 onwards we only have one 'All Engine Fault Register' */ 261 if (INTEL_GEN(i915) >= 8) 262 gen8_check_faults(gt); 263 else if (INTEL_GEN(i915) >= 6) 264 gen6_check_faults(gt); 265 else 266 return; 267 268 intel_gt_clear_error_registers(gt, ALL_ENGINES); 269 } 270 271 void intel_gt_flush_ggtt_writes(struct intel_gt *gt) 272 { 273 struct intel_uncore *uncore = gt->uncore; 274 intel_wakeref_t wakeref; 275 276 /* 277 * No actual flushing is required for the GTT write domain for reads 278 * from the GTT domain. Writes to it "immediately" go to main memory 279 * as far as we know, so there's no chipset flush. It also doesn't 280 * land in the GPU render cache. 281 * 282 * However, we do have to enforce the order so that all writes through 283 * the GTT land before any writes to the device, such as updates to 284 * the GATT itself. 285 * 286 * We also have to wait a bit for the writes to land from the GTT. 287 * An uncached read (i.e. mmio) seems to be ideal for the round-trip 288 * timing. This issue has only been observed when switching quickly 289 * between GTT writes and CPU reads from inside the kernel on recent hw, 290 * and it appears to only affect discrete GTT blocks (i.e. on LLC 291 * system agents we cannot reproduce this behaviour, until Cannonlake 292 * that was!). 293 */ 294 295 wmb(); 296 297 if (INTEL_INFO(gt->i915)->has_coherent_ggtt) 298 return; 299 300 intel_gt_chipset_flush(gt); 301 302 with_intel_runtime_pm_if_in_use(uncore->rpm, wakeref) { 303 unsigned long flags; 304 305 spin_lock_irqsave(&uncore->lock, flags); 306 intel_uncore_posting_read_fw(uncore, 307 RING_HEAD(RENDER_RING_BASE)); 308 spin_unlock_irqrestore(&uncore->lock, flags); 309 } 310 } 311 312 void intel_gt_chipset_flush(struct intel_gt *gt) 313 { 314 wmb(); 315 if (INTEL_GEN(gt->i915) < 6) 316 intel_gtt_chipset_flush(); 317 } 318 319 void intel_gt_driver_register(struct intel_gt *gt) 320 { 321 intel_rps_driver_register(>->rps); 322 323 debugfs_gt_register(gt); 324 } 325 326 static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) 327 { 328 struct drm_i915_private *i915 = gt->i915; 329 struct drm_i915_gem_object *obj; 330 struct i915_vma *vma; 331 int ret; 332 333 obj = i915_gem_object_create_stolen(i915, size); 334 if (IS_ERR(obj)) 335 obj = i915_gem_object_create_internal(i915, size); 336 if (IS_ERR(obj)) { 337 DRM_ERROR("Failed to allocate scratch page\n"); 338 return PTR_ERR(obj); 339 } 340 341 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 342 if (IS_ERR(vma)) { 343 ret = PTR_ERR(vma); 344 goto err_unref; 345 } 346 347 ret = i915_ggtt_pin(vma, 0, PIN_HIGH); 348 if (ret) 349 goto err_unref; 350 351 gt->scratch = i915_vma_make_unshrinkable(vma); 352 353 return 0; 354 355 err_unref: 356 i915_gem_object_put(obj); 357 return ret; 358 } 359 360 static void intel_gt_fini_scratch(struct intel_gt *gt) 361 { 362 i915_vma_unpin_and_release(>->scratch, 0); 363 } 364 365 static struct i915_address_space *kernel_vm(struct intel_gt *gt) 366 { 367 if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING) 368 return &i915_ppgtt_create(gt)->vm; 369 else 370 return i915_vm_get(>->ggtt->vm); 371 } 372 373 static int __intel_context_flush_retire(struct intel_context *ce) 374 { 375 struct intel_timeline *tl; 376 377 tl = intel_context_timeline_lock(ce); 378 if (IS_ERR(tl)) 379 return PTR_ERR(tl); 380 381 intel_context_timeline_unlock(tl); 382 return 0; 383 } 384 385 static int __engines_record_defaults(struct intel_gt *gt) 386 { 387 struct i915_request *requests[I915_NUM_ENGINES] = {}; 388 struct intel_engine_cs *engine; 389 enum intel_engine_id id; 390 int err = 0; 391 392 /* 393 * As we reset the gpu during very early sanitisation, the current 394 * register state on the GPU should reflect its defaults values. 395 * We load a context onto the hw (with restore-inhibit), then switch 396 * over to a second context to save that default register state. We 397 * can then prime every new context with that state so they all start 398 * from the same default HW values. 399 */ 400 401 for_each_engine(engine, gt, id) { 402 struct intel_renderstate so; 403 struct intel_context *ce; 404 struct i915_request *rq; 405 406 /* We must be able to switch to something! */ 407 GEM_BUG_ON(!engine->kernel_context); 408 409 err = intel_renderstate_init(&so, engine); 410 if (err) 411 goto out; 412 413 ce = intel_context_create(engine); 414 if (IS_ERR(ce)) { 415 err = PTR_ERR(ce); 416 goto out; 417 } 418 419 rq = intel_context_create_request(ce); 420 if (IS_ERR(rq)) { 421 err = PTR_ERR(rq); 422 intel_context_put(ce); 423 goto out; 424 } 425 426 err = intel_engine_emit_ctx_wa(rq); 427 if (err) 428 goto err_rq; 429 430 err = intel_renderstate_emit(&so, rq); 431 if (err) 432 goto err_rq; 433 434 err_rq: 435 requests[id] = i915_request_get(rq); 436 i915_request_add(rq); 437 intel_renderstate_fini(&so); 438 if (err) 439 goto out; 440 } 441 442 /* Flush the default context image to memory, and enable powersaving. */ 443 if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { 444 err = -EIO; 445 goto out; 446 } 447 448 for (id = 0; id < ARRAY_SIZE(requests); id++) { 449 struct i915_request *rq; 450 struct i915_vma *state; 451 void *vaddr; 452 453 rq = requests[id]; 454 if (!rq) 455 continue; 456 457 if (rq->fence.error) { 458 err = -EIO; 459 goto out; 460 } 461 462 GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags)); 463 state = rq->context->state; 464 if (!state) 465 continue; 466 467 /* Serialise with retirement on another CPU */ 468 GEM_BUG_ON(!i915_request_completed(rq)); 469 err = __intel_context_flush_retire(rq->context); 470 if (err) 471 goto out; 472 473 /* We want to be able to unbind the state from the GGTT */ 474 GEM_BUG_ON(intel_context_is_pinned(rq->context)); 475 476 /* 477 * As we will hold a reference to the logical state, it will 478 * not be torn down with the context, and importantly the 479 * object will hold onto its vma (making it possible for a 480 * stray GTT write to corrupt our defaults). Unmap the vma 481 * from the GTT to prevent such accidents and reclaim the 482 * space. 483 */ 484 err = i915_vma_unbind(state); 485 if (err) 486 goto out; 487 488 i915_gem_object_lock(state->obj); 489 err = i915_gem_object_set_to_cpu_domain(state->obj, false); 490 i915_gem_object_unlock(state->obj); 491 if (err) 492 goto out; 493 494 i915_gem_object_set_cache_coherency(state->obj, I915_CACHE_LLC); 495 496 /* Check we can acquire the image of the context state */ 497 vaddr = i915_gem_object_pin_map(state->obj, I915_MAP_FORCE_WB); 498 if (IS_ERR(vaddr)) { 499 err = PTR_ERR(vaddr); 500 goto out; 501 } 502 503 rq->engine->default_state = i915_gem_object_get(state->obj); 504 i915_gem_object_unpin_map(state->obj); 505 } 506 507 out: 508 /* 509 * If we have to abandon now, we expect the engines to be idle 510 * and ready to be torn-down. The quickest way we can accomplish 511 * this is by declaring ourselves wedged. 512 */ 513 if (err) 514 intel_gt_set_wedged(gt); 515 516 for (id = 0; id < ARRAY_SIZE(requests); id++) { 517 struct intel_context *ce; 518 struct i915_request *rq; 519 520 rq = requests[id]; 521 if (!rq) 522 continue; 523 524 ce = rq->context; 525 i915_request_put(rq); 526 intel_context_put(ce); 527 } 528 return err; 529 } 530 531 static int __engines_verify_workarounds(struct intel_gt *gt) 532 { 533 struct intel_engine_cs *engine; 534 enum intel_engine_id id; 535 int err = 0; 536 537 if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 538 return 0; 539 540 for_each_engine(engine, gt, id) { 541 if (intel_engine_verify_workarounds(engine, "load")) 542 err = -EIO; 543 } 544 545 /* Flush and restore the kernel context for safety */ 546 if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) 547 err = -EIO; 548 549 return err; 550 } 551 552 static void __intel_gt_disable(struct intel_gt *gt) 553 { 554 intel_gt_set_wedged_on_init(gt); 555 556 intel_gt_suspend_prepare(gt); 557 intel_gt_suspend_late(gt); 558 559 GEM_BUG_ON(intel_gt_pm_is_awake(gt)); 560 } 561 562 int intel_gt_init(struct intel_gt *gt) 563 { 564 int err; 565 566 err = i915_inject_probe_error(gt->i915, -ENODEV); 567 if (err) 568 return err; 569 570 /* 571 * This is just a security blanket to placate dragons. 572 * On some systems, we very sporadically observe that the first TLBs 573 * used by the CS may be stale, despite us poking the TLB reset. If 574 * we hold the forcewake during initialisation these problems 575 * just magically go away. 576 */ 577 intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); 578 579 err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? SZ_256K : SZ_4K); 580 if (err) 581 goto out_fw; 582 583 intel_gt_pm_init(gt); 584 585 gt->vm = kernel_vm(gt); 586 if (!gt->vm) { 587 err = -ENOMEM; 588 goto err_pm; 589 } 590 591 err = intel_engines_init(gt); 592 if (err) 593 goto err_engines; 594 595 err = intel_uc_init(>->uc); 596 if (err) 597 goto err_engines; 598 599 err = intel_gt_resume(gt); 600 if (err) 601 goto err_uc_init; 602 603 err = __engines_record_defaults(gt); 604 if (err) 605 goto err_gt; 606 607 err = __engines_verify_workarounds(gt); 608 if (err) 609 goto err_gt; 610 611 err = i915_inject_probe_error(gt->i915, -EIO); 612 if (err) 613 goto err_gt; 614 615 goto out_fw; 616 err_gt: 617 __intel_gt_disable(gt); 618 intel_uc_fini_hw(>->uc); 619 err_uc_init: 620 intel_uc_fini(>->uc); 621 err_engines: 622 intel_engines_release(gt); 623 i915_vm_put(fetch_and_zero(>->vm)); 624 err_pm: 625 intel_gt_pm_fini(gt); 626 intel_gt_fini_scratch(gt); 627 out_fw: 628 if (err) 629 intel_gt_set_wedged_on_init(gt); 630 intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); 631 return err; 632 } 633 634 void intel_gt_driver_remove(struct intel_gt *gt) 635 { 636 __intel_gt_disable(gt); 637 638 intel_uc_fini_hw(>->uc); 639 intel_uc_fini(>->uc); 640 641 intel_engines_release(gt); 642 } 643 644 void intel_gt_driver_unregister(struct intel_gt *gt) 645 { 646 intel_rps_driver_unregister(>->rps); 647 648 /* 649 * Upon unregistering the device to prevent any new users, cancel 650 * all in-flight requests so that we can quickly unbind the active 651 * resources. 652 */ 653 intel_gt_set_wedged(gt); 654 } 655 656 void intel_gt_driver_release(struct intel_gt *gt) 657 { 658 struct i915_address_space *vm; 659 660 vm = fetch_and_zero(>->vm); 661 if (vm) /* FIXME being called twice on error paths :( */ 662 i915_vm_put(vm); 663 664 intel_gt_pm_fini(gt); 665 intel_gt_fini_scratch(gt); 666 } 667 668 void intel_gt_driver_late_release(struct intel_gt *gt) 669 { 670 /* We need to wait for inflight RCU frees to release their grip */ 671 rcu_barrier(); 672 673 intel_uc_driver_late_release(>->uc); 674 intel_gt_fini_requests(gt); 675 intel_gt_fini_reset(gt); 676 intel_gt_fini_timelines(gt); 677 intel_engines_free(gt); 678 } 679